1use std::fmt::Display;
2use std::sync::Arc;
3
4use arrow_array::types::{
5 Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type, UInt32Type,
6 UInt64Type, UInt8Type,
7};
8use arrow_array::{
9 Array, ArrayRef, BinaryArray, BinaryViewArray, BooleanArray, Datum, FixedSizeBinaryArray,
10 LargeBinaryArray, LargeStringArray, PrimitiveArray, StringArray, StringViewArray,
11};
12use arrow_cast::cast;
13use arrow_cast::display::ArrayFormatter;
14use arrow_schema::{ArrowError, DataType, Field, FieldRef};
15use arrow_select::concat::concat;
16use arrow_select::take::take;
17use numpy::PyUntypedArray;
18use pyo3::exceptions::{PyIndexError, PyNotImplementedError, PyValueError};
19use pyo3::intern;
20use pyo3::prelude::*;
21use pyo3::pybacked::{PyBackedBytes, PyBackedStr};
22use pyo3::types::{PyCapsule, PyTuple, PyType};
23
24#[cfg(feature = "buffer_protocol")]
25use crate::buffer::AnyBufferProtocol;
26use crate::error::PyArrowResult;
27use crate::export::{Arro3Array, Arro3DataType, Arro3Field};
28use crate::ffi::from_python::utils::import_array_pycapsules;
29use crate::ffi::to_python::nanoarrow::to_nanoarrow_array;
30use crate::ffi::{to_array_pycapsules, to_schema_pycapsule};
31use crate::input::AnyArray;
32use crate::interop::numpy::from_numpy::from_numpy;
33use crate::interop::numpy::to_numpy::to_numpy;
34use crate::scalar::PyScalar;
35use crate::utils::default_repr_options;
36use crate::{PyDataType, PyField};
37
/// A Python-facing Arrow array, exposed to Python as `arro3.core._core.Array`.
///
/// Pairs an [`ArrayRef`] with the [`FieldRef`] that describes it. Instances are only built
/// through [`PyArray::try_new`] (directly or via [`PyArray::new`]), which rejects a pair whose
/// data types differ.
#[derive(Debug)]
#[pyclass(module = "arro3.core._core", name = "Array", subclass, frozen)]
pub struct PyArray {
    /// The underlying Arrow array data.
    array: ArrayRef,
    /// The field describing `array`; its data type equals the array's data type.
    field: FieldRef,
}
53
54impl PyArray {
55 pub fn new(array: ArrayRef, field: FieldRef) -> Self {
59 Self::try_new(array, field).unwrap()
60 }
61
62 pub fn try_new(array: ArrayRef, field: FieldRef) -> Result<Self, ArrowError> {
64 if array.data_type() != field.data_type() {
68 return Err(ArrowError::SchemaError(
69 format!("Array DataType must match Field DataType. Array DataType is {}; field DataType is {}", array.data_type(), field.data_type())
70 ));
71 }
72 Ok(Self { array, field })
73 }
74
75 pub fn from_array_ref(array: ArrayRef) -> Self {
77 let field = Field::new("", array.data_type().clone(), true);
78 Self::new(array, Arc::new(field))
79 }
80
81 pub fn from_arrow_pycapsule(
83 schema_capsule: &Bound<PyCapsule>,
84 array_capsule: &Bound<PyCapsule>,
85 ) -> PyResult<Self> {
86 let (array, field) = import_array_pycapsules(schema_capsule, array_capsule)?;
87 Ok(Self::new(array, Arc::new(field)))
88 }
89
90 pub fn array(&self) -> &ArrayRef {
92 &self.array
93 }
94
95 pub fn field(&self) -> &FieldRef {
97 &self.field
98 }
99
100 pub fn into_inner(self) -> (ArrayRef, FieldRef) {
102 (self.array, self.field)
103 }
104
105 pub fn to_arro3<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
109 let arro3_mod = py.import(intern!(py, "arro3.core"))?;
110 arro3_mod.getattr(intern!(py, "Array"))?.call_method1(
111 intern!(py, "from_arrow_pycapsule"),
112 self.__arrow_c_array__(py, None)?,
113 )
114 }
115
116 pub fn into_arro3(self, py: Python) -> PyResult<Bound<PyAny>> {
120 let arro3_mod = py.import(intern!(py, "arro3.core"))?;
121 let array_capsules = to_array_pycapsules(py, self.field.clone(), &self.array, None)?;
122 arro3_mod
123 .getattr(intern!(py, "Array"))?
124 .call_method1(intern!(py, "from_arrow_pycapsule"), array_capsules)
125 }
126
127 pub fn to_nanoarrow<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
129 to_nanoarrow_array(py, self.__arrow_c_array__(py, None)?)
130 }
131
132 pub fn to_pyarrow<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
136 let pyarrow_mod = py.import(intern!(py, "pyarrow"))?;
137 let cloned = Self::new(self.array.clone(), self.field.clone());
138 pyarrow_mod
139 .getattr(intern!(py, "array"))?
140 .call1(PyTuple::new(py, vec![cloned.into_pyobject(py)?])?)
141 }
142}
143
144impl From<ArrayRef> for PyArray {
145 fn from(value: ArrayRef) -> Self {
146 Self::from_array_ref(value)
147 }
148}
149
150impl AsRef<ArrayRef> for PyArray {
151 fn as_ref(&self) -> &ArrayRef {
152 &self.array
153 }
154}
155
156impl Display for PyArray {
157 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
158 write!(f, "arro3.core.Array<")?;
159 self.array.data_type().fmt(f)?;
160 writeln!(f, ">")?;
161
162 let options = default_repr_options();
163 let formatter =
164 ArrayFormatter::try_new(self.array.as_ref(), &options).map_err(|_| std::fmt::Error)?;
165
166 writeln!(f, "[")?;
167 for i in 0..self.array.len().min(10) {
168 let row = formatter.value(i);
169 writeln!(f, " {},", row)?;
170 }
171 writeln!(f, "]")?;
172
173 Ok(())
174 }
175}
176
177impl Datum for PyArray {
178 fn get(&self) -> (&dyn Array, bool) {
179 (self.array.as_ref(), false)
180 }
181}
182
183#[pymethods]
184impl PyArray {
185 #[new]
186 #[pyo3(signature = (obj, /, r#type = None, *))]
187 pub(crate) fn init(
188 py: Python,
189 obj: &Bound<PyAny>,
190 r#type: Option<PyField>,
191 ) -> PyArrowResult<Self> {
192 if obj.hasattr(intern!(py, "__arrow_c_array__"))? {
197 return Ok(obj.extract::<PyArray>()?);
198 }
199 if let Ok(data) = obj.extract::<PyArray>() {
200 return Ok(data);
201 }
202
203 macro_rules! impl_primitive {
204 ($rust_type:ty, $arrow_type:ty) => {{
205 let values: Vec<Option<$rust_type>> = obj.extract()?;
206 Arc::new(PrimitiveArray::<$arrow_type>::from(values))
207 }};
208 }
209
210 let field = r#type
211 .ok_or(PyValueError::new_err(
212 "type must be passed for non-Arrow input",
213 ))?
214 .into_inner();
215 let array: ArrayRef = match field.data_type() {
216 DataType::Float32 => impl_primitive!(f32, Float32Type),
217 DataType::Float64 => impl_primitive!(f64, Float64Type),
218 DataType::UInt8 => impl_primitive!(u8, UInt8Type),
219 DataType::UInt16 => impl_primitive!(u16, UInt16Type),
220 DataType::UInt32 => impl_primitive!(u32, UInt32Type),
221 DataType::UInt64 => impl_primitive!(u64, UInt64Type),
222 DataType::Int8 => impl_primitive!(i8, Int8Type),
223 DataType::Int16 => impl_primitive!(i16, Int16Type),
224 DataType::Int32 => impl_primitive!(i32, Int32Type),
225 DataType::Int64 => impl_primitive!(i64, Int64Type),
226 DataType::Boolean => {
227 let values: Vec<Option<bool>> = obj.extract()?;
228 Arc::new(BooleanArray::from(values))
229 }
230 DataType::Binary => {
231 let values: Vec<Option<PyBackedBytes>> = obj.extract()?;
232 let slices = values
233 .iter()
234 .map(|maybe_vec| maybe_vec.as_ref().map(|vec| vec.as_ref()))
235 .collect::<Vec<_>>();
236 Arc::new(BinaryArray::from(slices))
237 }
238 DataType::LargeBinary => {
239 let values: Vec<Option<PyBackedBytes>> = obj.extract()?;
240 let slices = values
241 .iter()
242 .map(|maybe_vec| maybe_vec.as_ref().map(|vec| vec.as_ref()))
243 .collect::<Vec<_>>();
244 Arc::new(LargeBinaryArray::from(slices))
245 }
246 DataType::BinaryView => {
247 let values: Vec<Option<PyBackedBytes>> = obj.extract()?;
248 let slices = values
249 .iter()
250 .map(|maybe_vec| maybe_vec.as_ref().map(|vec| vec.as_ref()))
251 .collect::<Vec<_>>();
252 Arc::new(BinaryViewArray::from(slices))
253 }
254 DataType::FixedSizeBinary(size) => {
255 let values: Vec<Option<PyBackedBytes>> = obj.extract()?;
256 Arc::new(FixedSizeBinaryArray::try_from_sparse_iter_with_size(
257 values
258 .iter()
259 .map(|maybe_vec| maybe_vec.as_ref().map(|vec| vec.as_ref())),
260 *size,
261 )?)
262 }
263 DataType::Utf8 => {
264 let values: Vec<Option<PyBackedStr>> = obj.extract()?;
265 let slices = values
266 .iter()
267 .map(|maybe_str| maybe_str.as_ref().map(|s| s.as_ref()))
268 .collect::<Vec<_>>();
269 Arc::new(StringArray::from(slices))
270 }
271 DataType::LargeUtf8 => {
272 let values: Vec<Option<PyBackedStr>> = obj.extract()?;
273 let slices = values
274 .iter()
275 .map(|maybe_str| maybe_str.as_ref().map(|s| s.as_ref()))
276 .collect::<Vec<_>>();
277 Arc::new(LargeStringArray::from(slices))
278 }
279 DataType::Utf8View => {
280 let values: Vec<Option<PyBackedStr>> = obj.extract()?;
281 let slices = values
282 .iter()
283 .map(|maybe_str| maybe_str.as_ref().map(|s| s.as_ref()))
284 .collect::<Vec<_>>();
285 Arc::new(StringViewArray::from(slices))
286 }
287 dt => {
288 return Err(PyNotImplementedError::new_err(format!(
289 "Array constructor for {dt} not yet implemented."
290 ))
291 .into())
292 }
293 };
294 Ok(Self::new(array, field))
295 }
296
297 #[cfg(feature = "buffer_protocol")]
298 fn buffer(&self) -> crate::buffer::PyArrowBuffer {
299 use arrow_array::cast::AsArray;
300
301 match self.array.data_type() {
302 DataType::Int64 => {
303 let arr = self.array.as_primitive::<Int64Type>();
304 let values = arr.values();
305 let buffer = values.inner().clone();
306 crate::buffer::PyArrowBuffer::new(buffer)
307 }
308 _ => todo!(),
309 }
310 }
311
312 #[pyo3(signature = (dtype=None, copy=None))]
313 #[allow(unused_variables)]
314 fn __array__<'py>(
315 &'py self,
316 py: Python<'py>,
317 dtype: Option<Bound<'py, PyAny>>,
318 copy: Option<Bound<'py, PyAny>>,
319 ) -> PyResult<Bound<'py, PyAny>> {
320 to_numpy(py, &self.array)
321 }
322
323 #[pyo3(signature = (requested_schema=None))]
324 fn __arrow_c_array__<'py>(
325 &'py self,
326 py: Python<'py>,
327 requested_schema: Option<Bound<'py, PyCapsule>>,
328 ) -> PyArrowResult<Bound<'py, PyTuple>> {
329 to_array_pycapsules(py, self.field.clone(), &self.array, requested_schema)
330 }
331
332 fn __arrow_c_schema__<'py>(&'py self, py: Python<'py>) -> PyArrowResult<Bound<'py, PyCapsule>> {
333 to_schema_pycapsule(py, self.field.as_ref())
334 }
335
336 fn __eq__(&self, other: &PyArray) -> bool {
337 self.array.as_ref() == other.array.as_ref() && self.field == other.field
338 }
339
340 fn __getitem__(&self, i: isize) -> PyArrowResult<PyScalar> {
341 let i = if i < 0 {
343 let i = self.array.len() as isize + i;
344 if i < 0 {
345 return Err(PyIndexError::new_err("Index out of range").into());
346 }
347 i as usize
348 } else {
349 i as usize
350 };
351 if i >= self.array.len() {
352 return Err(PyIndexError::new_err("Index out of range").into());
353 }
354 PyScalar::try_new(self.array.slice(i, 1), self.field.clone())
355 }
356
357 fn __len__(&self) -> usize {
358 self.array.len()
359 }
360
361 fn __repr__(&self) -> String {
362 self.to_string()
363 }
364
365 #[classmethod]
366 fn from_arrow(_cls: &Bound<PyType>, input: AnyArray) -> PyArrowResult<Self> {
367 match input {
368 AnyArray::Array(array) => Ok(array),
369 AnyArray::Stream(stream) => {
370 let chunked_array = stream.into_chunked_array()?;
371 let (chunks, field) = chunked_array.into_inner();
372 let chunk_refs = chunks.iter().map(|arr| arr.as_ref()).collect::<Vec<_>>();
373 let concatted = concat(chunk_refs.as_slice())?;
374 Ok(Self::new(concatted, field))
375 }
376 }
377 }
378
379 #[classmethod]
380 #[pyo3(name = "from_arrow_pycapsule")]
381 fn from_arrow_pycapsule_py(
382 _cls: &Bound<PyType>,
383 schema_capsule: &Bound<PyCapsule>,
384 array_capsule: &Bound<PyCapsule>,
385 ) -> PyResult<Self> {
386 Self::from_arrow_pycapsule(schema_capsule, array_capsule)
387 }
388
389 #[cfg(feature = "buffer_protocol")]
391 #[classmethod]
392 fn from_buffer(_cls: &Bound<PyType>, buffer: AnyBufferProtocol) -> PyArrowResult<Self> {
393 buffer.try_into()
394 }
395
396 #[classmethod]
397 fn from_numpy(
398 _cls: &Bound<PyType>,
399 py: Python,
400 array: Bound<'_, PyAny>,
401 ) -> PyArrowResult<Self> {
402 let mut numpy_array = array;
403 if numpy_array.hasattr("__array__")? {
404 numpy_array = numpy_array.call_method0("__array__")?;
405 };
406
407 #[cfg(feature = "buffer_protocol")]
409 if let Ok(buf) = numpy_array.extract::<AnyBufferProtocol>() {
410 return buf.try_into();
411 }
412
413 let numpy_array: Bound<PyUntypedArray> = numpy_array.extract()?;
414 let arrow_array = from_numpy(py, &numpy_array)?;
415 Ok(Self::from_array_ref(arrow_array))
416 }
417
418 fn cast(&self, target_type: PyField) -> PyArrowResult<Arro3Array> {
419 let new_field = target_type.into_inner();
420 let new_array = cast(self.as_ref(), new_field.data_type())?;
421 Ok(PyArray::new(new_array, new_field).into())
422 }
423
424 #[getter]
425 #[pyo3(name = "field")]
426 fn py_field(&self) -> Arro3Field {
427 PyField::new(self.field.clone()).into()
428 }
429
430 #[getter]
431 fn nbytes(&self) -> usize {
432 self.array.get_array_memory_size()
433 }
434
435 #[getter]
436 fn null_count(&self) -> usize {
437 self.array.null_count()
438 }
439
440 #[pyo3(signature = (offset=0, length=None))]
441 fn slice(&self, offset: usize, length: Option<usize>) -> Arro3Array {
442 let length = length.unwrap_or_else(|| self.array.len() - offset);
443 let new_array = self.array.slice(offset, length);
444 PyArray::new(new_array, self.field().clone()).into()
445 }
446
447 fn take(&self, indices: PyArray) -> PyArrowResult<Arro3Array> {
448 let new_array = take(self.as_ref(), indices.as_ref(), None)?;
449 Ok(PyArray::new(new_array, self.field.clone()).into())
450 }
451
452 fn to_numpy<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
453 self.__array__(py, None, None)
454 }
455
456 fn to_pylist(&self, py: Python) -> PyResult<Vec<Py<PyAny>>> {
457 let mut scalars = Vec::with_capacity(self.array.len());
458 for i in 0..self.array.len() {
459 let scalar =
460 unsafe { PyScalar::new_unchecked(self.array.slice(i, 1), self.field.clone()) };
461 scalars.push(scalar.as_py(py)?);
462 }
463 Ok(scalars)
464 }
465
466 #[getter]
467 fn r#type(&self) -> Arro3DataType {
468 PyDataType::new(self.field.data_type().clone()).into()
469 }
470}