pyo3_arrow/
field.rs

1use std::collections::HashMap;
2use std::fmt::Display;
3use std::sync::Arc;
4
5use arrow_schema::{Field, FieldRef};
6use pyo3::exceptions::PyTypeError;
7use pyo3::intern;
8use pyo3::prelude::*;
9use pyo3::types::{PyBytes, PyCapsule, PyDict, PyTuple, PyType};
10
11use crate::error::PyArrowResult;
12use crate::export::{Arro3DataType, Arro3Field};
13use crate::ffi::from_python::utils::import_schema_pycapsule;
14use crate::ffi::to_python::nanoarrow::to_nanoarrow_schema;
15use crate::ffi::to_python::to_schema_pycapsule;
16use crate::input::MetadataInput;
17use crate::PyDataType;
18
19/// A Python-facing Arrow field.
20///
21/// This is a wrapper around a [FieldRef].
22#[derive(Debug)]
23#[pyclass(module = "arro3.core._core", name = "Field", subclass, frozen)]
24pub struct PyField(FieldRef);
25
26impl PyField {
27    /// Construct a new PyField around a [FieldRef]
28    pub fn new(field: FieldRef) -> Self {
29        Self(field)
30    }
31
32    /// Construct from a raw Arrow C Schema capsule
33    pub fn from_arrow_pycapsule(capsule: &Bound<PyCapsule>) -> PyResult<Self> {
34        let schema_ptr = import_schema_pycapsule(capsule)?;
35        let field =
36            Field::try_from(schema_ptr).map_err(|err| PyTypeError::new_err(err.to_string()))?;
37        Ok(Self::new(Arc::new(field)))
38    }
39
40    /// Consume this and return its internal [FieldRef]
41    pub fn into_inner(self) -> FieldRef {
42        self.0
43    }
44
45    /// Export this to a Python `arro3.core.Field`.
46    pub fn to_arro3<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
47        let arro3_mod = py.import(intern!(py, "arro3.core"))?;
48        arro3_mod.getattr(intern!(py, "Field"))?.call_method1(
49            intern!(py, "from_arrow_pycapsule"),
50            PyTuple::new(py, vec![self.__arrow_c_schema__(py)?])?,
51        )
52    }
53
54    /// Export this to a Python `arro3.core.Field`.
55    pub fn into_arro3(self, py: Python) -> PyResult<Bound<PyAny>> {
56        let arro3_mod = py.import(intern!(py, "arro3.core"))?;
57        let capsule = to_schema_pycapsule(py, self.0.as_ref())?;
58        arro3_mod.getattr(intern!(py, "Field"))?.call_method1(
59            intern!(py, "from_arrow_pycapsule"),
60            PyTuple::new(py, vec![capsule])?,
61        )
62    }
63
64    /// Export this to a Python `nanoarrow.Schema`.
65    pub fn to_nanoarrow<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
66        to_nanoarrow_schema(py, &self.__arrow_c_schema__(py)?)
67    }
68
69    /// Export to a pyarrow.Field
70    ///
71    /// Requires pyarrow >=14
72    pub fn to_pyarrow<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
73        let pyarrow_mod = py.import(intern!(py, "pyarrow"))?;
74        let cloned = PyField::new(self.0.clone());
75        pyarrow_mod
76            .getattr(intern!(py, "field"))?
77            .call1(PyTuple::new(py, vec![cloned.into_pyobject(py)?])?)
78    }
79}
80
81impl From<PyField> for FieldRef {
82    fn from(value: PyField) -> Self {
83        value.0
84    }
85}
86
87impl From<FieldRef> for PyField {
88    fn from(value: FieldRef) -> Self {
89        Self(value)
90    }
91}
92
93impl AsRef<Field> for PyField {
94    fn as_ref(&self) -> &Field {
95        &self.0
96    }
97}
98
99impl Display for PyField {
100    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
101        write!(f, "arro3.core.Field<")?;
102        f.write_str(self.0.name().as_str())?;
103        write!(f, ": ")?;
104        self.0.data_type().fmt(f)?;
105        if !self.0.is_nullable() {
106            write!(f, " not null")?;
107        }
108        writeln!(f, ">")?;
109        Ok(())
110    }
111}
112
113#[pymethods]
114impl PyField {
115    #[new]
116    #[pyo3(signature = (name, r#type, nullable=true, *, metadata=None))]
117    fn init(
118        name: String,
119        r#type: PyDataType,
120        nullable: bool,
121        metadata: Option<MetadataInput>,
122    ) -> PyResult<Self> {
123        let field = Field::new(name, r#type.into_inner(), nullable)
124            .with_metadata(metadata.unwrap_or_default().into_string_hashmap()?);
125        Ok(PyField::new(field.into()))
126    }
127
128    fn __arrow_c_schema__<'py>(&'py self, py: Python<'py>) -> PyArrowResult<Bound<'py, PyCapsule>> {
129        to_schema_pycapsule(py, self.0.as_ref())
130    }
131
132    fn __eq__(&self, other: &PyField) -> bool {
133        self.0 == other.0
134    }
135
136    fn __repr__(&self) -> String {
137        self.to_string()
138    }
139
140    #[classmethod]
141    fn from_arrow(_cls: &Bound<PyType>, input: Self) -> Self {
142        input
143    }
144
145    #[classmethod]
146    #[pyo3(name = "from_arrow_pycapsule")]
147    fn from_arrow_pycapsule_py(_cls: &Bound<PyType>, capsule: &Bound<PyCapsule>) -> PyResult<Self> {
148        Self::from_arrow_pycapsule(capsule)
149    }
150
151    fn equals(&self, other: PyField) -> bool {
152        self.0 == other.0
153    }
154
155    // Note: we can't return HashMap<Vec<u8>, Vec<u8>> because that will coerce keys and values to
156    // a list, not bytes
157    #[getter]
158    fn metadata<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<'py, PyDict>> {
159        let d = PyDict::new(py);
160        self.0.metadata().iter().try_for_each(|(key, val)| {
161            d.set_item(
162                PyBytes::new(py, key.as_bytes()),
163                PyBytes::new(py, val.as_bytes()),
164            )
165        })?;
166        Ok(d)
167    }
168
169    #[getter]
170    fn metadata_str(&self) -> HashMap<String, String> {
171        self.0.metadata().clone()
172    }
173
174    #[getter]
175    fn name(&self) -> String {
176        self.0.name().clone()
177    }
178
179    #[getter]
180    fn nullable(&self) -> bool {
181        self.0.is_nullable()
182    }
183
184    fn remove_metadata(&self) -> Arro3Field {
185        PyField::new(
186            self.0
187                .as_ref()
188                .clone()
189                .with_metadata(Default::default())
190                .into(),
191        )
192        .into()
193    }
194
195    #[getter]
196    fn r#type(&self) -> Arro3DataType {
197        PyDataType::new(self.0.data_type().clone()).into()
198    }
199
200    fn with_metadata(&self, metadata: MetadataInput) -> PyResult<Arro3Field> {
201        Ok(PyField::new(
202            self.0
203                .as_ref()
204                .clone()
205                .with_metadata(metadata.into_string_hashmap()?)
206                .into(),
207        )
208        .into())
209    }
210
211    fn with_name(&self, name: String) -> Arro3Field {
212        PyField::new(self.0.as_ref().clone().with_name(name).into()).into()
213    }
214
215    fn with_nullable(&self, nullable: bool) -> Arro3Field {
216        PyField::new(self.0.as_ref().clone().with_nullable(nullable).into()).into()
217    }
218
219    fn with_type(&self, new_type: PyDataType) -> Arro3Field {
220        PyField::new(
221            self.0
222                .as_ref()
223                .clone()
224                .with_data_type(new_type.into_inner())
225                .into(),
226        )
227        .into()
228    }
229}