polars_python/series/
numpy_ufunc.rs

1#![allow(unsafe_op_in_unsafe_fn)]
2use std::ptr;
3
4use ndarray::IntoDimension;
5use numpy::npyffi::types::npy_intp;
6use numpy::npyffi::{self, flags};
7use numpy::{Element, PY_ARRAY_API, PyArray1, PyArrayDescrMethods, ToNpyDims};
8use polars_core::prelude::*;
9use polars_core::utils::arrow::types::NativeType;
10use pyo3::prelude::*;
11use pyo3::types::{PyNone, PyTuple};
12
13use super::PySeries;
14
15/// Create an empty numpy array arrows 64 byte alignment
16///
17/// # Safety
18/// All elements in the array are non initialized
19///
20/// The array is also writable from Python.
21unsafe fn aligned_array<T: Element + NativeType>(
22    py: Python<'_>,
23    size: usize,
24) -> (Bound<'_, PyArray1<T>>, Vec<T>) {
25    let mut buf = vec![T::default(); size];
26
27    // modified from
28    // numpy-0.10.0/src/array.rs:375
29
30    let len = buf.len();
31    let buffer_ptr = buf.as_mut_ptr();
32
33    let mut dims = [len].into_dimension();
34    let strides = [size_of::<T>() as npy_intp];
35
36    let ptr = PY_ARRAY_API.PyArray_NewFromDescr(
37        py,
38        PY_ARRAY_API.get_type_object(py, npyffi::NpyTypes::PyArray_Type),
39        T::get_dtype(py).into_dtype_ptr(),
40        dims.ndim_cint(),
41        dims.as_dims_ptr(),
42        strides.as_ptr() as *mut _, // strides
43        buffer_ptr as _,            // data
44        flags::NPY_ARRAY_OUT_ARRAY, // flag
45        ptr::null_mut(),            //obj
46    );
47    (
48        Bound::from_owned_ptr(py, ptr)
49            .downcast_into_exact::<PyArray1<T>>()
50            .unwrap(),
51        buf,
52    )
53}
54
55/// Get reference counter for numpy arrays.
56///   - For CPython: Get reference counter.
57///   - For PyPy: Reference counters for a live PyPy object = refcnt + 2 << 60.
58fn get_refcnt<T>(pyarray: &Bound<'_, PyArray1<T>>) -> isize {
59    let refcnt = pyarray.get_refcnt();
60    #[cfg(target_pointer_width = "64")]
61    if refcnt >= (2 << 60) {
62        return refcnt - (2 << 60);
63    }
64    refcnt
65}
66
macro_rules! impl_ufuncs {
    // `$unsafe_from_ptr_method` is not referenced by the expansion; it is still
    // accepted so that existing three-argument invocations keep matching.
    ($name:ident, $type:ident, $unsafe_from_ptr_method:ident) => {
        #[pymethods]
        impl PySeries {
            // Applies a ufunc by accepting a lambda out: ufunc(*args, out=out).
            //
            // If allocate_out is true, the output buffer is allocated here as
            // a Rust `Vec`, exposed to Python as a NumPy array for the
            // duration of the call, and then turned into the resulting Series
            // together with this Series' null mask. If the lambda fails, its
            // error is returned and the buffer is dropped.
            //
            // If allocate_out is false, the lambda is called with `None`, so
            // the ufunc allocates its own output. That result is converted by
            // calling the Python-side Series constructor with this Series'
            // name and reading back its `_s` attribute.
            //
            // Panics if, after a successful call, more than three references
            // to the output array exist: the lambda then kept the array, which
            // aliases memory owned by Rust.
            fn $name(&self, lambda: &Bound<PyAny>, allocate_out: bool) -> PyResult<PySeries> {
                // `lambda` is already bound to the interpreter, so its token
                // can be reused instead of acquiring one again.
                let py = lambda.py();

                if !allocate_out {
                    // We're not going to allocate the output array.
                    // Instead, we'll let the ufunc do it.
                    let result = lambda.call1((PyNone::get(py),))?;
                    let series_factory = crate::py_modules::pl_series(py).bind(py);
                    return series_factory
                        .call((self.name(), result), None)?
                        .getattr("_s")?
                        .extract::<PySeries>();
                }

                let size = self.len();
                // SAFETY: `av` backs `out_array` and is kept alive, untouched,
                // until the lambda has returned; the reference-count assertion
                // below rejects the case where Python kept the array.
                let (out_array, av) =
                    unsafe { aligned_array::<<$type as PolarsNumericType>::Native>(py, size) };

                debug_assert_eq!(get_refcnt(&out_array), 1);
                // inserting it in a tuple increase the reference count by 1.
                let args = PyTuple::new(py, &[out_array.clone()])?;
                debug_assert_eq!(get_refcnt(&out_array), 2);

                // The return value is kept alive on purpose so that the
                // reference count checked below is taken in the same state as
                // before this code used `?`.
                // NOTE(review): on the error path `av` is dropped on return
                // while Python may still reach the array (e.g. through the
                // exception's traceback) - confirm this cannot be observed.
                let _lambda_result = lambda.call1(args)?;

                // if this assert fails, the lambda has taken a reference to the object, so we must panic
                // args and the lambda return have a reference, making a total of 3
                assert!(get_refcnt(&out_array) <= 3);

                // `av` holds one value per row of the whole Series, so the
                // null mask has to cover every chunk, not just the first one.
                // After rechunking there is a single chunk whose validity
                // spans all rows.
                let validity = self.series.rechunk().chunks()[0].validity().cloned();
                let ca = ChunkedArray::<$type>::from_vec_validity(
                    self.series.name().clone(),
                    av,
                    validity,
                );

                Ok(PySeries::new(ca.into_series()))
            }
        }
    };
}
131
132impl_ufuncs!(apply_ufunc_f32, Float32Type, unsafe_from_ptr_f32);
133impl_ufuncs!(apply_ufunc_f64, Float64Type, unsafe_from_ptr_f64);
134impl_ufuncs!(apply_ufunc_u8, UInt8Type, unsafe_from_ptr_u8);
135impl_ufuncs!(apply_ufunc_u16, UInt16Type, unsafe_from_ptr_u16);
136impl_ufuncs!(apply_ufunc_u32, UInt32Type, unsafe_from_ptr_u32);
137impl_ufuncs!(apply_ufunc_u64, UInt64Type, unsafe_from_ptr_u64);
138impl_ufuncs!(apply_ufunc_i8, Int8Type, unsafe_from_ptr_i8);
139impl_ufuncs!(apply_ufunc_i16, Int16Type, unsafe_from_ptr_i16);
140impl_ufuncs!(apply_ufunc_i32, Int32Type, unsafe_from_ptr_i32);
141impl_ufuncs!(apply_ufunc_i64, Int64Type, unsafe_from_ptr_i64);