1use arrow::array::{Array, BooleanArray, PrimitiveArray, Utf8Array};
16use arrow::bitmap::Bitmap;
17use arrow::buffer::Buffer;
18use arrow::offset::OffsetsBuffer;
19use arrow::types::NativeType;
20use polars::prelude::*;
21use polars_core::{with_match_physical_numeric_polars_type, with_match_physical_numeric_type};
22use pyo3::exceptions::PyTypeError;
23use pyo3::prelude::*;
24use pyo3::types::PyTuple;
25
26use super::{PySeries, ToSeries};
27use crate::conversion::Wrap;
28use crate::error::PyPolarsErr;
29use crate::raise_err;
30use crate::utils::EnterPolarsExt;
31
/// Location of a single contiguous buffer, exchanged with Python as the tuple
/// `(pointer, offset, length)`.
///
/// The type is plain data (three `usize` fields), so it derives the usual
/// value-type traits to make it cheap to copy, compare, and print when debugging.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct BufferInfo {
    /// Address of the start of the buffer, stored as an integer.
    pointer: usize,
    /// Offset into the buffer at which the data starts. For Boolean buffers this
    /// is counted in bits; for numeric buffers it is counted in elements.
    offset: usize,
    /// Number of values described by the buffer (bits for Boolean buffers).
    length: usize,
}
37impl<'py> IntoPyObject<'py> for BufferInfo {
38 type Target = PyTuple;
39 type Output = Bound<'py, Self::Target>;
40 type Error = PyErr;
41
42 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
43 (self.pointer, self.offset, self.length).into_pyobject(py)
44 }
45}
46impl<'py> FromPyObject<'py> for BufferInfo {
47 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
48 let (pointer, offset, length) = ob.extract()?;
49 Ok(Self {
50 pointer,
51 offset,
52 length,
53 })
54 }
55}
56
57#[pymethods]
58impl PySeries {
59 fn _get_buffer_info(&self) -> PyResult<BufferInfo> {
61 let s = self.series.to_physical_repr();
62 let arrays = s.chunks();
63 if arrays.len() != 1 {
64 let msg = "cannot get buffer info for Series consisting of multiple chunks";
65 raise_err!(msg, ComputeError);
66 }
67 match s.dtype() {
68 DataType::Boolean => {
69 let ca = s.bool().unwrap();
70 let arr = ca.downcast_iter().next().unwrap();
71 let (slice, offset, len) = arr.values().as_slice();
72 Ok(BufferInfo {
73 pointer: slice.as_ptr() as usize,
74 offset,
75 length: len,
76 })
77 },
78 dt if dt.is_primitive_numeric() => {
79 Ok(with_match_physical_numeric_polars_type!(dt, |$T| {
80 let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref();
81 BufferInfo { pointer: get_pointer(ca), offset: 0, length: ca.len() }
82 }))
83 },
84 dt => {
85 let msg = format!(
86 "`_get_buffer_info` not implemented for non-physical type {dt}; try to select a buffer first"
87 );
88 Err(PyTypeError::new_err(msg))
89 },
90 }
91 }
92
93 fn _get_buffers(&self, py: Python) -> PyResult<(Self, Option<Self>, Option<Self>)> {
95 let s = &self.series;
96 py.enter_polars(|| match s.dtype().to_physical() {
97 dt if dt.is_primitive_numeric() => get_buffers_from_primitive(s),
98 DataType::Boolean => get_buffers_from_primitive(s),
99 DataType::String => get_buffers_from_string(s),
100 dt => {
101 let msg = format!("`_get_buffers` not implemented for `dtype` {dt}");
102 Err(PyTypeError::new_err(msg))
103 },
104 })
105 }
106}
107
108fn get_pointer<T: PolarsNumericType>(ca: &ChunkedArray<T>) -> usize {
109 let arr = ca.downcast_iter().next().unwrap();
110 arr.values().as_ptr() as usize
111}
112
113fn get_buffers_from_primitive(
114 s: &Series,
115) -> PyResult<(PySeries, Option<PySeries>, Option<PySeries>)> {
116 let chunks = s
117 .chunks()
118 .iter()
119 .map(|arr| arr.with_validity(None))
120 .collect::<Vec<_>>();
121 let values = Series::try_from((s.name().clone(), chunks))
122 .map_err(PyPolarsErr::from)?
123 .into();
124
125 let validity = get_bitmap(s);
126 let offsets = None;
127 Ok((values, validity, offsets))
128}
129
130fn get_buffers_from_string(s: &Series) -> PyResult<(PySeries, Option<PySeries>, Option<PySeries>)> {
134 let s = s.rechunk();
136
137 let ca = s.str().map_err(PyPolarsErr::from)?;
138 let arr_binview = ca.downcast_iter().next().unwrap();
139
140 let arr_utf8 = polars_compute::cast::utf8view_to_utf8(arr_binview);
142
143 let values = get_string_bytes(&arr_utf8)?;
144 let validity = get_bitmap(&s);
145 let offsets = get_string_offsets(&arr_utf8)?;
146
147 Ok((values, validity, Some(offsets)))
148}
149
150fn get_bitmap(s: &Series) -> Option<PySeries> {
151 if s.null_count() > 0 {
152 Some(s.is_not_null().into_series().into())
153 } else {
154 None
155 }
156}
157
158fn get_string_bytes(arr: &Utf8Array<i64>) -> PyResult<PySeries> {
159 let values_buffer = arr.values();
160 let values_arr =
161 PrimitiveArray::<u8>::try_new(ArrowDataType::UInt8, values_buffer.clone(), None)
162 .map_err(PyPolarsErr::from)?;
163 let values = Series::from_arrow(PlSmallStr::EMPTY, values_arr.to_boxed())
164 .map_err(PyPolarsErr::from)?
165 .into();
166 Ok(values)
167}
168
169fn get_string_offsets(arr: &Utf8Array<i64>) -> PyResult<PySeries> {
170 let offsets_buffer = arr.offsets().buffer();
171 let offsets_arr =
172 PrimitiveArray::<i64>::try_new(ArrowDataType::Int64, offsets_buffer.clone(), None)
173 .map_err(PyPolarsErr::from)?;
174 let offsets = Series::from_arrow(PlSmallStr::EMPTY, offsets_arr.to_boxed())
175 .map_err(PyPolarsErr::from)?
176 .into();
177 Ok(offsets)
178}
179
180#[pymethods]
181impl PySeries {
182 #[staticmethod]
184 unsafe fn _from_buffer(
185 dtype: Wrap<DataType>,
186 buffer_info: BufferInfo,
187 owner: &Bound<'_, PyAny>,
188 ) -> PyResult<Self> {
189 let dtype = dtype.0;
190 let BufferInfo {
191 pointer,
192 offset,
193 length,
194 } = buffer_info;
195 let owner = owner.to_owned().unbind();
196
197 let arr_boxed = match dtype {
198 dt if dt.is_primitive_numeric() => {
199 with_match_physical_numeric_type!(dt, |$T| unsafe {
200 from_buffer_impl::<$T>(pointer, offset, length, owner)
201 })
202 },
203 DataType::Boolean => {
204 unsafe { from_buffer_boolean_impl(pointer, offset, length, owner) }?
205 },
206 dt => {
207 let msg = format!(
208 "`_from_buffer` requires a physical type as input for `dtype`, got {dt}"
209 );
210 return Err(PyTypeError::new_err(msg));
211 },
212 };
213
214 let s = Series::from_arrow(PlSmallStr::EMPTY, arr_boxed)
215 .unwrap()
216 .into();
217 Ok(s)
218 }
219}
220
221unsafe fn from_buffer_impl<T: NativeType>(
222 pointer: usize,
223 offset: usize,
224 length: usize,
225 owner: Py<PyAny>,
226) -> Box<dyn Array> {
227 let pointer = pointer as *const T;
228 let pointer = unsafe { pointer.add(offset) };
229 let slice = unsafe { std::slice::from_raw_parts(pointer, length) };
230 let arr = unsafe { arrow::ffi::mmap::slice_and_owner(slice, owner) };
231 arr.to_boxed()
232}
233unsafe fn from_buffer_boolean_impl(
234 pointer: usize,
235 offset: usize,
236 length: usize,
237 owner: Py<PyAny>,
238) -> PyResult<Box<dyn Array>> {
239 let length_in_bytes = get_boolean_buffer_length_in_bytes(length, offset);
240
241 let pointer = pointer as *const u8;
242 let slice = unsafe { std::slice::from_raw_parts(pointer, length_in_bytes) };
243 let arr_result = unsafe { arrow::ffi::mmap::bitmap_and_owner(slice, offset, length, owner) };
244 let arr = arr_result.map_err(PyPolarsErr::from)?;
245 Ok(arr.to_boxed())
246}
/// Number of bytes needed to hold `length` bits that start `offset` bits into
/// a buffer.
fn get_boolean_buffer_length_in_bytes(length: usize, offset: usize) -> usize {
    // Every bit from the start of the buffer up to the last requested bit must
    // be covered, so round the total bit count up to whole bytes.
    let total_bits = offset + length;
    total_bits.div_ceil(8)
}
253
254#[pymethods]
255impl PySeries {
256 #[staticmethod]
258 #[pyo3(signature = (dtype, data, validity=None))]
259 unsafe fn _from_buffers(
260 py: Python<'_>,
261 dtype: Wrap<DataType>,
262 data: Vec<PySeries>,
263 validity: Option<PySeries>,
264 ) -> PyResult<Self> {
265 let dtype = dtype.0;
266 let mut data = data.to_series();
267
268 match data.len() {
269 0 => {
270 let msg = "`data` input to `_from_buffers` must contain at least one buffer";
271 return Err(PyTypeError::new_err(msg));
272 },
273 1 if validity.is_none() => {
274 let values = data.pop().unwrap();
275 let s = values.strict_cast(&dtype).map_err(PyPolarsErr::from)?;
276 return Ok(s.into());
277 },
278 _ => (),
279 }
280
281 let validity = match validity {
282 Some(s) => {
283 let dtype = s.series.dtype();
284 if !dtype.is_bool() {
285 let msg = format!("validity buffer must have data type Boolean, got {dtype:?}");
286 return Err(PyTypeError::new_err(msg));
287 }
288 Some(series_to_bitmap(s.series).unwrap())
289 },
290 None => None,
291 };
292
293 let s = match dtype.to_physical() {
294 dt if dt.is_primitive_numeric() => {
295 let values = data.into_iter().next().unwrap();
296 with_match_physical_numeric_polars_type!(dt, |$T| {
297 let values_buffer = series_to_buffer::<$T>(values);
298 from_buffers_num_impl::<<$T as PolarsNumericType>::Native>(values_buffer, validity)?
299 })
300 },
301 DataType::Boolean => {
302 let values = data.into_iter().next().unwrap();
303 let values_buffer = series_to_bitmap(values)?;
304 from_buffers_bool_impl(values_buffer, validity)?
305 },
306 DataType::String => {
307 let mut data_iter = data.into_iter();
308 let values = data_iter.next().unwrap();
309 let offsets = match data_iter.next() {
310 Some(s) => {
311 let dtype = s.dtype();
312 if !matches!(dtype, DataType::Int64) {
313 return Err(PyTypeError::new_err(format!(
314 "offsets buffer must have data type Int64, got {dtype:?}"
315 )));
316 }
317 series_to_offsets(s)
318 },
319 None => {
320 return Err(PyTypeError::new_err(
321 "`_from_buffers` cannot create a String column without an offsets buffer",
322 ));
323 },
324 };
325 let values = series_to_buffer::<UInt8Type>(values);
326 py.enter_polars(|| from_buffers_string_impl(values, validity, offsets))?
327 },
328 dt => {
329 let msg = format!("`_from_buffers` not implemented for `dtype` {dt}");
330 return Err(PyTypeError::new_err(msg));
331 },
332 };
333
334 let out = s.strict_cast(&dtype).map_err(PyPolarsErr::from)?;
335 Ok(out.into())
336 }
337}
338
339fn series_to_buffer<T>(s: Series) -> Buffer<T::Native>
340where
341 T: PolarsNumericType,
342{
343 let ca: &ChunkedArray<T> = s.as_ref().as_ref();
344 let ca = ca.rechunk();
345 ca.downcast_as_array().values().clone()
346}
347fn series_to_bitmap(s: Series) -> PyResult<Bitmap> {
348 let ca_result = s.bool();
349 let ca = ca_result.map_err(PyPolarsErr::from)?.rechunk();
350 Ok(ca.downcast_as_array().values().clone())
351}
352fn series_to_offsets(s: Series) -> OffsetsBuffer<i64> {
353 let buffer = series_to_buffer::<Int64Type>(s);
354 unsafe { OffsetsBuffer::new_unchecked(buffer) }
355}
356
357fn from_buffers_num_impl<T: NativeType>(
358 data: Buffer<T>,
359 validity: Option<Bitmap>,
360) -> PyResult<Series> {
361 let arr = PrimitiveArray::new(T::PRIMITIVE.into(), data, validity);
362 let s_result = Series::from_arrow(PlSmallStr::EMPTY, arr.to_boxed());
363 let s = s_result.map_err(PyPolarsErr::from)?;
364 Ok(s)
365}
366fn from_buffers_bool_impl(data: Bitmap, validity: Option<Bitmap>) -> PyResult<Series> {
367 let arr = BooleanArray::new(ArrowDataType::Boolean, data, validity);
368 let s_result = Series::from_arrow(PlSmallStr::EMPTY, arr.to_boxed());
369 let s = s_result.map_err(PyPolarsErr::from)?;
370 Ok(s)
371}
372fn from_buffers_string_impl(
376 data: Buffer<u8>,
377 validity: Option<Bitmap>,
378 offsets: OffsetsBuffer<i64>,
379) -> PyResult<Series> {
380 let arr = Utf8Array::new(ArrowDataType::LargeUtf8, offsets, data, validity);
381
382 let s_result = Series::from_arrow(PlSmallStr::EMPTY, arr.to_boxed());
384
385 let s = s_result.map_err(PyPolarsErr::from)?;
386 Ok(s)
387}