polars_python/series/
general.rs

1use polars_core::chunked_array::cast::CastOptions;
2use polars_core::series::IsSorted;
3use polars_core::utils::flatten::flatten_series;
4use polars_row::RowEncodingOptions;
5use pyo3::exceptions::{PyIndexError, PyRuntimeError, PyValueError};
6use pyo3::prelude::*;
7use pyo3::types::PyBytes;
8use pyo3::{IntoPyObjectExt, Python};
9
10use self::row_encode::get_row_encoding_context;
11use super::PySeries;
12use crate::dataframe::PyDataFrame;
13use crate::error::PyPolarsErr;
14use crate::prelude::*;
15use crate::py_modules::polars;
16use crate::utils::EnterPolarsExt;
17
18#[pymethods]
19impl PySeries {
20    fn struct_unnest(&self, py: Python) -> PyResult<PyDataFrame> {
21        py.enter_polars_df(|| Ok(self.series.struct_()?.clone().unnest()))
22    }
23
24    fn struct_fields(&self) -> PyResult<Vec<&str>> {
25        let ca = self.series.struct_().map_err(PyPolarsErr::from)?;
26        Ok(ca
27            .struct_fields()
28            .iter()
29            .map(|s| s.name().as_str())
30            .collect())
31    }
32
33    fn is_sorted_ascending_flag(&self) -> bool {
34        matches!(self.series.is_sorted_flag(), IsSorted::Ascending)
35    }
36
37    fn is_sorted_descending_flag(&self) -> bool {
38        matches!(self.series.is_sorted_flag(), IsSorted::Descending)
39    }
40
41    fn can_fast_explode_flag(&self) -> bool {
42        match self.series.list() {
43            Err(_) => false,
44            Ok(list) => list._can_fast_explode(),
45        }
46    }
47
48    pub fn cat_uses_lexical_ordering(&self) -> PyResult<bool> {
49        let ca = self.series.categorical().map_err(PyPolarsErr::from)?;
50        Ok(ca.uses_lexical_ordering())
51    }
52
53    pub fn cat_is_local(&self) -> PyResult<bool> {
54        let ca = self.series.categorical().map_err(PyPolarsErr::from)?;
55        Ok(ca.get_rev_map().is_local())
56    }
57
58    pub fn cat_to_local(&self, py: Python) -> PyResult<Self> {
59        py.enter_polars_series(|| Ok(self.series.categorical()?.to_local()))
60    }
61
62    fn estimated_size(&self) -> usize {
63        self.series.estimated_size()
64    }
65
66    #[cfg(feature = "object")]
67    fn get_object<'py>(&self, py: Python<'py>, index: usize) -> PyResult<Bound<'py, PyAny>> {
68        if matches!(self.series.dtype(), DataType::Object(_)) {
69            let obj: Option<&ObjectValue> = self.series.get_object(index).map(|any| any.into());
70            Ok(obj.into_pyobject(py)?)
71        } else {
72            Ok(py.None().into_bound(py))
73        }
74    }
75
76    #[cfg(feature = "dtype-array")]
77    fn reshape(&self, py: Python<'_>, dims: Vec<i64>) -> PyResult<Self> {
78        let dims = dims
79            .into_iter()
80            .map(ReshapeDimension::new)
81            .collect::<Vec<_>>();
82
83        py.enter_polars_series(|| self.series.reshape_array(&dims))
84    }
85
86    /// Returns the string format of a single element of the Series.
87    fn get_fmt(&self, index: usize, str_len_limit: usize) -> String {
88        let v = format!("{}", self.series.get(index).unwrap());
89        if let DataType::String | DataType::Categorical(_, _) | DataType::Enum(_, _) =
90            self.series.dtype()
91        {
92            let v_no_quotes = &v[1..v.len() - 1];
93            let v_trunc = &v_no_quotes[..v_no_quotes
94                .char_indices()
95                .take(str_len_limit)
96                .last()
97                .map(|(i, c)| i + c.len_utf8())
98                .unwrap_or(0)];
99            if v_no_quotes == v_trunc {
100                v
101            } else {
102                format!("\"{v_trunc}…")
103            }
104        } else {
105            v
106        }
107    }
108
109    pub fn rechunk(&mut self, py: Python<'_>, in_place: bool) -> PyResult<Option<Self>> {
110        let series = py.enter_polars_ok(|| self.series.rechunk())?;
111        if in_place {
112            self.series = series;
113            Ok(None)
114        } else {
115            Ok(Some(series.into()))
116        }
117    }
118
119    /// Get a value by index.
120    fn get_index(&self, py: Python<'_>, index: usize) -> PyResult<PyObject> {
121        let av = match self.series.get(index) {
122            Ok(v) => v,
123            Err(PolarsError::OutOfBounds(err)) => {
124                return Err(PyIndexError::new_err(err.to_string()));
125            },
126            Err(e) => return Err(PyPolarsErr::from(e).into()),
127        };
128
129        match av {
130            AnyValue::List(s) | AnyValue::Array(s, _) => {
131                let pyseries = PySeries::new(s);
132                polars(py).getattr(py, "wrap_s")?.call1(py, (pyseries,))
133            },
134            _ => Wrap(av).into_py_any(py),
135        }
136    }
137
138    /// Get a value by index, allowing negative indices.
139    fn get_index_signed(&self, py: Python<'_>, index: isize) -> PyResult<PyObject> {
140        let index = if index < 0 {
141            match self.len().checked_sub(index.unsigned_abs()) {
142                Some(v) => v,
143                None => {
144                    return Err(PyIndexError::new_err(
145                        polars_err!(oob = index, self.len()).to_string(),
146                    ));
147                },
148            }
149        } else {
150            usize::try_from(index).unwrap()
151        };
152        self.get_index(py, index)
153    }
154
155    fn bitand(&self, py: Python<'_>, other: &PySeries) -> PyResult<Self> {
156        py.enter_polars_series(|| &self.series & &other.series)
157    }
158
159    fn bitor(&self, py: Python<'_>, other: &PySeries) -> PyResult<Self> {
160        py.enter_polars_series(|| &self.series | &other.series)
161    }
162
163    fn bitxor(&self, py: Python<'_>, other: &PySeries) -> PyResult<Self> {
164        py.enter_polars_series(|| &self.series ^ &other.series)
165    }
166
167    fn chunk_lengths(&self) -> Vec<usize> {
168        self.series.chunk_lengths().collect()
169    }
170
171    pub fn name(&self) -> &str {
172        self.series.name().as_str()
173    }
174
175    fn rename(&mut self, name: &str) {
176        self.series.rename(name.into());
177    }
178
179    fn dtype<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
180        Wrap(self.series.dtype().clone()).into_pyobject(py)
181    }
182
183    fn set_sorted_flag(&self, descending: bool) -> Self {
184        let mut out = self.series.clone();
185        if descending {
186            out.set_sorted_flag(IsSorted::Descending);
187        } else {
188            out.set_sorted_flag(IsSorted::Ascending)
189        }
190        out.into()
191    }
192
193    fn n_chunks(&self) -> usize {
194        self.series.n_chunks()
195    }
196
197    fn append(&mut self, other: &PySeries) -> PyResult<()> {
198        self.series
199            .append(&other.series)
200            .map_err(PyPolarsErr::from)?;
201        Ok(())
202    }
203
204    fn extend(&mut self, py: Python<'_>, other: &PySeries) -> PyResult<()> {
205        py.enter_polars(|| {
206            self.series.extend(&other.series)?;
207            PolarsResult::Ok(())
208        })
209    }
210
211    fn new_from_index(&self, py: Python<'_>, index: usize, length: usize) -> PyResult<Self> {
212        if index >= self.series.len() {
213            Err(PyValueError::new_err("index is out of bounds"))
214        } else {
215            py.enter_polars_series(|| Ok(self.series.new_from_index(index, length)))
216        }
217    }
218
219    fn filter(&self, py: Python<'_>, filter: &PySeries) -> PyResult<Self> {
220        let filter_series = &filter.series;
221        if let Ok(ca) = filter_series.bool() {
222            py.enter_polars_series(|| self.series.filter(ca))
223        } else {
224            Err(PyRuntimeError::new_err("Expected a boolean mask"))
225        }
226    }
227
228    fn sort(
229        &mut self,
230        py: Python<'_>,
231        descending: bool,
232        nulls_last: bool,
233        multithreaded: bool,
234    ) -> PyResult<Self> {
235        py.enter_polars_series(|| {
236            self.series.sort(
237                SortOptions::default()
238                    .with_order_descending(descending)
239                    .with_nulls_last(nulls_last)
240                    .with_multithreaded(multithreaded),
241            )
242        })
243    }
244
245    fn gather_with_series(&self, py: Python<'_>, indices: &PySeries) -> PyResult<Self> {
246        py.enter_polars_series(|| self.series.take(indices.series.idx()?))
247    }
248
249    fn null_count(&self) -> PyResult<usize> {
250        Ok(self.series.null_count())
251    }
252
253    fn has_nulls(&self) -> bool {
254        self.series.has_nulls()
255    }
256
257    fn equals(
258        &self,
259        py: Python<'_>,
260        other: &PySeries,
261        check_dtypes: bool,
262        check_names: bool,
263        null_equal: bool,
264    ) -> PyResult<bool> {
265        if check_dtypes && (self.series.dtype() != other.series.dtype()) {
266            return Ok(false);
267        }
268        if check_names && (self.series.name() != other.series.name()) {
269            return Ok(false);
270        }
271        if null_equal {
272            py.enter_polars_ok(|| self.series.equals_missing(&other.series))
273        } else {
274            py.enter_polars_ok(|| self.series.equals(&other.series))
275        }
276    }
277
278    fn as_str(&self) -> PyResult<String> {
279        Ok(format!("{:?}", self.series))
280    }
281
282    #[allow(clippy::len_without_is_empty)]
283    pub fn len(&self) -> usize {
284        self.series.len()
285    }
286
287    /// Rechunk and return a pointer to the start of the Series.
288    /// Only implemented for numeric types
289    fn as_single_ptr(&mut self, py: Python) -> PyResult<usize> {
290        py.enter_polars(|| self.series.as_single_ptr())
291    }
292
293    fn clone(&self) -> Self {
294        self.series.clone().into()
295    }
296
297    fn zip_with(&self, py: Python<'_>, mask: &PySeries, other: &PySeries) -> PyResult<Self> {
298        let mask = mask.series.bool().map_err(PyPolarsErr::from)?;
299        py.enter_polars_series(|| self.series.zip_with(mask, &other.series))
300    }
301
302    #[pyo3(signature = (separator, drop_first=false))]
303    fn to_dummies(
304        &self,
305        py: Python<'_>,
306        separator: Option<&str>,
307        drop_first: bool,
308    ) -> PyResult<PyDataFrame> {
309        py.enter_polars_df(|| self.series.to_dummies(separator, drop_first))
310    }
311
312    fn get_list(&self, index: usize) -> Option<Self> {
313        let ca = self.series.list().ok()?;
314        Some(ca.get_as_series(index)?.into())
315    }
316
317    fn n_unique(&self, py: Python) -> PyResult<usize> {
318        py.enter_polars(|| self.series.n_unique())
319    }
320
321    fn floor(&self, py: Python) -> PyResult<Self> {
322        py.enter_polars_series(|| self.series.floor())
323    }
324
325    fn shrink_to_fit(&mut self, py: Python) -> PyResult<()> {
326        py.enter_polars_ok(|| self.series.shrink_to_fit())
327    }
328
329    fn dot<'py>(&self, other: &PySeries, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
330        let lhs_dtype = self.series.dtype();
331        let rhs_dtype = other.series.dtype();
332
333        if !lhs_dtype.is_primitive_numeric() {
334            return Err(PyPolarsErr::from(polars_err!(opq = dot, lhs_dtype)).into());
335        };
336        if !rhs_dtype.is_primitive_numeric() {
337            return Err(PyPolarsErr::from(polars_err!(opq = dot, rhs_dtype)).into());
338        }
339
340        let result: AnyValue = if lhs_dtype.is_float() || rhs_dtype.is_float() {
341            py.enter_polars(|| (&self.series * &other.series)?.sum::<f64>())?
342                .into()
343        } else {
344            py.enter_polars(|| (&self.series * &other.series)?.sum::<i64>())?
345                .into()
346        };
347
348        Wrap(result).into_pyobject(py)
349    }
350
351    fn __getstate__<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyBytes>> {
352        // Used in pickle/pickling
353        Ok(PyBytes::new(
354            py,
355            &py.enter_polars(|| self.series.serialize_to_bytes())?,
356        ))
357    }
358
359    fn __setstate__(&mut self, py: Python<'_>, state: PyObject) -> PyResult<()> {
360        // Used in pickle/pickling
361
362        use pyo3::pybacked::PyBackedBytes;
363        match state.extract::<PyBackedBytes>(py) {
364            Ok(s) => py.enter_polars(|| {
365                let s = Series::deserialize_from_reader(&mut &*s)?;
366                self.series = s;
367                PolarsResult::Ok(())
368            }),
369            Err(e) => Err(e),
370        }
371    }
372
373    fn skew(&self, py: Python<'_>, bias: bool) -> PyResult<Option<f64>> {
374        py.enter_polars(|| self.series.skew(bias))
375    }
376
377    fn kurtosis(&self, py: Python<'_>, fisher: bool, bias: bool) -> PyResult<Option<f64>> {
378        py.enter_polars(|| self.series.kurtosis(fisher, bias))
379    }
380
381    fn cast(
382        &self,
383        py: Python<'_>,
384        dtype: Wrap<DataType>,
385        strict: bool,
386        wrap_numerical: bool,
387    ) -> PyResult<Self> {
388        let options = if wrap_numerical {
389            CastOptions::Overflowing
390        } else if strict {
391            CastOptions::Strict
392        } else {
393            CastOptions::NonStrict
394        };
395        py.enter_polars_series(|| self.series.cast_with_options(&dtype.0, options))
396    }
397
398    fn get_chunks(&self) -> PyResult<Vec<PyObject>> {
399        Python::with_gil(|py| {
400            let wrap_s = py_modules::polars(py).getattr(py, "wrap_s").unwrap();
401            flatten_series(&self.series)
402                .into_iter()
403                .map(|s| wrap_s.call1(py, (Self::new(s),)))
404                .collect()
405        })
406    }
407
408    fn is_sorted(&self, py: Python<'_>, descending: bool, nulls_last: bool) -> PyResult<bool> {
409        let options = SortOptions {
410            descending,
411            nulls_last,
412            multithreaded: true,
413            maintain_order: false,
414            limit: None,
415        };
416        py.enter_polars(|| self.series.is_sorted(options))
417    }
418
419    fn clear(&self) -> Self {
420        self.series.clear().into()
421    }
422
423    fn head(&self, py: Python<'_>, n: usize) -> PyResult<Self> {
424        py.enter_polars_series(|| Ok(self.series.head(Some(n))))
425    }
426
427    fn tail(&self, py: Python<'_>, n: usize) -> PyResult<Self> {
428        py.enter_polars_series(|| Ok(self.series.tail(Some(n))))
429    }
430
431    fn value_counts(
432        &self,
433        py: Python<'_>,
434        sort: bool,
435        parallel: bool,
436        name: String,
437        normalize: bool,
438    ) -> PyResult<PyDataFrame> {
439        py.enter_polars_df(|| {
440            self.series
441                .value_counts(sort, parallel, name.into(), normalize)
442        })
443    }
444
445    #[pyo3(signature = (offset, length=None))]
446    fn slice(&self, offset: i64, length: Option<usize>) -> Self {
447        let length = length.unwrap_or_else(|| self.series.len());
448        self.series.slice(offset, length).into()
449    }
450
451    pub fn not_(&self, py: Python) -> PyResult<Self> {
452        py.enter_polars_series(|| polars_ops::series::negate_bitwise(&self.series))
453    }
454
455    /// Internal utility function to allow direct access to the row encoding from python.
456    #[pyo3(signature = (dtypes, opts))]
457    fn _row_decode(
458        &self,
459        py: Python<'_>,
460        dtypes: Vec<(String, Wrap<DataType>)>,
461        opts: Vec<(bool, bool, bool)>,
462    ) -> PyResult<PyDataFrame> {
463        py.enter_polars_df(|| {
464            assert_eq!(dtypes.len(), opts.len());
465
466            let opts = opts
467                .into_iter()
468                .map(|(descending, nulls_last, no_order)| {
469                    let mut opt = RowEncodingOptions::default();
470
471                    opt.set(RowEncodingOptions::DESCENDING, descending);
472                    opt.set(RowEncodingOptions::NULLS_LAST, nulls_last);
473                    opt.set(RowEncodingOptions::NO_ORDER, no_order);
474
475                    opt
476                })
477                .collect::<Vec<_>>();
478
479            // The polars-row crate expects the physical arrow types.
480            let arrow_dtypes = dtypes
481                .iter()
482                .map(|(_, dtype)| dtype.0.to_physical().to_arrow(CompatLevel::newest()))
483                .collect::<Vec<_>>();
484
485            let dicts = dtypes
486                .iter()
487                .map(|(_, dt)| dt)
488                .zip(opts.iter())
489                .map(|(dtype, opts)| get_row_encoding_context(&dtype.0, opts.is_ordered()))
490                .collect::<Vec<_>>();
491
492            // Get the BinaryOffset array.
493            let arr = self.series.rechunk();
494            let arr = arr.binary_offset()?;
495            assert_eq!(arr.chunks().len(), 1);
496            let mut values = arr
497                .downcast_iter()
498                .next()
499                .unwrap()
500                .values_iter()
501                .collect::<Vec<&[u8]>>();
502
503            let columns = unsafe {
504                polars_row::decode::decode_rows(&mut values, &opts, &dicts, &arrow_dtypes)
505            };
506
507            // Construct a DataFrame from the result.
508            let columns = columns
509                .into_iter()
510                .zip(dtypes)
511                .map(|(arr, (name, dtype))| unsafe {
512                    Series::from_chunks_and_dtype_unchecked(
513                        PlSmallStr::from(name),
514                        vec![arr],
515                        &dtype.0.to_physical(),
516                    )
517                    .into_column()
518                    .from_physical_unchecked(&dtype.0)
519                })
520                .collect::<PolarsResult<Vec<_>>>()?;
521            DataFrame::new(columns)
522        })
523    }
524}
525
526macro_rules! impl_set_with_mask {
527    ($name:ident, $native:ty, $cast:ident, $variant:ident) => {
528        fn $name(
529            series: &Series,
530            filter: &PySeries,
531            value: Option<$native>,
532        ) -> PolarsResult<Series> {
533            let mask = filter.series.bool()?;
534            let ca = series.$cast()?;
535            let new = ca.set(mask, value)?;
536            Ok(new.into_series())
537        }
538
539        #[pymethods]
540        impl PySeries {
541            #[pyo3(signature = (filter, value))]
542            fn $name(
543                &self,
544                py: Python<'_>,
545                filter: &PySeries,
546                value: Option<$native>,
547            ) -> PyResult<Self> {
548                py.enter_polars_series(|| $name(&self.series, filter, value))
549            }
550        }
551    };
552}
553
554impl_set_with_mask!(set_with_mask_str, &str, str, String);
555impl_set_with_mask!(set_with_mask_f64, f64, f64, Float64);
556impl_set_with_mask!(set_with_mask_f32, f32, f32, Float32);
557impl_set_with_mask!(set_with_mask_u8, u8, u8, UInt8);
558impl_set_with_mask!(set_with_mask_u16, u16, u16, UInt16);
559impl_set_with_mask!(set_with_mask_u32, u32, u32, UInt32);
560impl_set_with_mask!(set_with_mask_u64, u64, u64, UInt64);
561impl_set_with_mask!(set_with_mask_i8, i8, i8, Int8);
562impl_set_with_mask!(set_with_mask_i16, i16, i16, Int16);
563impl_set_with_mask!(set_with_mask_i32, i32, i32, Int32);
564impl_set_with_mask!(set_with_mask_i64, i64, i64, Int64);
565impl_set_with_mask!(set_with_mask_bool, bool, bool, Boolean);
566
567macro_rules! impl_get {
568    ($name:ident, $series_variant:ident, $type:ty) => {
569        #[pymethods]
570        impl PySeries {
571            fn $name(&self, index: i64) -> Option<$type> {
572                if let Ok(ca) = self.series.$series_variant() {
573                    let index = if index < 0 {
574                        (ca.len() as i64 + index) as usize
575                    } else {
576                        index as usize
577                    };
578                    ca.get(index)
579                } else {
580                    None
581                }
582            }
583        }
584    };
585}
586
587impl_get!(get_f32, f32, f32);
588impl_get!(get_f64, f64, f64);
589impl_get!(get_u8, u8, u8);
590impl_get!(get_u16, u16, u16);
591impl_get!(get_u32, u32, u32);
592impl_get!(get_u64, u64, u64);
593impl_get!(get_i8, i8, i8);
594impl_get!(get_i16, i16, i16);
595impl_get!(get_i32, i32, i32);
596impl_get!(get_i64, i64, i64);
597impl_get!(get_str, str, &str);
598impl_get!(get_date, date, i32);
599impl_get!(get_datetime, datetime, i64);
600impl_get!(get_duration, duration, i64);
601
#[cfg(test)]
mod test {
    use super::*;
    use crate::series::ToSeries;

    /// A `PySeries` can be reinterpreted as a `Series`, and a `Vec<PySeries>`
    /// converts through `ToSeries`, with the data intact.
    #[test]
    fn transmute_to_series() {
        // NOTE: This is only possible because PySeries is #[repr(transparent)]
        // https://doc.rust-lang.org/reference/type-layout.html
        let wrapped = PySeries {
            series: [1i32, 2, 3].iter().collect(),
        };

        let transmuted = unsafe { std::mem::transmute::<PySeries, Series>(wrapped.clone()) };
        assert_eq!(transmuted.sum::<i32>().unwrap(), 6);

        let converted = vec![wrapped].to_series();
        let sums = converted
            .iter()
            .map(|s| s.sum::<i32>().unwrap())
            .collect::<Vec<_>>();
        assert_eq!(sums, vec![6]);
    }
}