polars_python/series/
general.rs

1use polars_core::chunked_array::cast::CastOptions;
2use polars_core::series::IsSorted;
3use polars_core::utils::flatten::flatten_series;
4use polars_row::RowEncodingOptions;
5use pyo3::exceptions::{PyIndexError, PyRuntimeError, PyValueError};
6use pyo3::prelude::*;
7use pyo3::types::PyBytes;
8use pyo3::{IntoPyObjectExt, Python};
9
10use self::row_encode::get_row_encoding_context;
11use super::PySeries;
12use crate::dataframe::PyDataFrame;
13use crate::error::PyPolarsErr;
14use crate::prelude::*;
15use crate::py_modules::polars;
16
17#[pymethods]
18impl PySeries {
19    fn struct_unnest(&self, py: Python) -> PyResult<PyDataFrame> {
20        let ca = self.series.struct_().map_err(PyPolarsErr::from)?;
21        let df: DataFrame = py.allow_threads(|| ca.clone().unnest());
22        Ok(df.into())
23    }
24
25    fn struct_fields(&self) -> PyResult<Vec<&str>> {
26        let ca = self.series.struct_().map_err(PyPolarsErr::from)?;
27        Ok(ca
28            .struct_fields()
29            .iter()
30            .map(|s| s.name().as_str())
31            .collect())
32    }
33
34    fn is_sorted_ascending_flag(&self) -> bool {
35        matches!(self.series.is_sorted_flag(), IsSorted::Ascending)
36    }
37
38    fn is_sorted_descending_flag(&self) -> bool {
39        matches!(self.series.is_sorted_flag(), IsSorted::Descending)
40    }
41
42    fn can_fast_explode_flag(&self) -> bool {
43        match self.series.list() {
44            Err(_) => false,
45            Ok(list) => list._can_fast_explode(),
46        }
47    }
48
49    pub fn cat_uses_lexical_ordering(&self) -> PyResult<bool> {
50        let ca = self.series.categorical().map_err(PyPolarsErr::from)?;
51        Ok(ca.uses_lexical_ordering())
52    }
53
54    pub fn cat_is_local(&self) -> PyResult<bool> {
55        let ca = self.series.categorical().map_err(PyPolarsErr::from)?;
56        Ok(ca.get_rev_map().is_local())
57    }
58
59    pub fn cat_to_local(&self, py: Python) -> PyResult<Self> {
60        let ca = self.series.categorical().map_err(PyPolarsErr::from)?;
61        Ok(py.allow_threads(|| ca.to_local().into_series().into()))
62    }
63
64    fn estimated_size(&self) -> usize {
65        self.series.estimated_size()
66    }
67
68    #[cfg(feature = "object")]
69    fn get_object<'py>(&self, py: Python<'py>, index: usize) -> PyResult<Bound<'py, PyAny>> {
70        if matches!(self.series.dtype(), DataType::Object(_, _)) {
71            let obj: Option<&ObjectValue> = self.series.get_object(index).map(|any| any.into());
72            Ok(obj.into_pyobject(py)?)
73        } else {
74            Ok(py.None().into_bound(py))
75        }
76    }
77
78    #[cfg(feature = "dtype-array")]
79    fn reshape(&self, py: Python, dims: Vec<i64>) -> PyResult<Self> {
80        let dims = dims
81            .into_iter()
82            .map(ReshapeDimension::new)
83            .collect::<Vec<_>>();
84
85        let out = py
86            .allow_threads(|| self.series.reshape_array(&dims))
87            .map_err(PyPolarsErr::from)?;
88        Ok(out.into())
89    }
90
91    /// Returns the string format of a single element of the Series.
92    fn get_fmt(&self, index: usize, str_len_limit: usize) -> String {
93        let v = format!("{}", self.series.get(index).unwrap());
94        if let DataType::String | DataType::Categorical(_, _) | DataType::Enum(_, _) =
95            self.series.dtype()
96        {
97            let v_no_quotes = &v[1..v.len() - 1];
98            let v_trunc = &v_no_quotes[..v_no_quotes
99                .char_indices()
100                .take(str_len_limit)
101                .last()
102                .map(|(i, c)| i + c.len_utf8())
103                .unwrap_or(0)];
104            if v_no_quotes == v_trunc {
105                v
106            } else {
107                format!("\"{v_trunc}…")
108            }
109        } else {
110            v
111        }
112    }
113
114    pub fn rechunk(&mut self, py: Python, in_place: bool) -> Option<Self> {
115        let series = py.allow_threads(|| self.series.rechunk());
116        if in_place {
117            self.series = series;
118            None
119        } else {
120            Some(series.into())
121        }
122    }
123
124    /// Get a value by index.
125    fn get_index(&self, py: Python, index: usize) -> PyResult<PyObject> {
126        let av = match self.series.get(index) {
127            Ok(v) => v,
128            Err(PolarsError::OutOfBounds(err)) => {
129                return Err(PyIndexError::new_err(err.to_string()))
130            },
131            Err(e) => return Err(PyPolarsErr::from(e).into()),
132        };
133
134        match av {
135            AnyValue::List(s) | AnyValue::Array(s, _) => {
136                let pyseries = PySeries::new(s);
137                polars(py).getattr(py, "wrap_s")?.call1(py, (pyseries,))
138            },
139            _ => Wrap(av).into_py_any(py),
140        }
141    }
142
143    /// Get a value by index, allowing negative indices.
144    fn get_index_signed(&self, py: Python, index: isize) -> PyResult<PyObject> {
145        let index = if index < 0 {
146            match self.len().checked_sub(index.unsigned_abs()) {
147                Some(v) => v,
148                None => {
149                    return Err(PyIndexError::new_err(
150                        polars_err!(oob = index, self.len()).to_string(),
151                    ));
152                },
153            }
154        } else {
155            usize::try_from(index).unwrap()
156        };
157        self.get_index(py, index)
158    }
159
160    fn bitand(&self, py: Python, other: &PySeries) -> PyResult<Self> {
161        let out = py
162            .allow_threads(|| &self.series & &other.series)
163            .map_err(PyPolarsErr::from)?;
164        Ok(out.into())
165    }
166
167    fn bitor(&self, py: Python, other: &PySeries) -> PyResult<Self> {
168        let out = py
169            .allow_threads(|| &self.series | &other.series)
170            .map_err(PyPolarsErr::from)?;
171        Ok(out.into())
172    }
173    fn bitxor(&self, py: Python, other: &PySeries) -> PyResult<Self> {
174        let out = py
175            .allow_threads(|| &self.series ^ &other.series)
176            .map_err(PyPolarsErr::from)?;
177        Ok(out.into())
178    }
179
180    fn chunk_lengths(&self) -> Vec<usize> {
181        self.series.chunk_lengths().collect()
182    }
183
184    pub fn name(&self) -> &str {
185        self.series.name().as_str()
186    }
187
188    fn rename(&mut self, name: &str) {
189        self.series.rename(name.into());
190    }
191
192    fn dtype<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
193        Wrap(self.series.dtype().clone()).into_pyobject(py)
194    }
195
196    fn set_sorted_flag(&self, descending: bool) -> Self {
197        let mut out = self.series.clone();
198        if descending {
199            out.set_sorted_flag(IsSorted::Descending);
200        } else {
201            out.set_sorted_flag(IsSorted::Ascending)
202        }
203        out.into()
204    }
205
206    fn n_chunks(&self) -> usize {
207        self.series.n_chunks()
208    }
209
210    fn append(&mut self, other: &PySeries) -> PyResult<()> {
211        self.series
212            .append(&other.series)
213            .map_err(PyPolarsErr::from)?;
214        Ok(())
215    }
216
217    fn extend(&mut self, py: Python, other: &PySeries) -> PyResult<()> {
218        py.allow_threads(|| self.series.extend(&other.series))
219            .map_err(PyPolarsErr::from)?;
220        Ok(())
221    }
222
223    fn new_from_index(&self, py: Python, index: usize, length: usize) -> PyResult<Self> {
224        if index >= self.series.len() {
225            Err(PyValueError::new_err("index is out of bounds"))
226        } else {
227            Ok(py.allow_threads(|| self.series.new_from_index(index, length).into()))
228        }
229    }
230
231    fn filter(&self, py: Python, filter: &PySeries) -> PyResult<Self> {
232        let filter_series = &filter.series;
233        if let Ok(ca) = filter_series.bool() {
234            let series = py
235                .allow_threads(|| self.series.filter(ca))
236                .map_err(PyPolarsErr::from)?;
237            Ok(PySeries { series })
238        } else {
239            Err(PyRuntimeError::new_err("Expected a boolean mask"))
240        }
241    }
242
243    fn sort(
244        &mut self,
245        py: Python,
246        descending: bool,
247        nulls_last: bool,
248        multithreaded: bool,
249    ) -> PyResult<Self> {
250        Ok(py
251            .allow_threads(|| {
252                self.series.sort(
253                    SortOptions::default()
254                        .with_order_descending(descending)
255                        .with_nulls_last(nulls_last)
256                        .with_multithreaded(multithreaded),
257                )
258            })
259            .map_err(PyPolarsErr::from)?
260            .into())
261    }
262
263    fn gather_with_series(&self, py: Python, indices: &PySeries) -> PyResult<Self> {
264        py.allow_threads(|| {
265            let indices = indices.series.idx().map_err(PyPolarsErr::from)?;
266            let s = self.series.take(indices).map_err(PyPolarsErr::from)?;
267            Ok(s.into())
268        })
269    }
270
271    fn null_count(&self) -> PyResult<usize> {
272        Ok(self.series.null_count())
273    }
274
275    fn has_nulls(&self) -> bool {
276        self.series.has_nulls()
277    }
278
279    fn equals(
280        &self,
281        py: Python,
282        other: &PySeries,
283        check_dtypes: bool,
284        check_names: bool,
285        null_equal: bool,
286    ) -> bool {
287        if check_dtypes && (self.series.dtype() != other.series.dtype()) {
288            return false;
289        }
290        if check_names && (self.series.name() != other.series.name()) {
291            return false;
292        }
293        if null_equal {
294            py.allow_threads(|| self.series.equals_missing(&other.series))
295        } else {
296            py.allow_threads(|| self.series.equals(&other.series))
297        }
298    }
299
300    fn as_str(&self) -> PyResult<String> {
301        Ok(format!("{:?}", self.series))
302    }
303
304    #[allow(clippy::len_without_is_empty)]
305    pub fn len(&self) -> usize {
306        self.series.len()
307    }
308
309    /// Rechunk and return a pointer to the start of the Series.
310    /// Only implemented for numeric types
311    fn as_single_ptr(&mut self, py: Python) -> PyResult<usize> {
312        let ptr = py
313            .allow_threads(|| self.series.as_single_ptr())
314            .map_err(PyPolarsErr::from)?;
315        Ok(ptr)
316    }
317
318    fn clone(&self) -> Self {
319        self.series.clone().into()
320    }
321
322    fn zip_with(&self, py: Python, mask: &PySeries, other: &PySeries) -> PyResult<Self> {
323        let mask = mask.series.bool().map_err(PyPolarsErr::from)?;
324        let s = py
325            .allow_threads(|| self.series.zip_with(mask, &other.series))
326            .map_err(PyPolarsErr::from)?;
327        Ok(s.into())
328    }
329
330    #[pyo3(signature = (separator, drop_first=false))]
331    fn to_dummies(
332        &self,
333        py: Python,
334        separator: Option<&str>,
335        drop_first: bool,
336    ) -> PyResult<PyDataFrame> {
337        let df = py
338            .allow_threads(|| self.series.to_dummies(separator, drop_first))
339            .map_err(PyPolarsErr::from)?;
340        Ok(df.into())
341    }
342
343    fn get_list(&self, index: usize) -> Option<Self> {
344        let ca = self.series.list().ok()?;
345        Some(ca.get_as_series(index)?.into())
346    }
347
348    fn n_unique(&self, py: Python) -> PyResult<usize> {
349        let n = py
350            .allow_threads(|| self.series.n_unique())
351            .map_err(PyPolarsErr::from)?;
352        Ok(n)
353    }
354
355    fn floor(&self, py: Python) -> PyResult<Self> {
356        let s = py
357            .allow_threads(|| self.series.floor())
358            .map_err(PyPolarsErr::from)?;
359        Ok(s.into())
360    }
361
362    fn shrink_to_fit(&mut self, py: Python) {
363        py.allow_threads(|| self.series.shrink_to_fit());
364    }
365
366    fn dot<'py>(&self, other: &PySeries, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
367        let lhs_dtype = self.series.dtype();
368        let rhs_dtype = other.series.dtype();
369
370        if !lhs_dtype.is_primitive_numeric() {
371            return Err(PyPolarsErr::from(polars_err!(opq = dot, lhs_dtype)).into());
372        };
373        if !rhs_dtype.is_primitive_numeric() {
374            return Err(PyPolarsErr::from(polars_err!(opq = dot, rhs_dtype)).into());
375        }
376
377        let result: AnyValue = if lhs_dtype.is_float() || rhs_dtype.is_float() {
378            py.allow_threads(|| (&self.series * &other.series)?.sum::<f64>())
379                .map_err(PyPolarsErr::from)?
380                .into()
381        } else {
382            py.allow_threads(|| (&self.series * &other.series)?.sum::<i64>())
383                .map_err(PyPolarsErr::from)?
384                .into()
385        };
386
387        Wrap(result).into_pyobject(py)
388    }
389
390    fn __getstate__<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyBytes>> {
391        // Used in pickle/pickling
392        Ok(PyBytes::new(
393            py,
394            &py.allow_threads(|| self.series.serialize_to_bytes().map_err(PyPolarsErr::from))?,
395        ))
396    }
397
398    fn __setstate__(&mut self, py: Python, state: PyObject) -> PyResult<()> {
399        // Used in pickle/pickling
400
401        use pyo3::pybacked::PyBackedBytes;
402        match state.extract::<PyBackedBytes>(py) {
403            Ok(s) => py.allow_threads(|| {
404                let s = Series::deserialize_from_reader(&mut &*s).map_err(PyPolarsErr::from)?;
405                self.series = s;
406                Ok(())
407            }),
408            Err(e) => Err(e),
409        }
410    }
411
412    fn skew(&self, py: Python, bias: bool) -> PyResult<Option<f64>> {
413        let out = py
414            .allow_threads(|| self.series.skew(bias))
415            .map_err(PyPolarsErr::from)?;
416        Ok(out)
417    }
418
419    fn kurtosis(&self, py: Python, fisher: bool, bias: bool) -> PyResult<Option<f64>> {
420        let out = py
421            .allow_threads(|| self.series.kurtosis(fisher, bias))
422            .map_err(PyPolarsErr::from)?;
423        Ok(out)
424    }
425
426    fn cast(
427        &self,
428        py: Python,
429        dtype: Wrap<DataType>,
430        strict: bool,
431        wrap_numerical: bool,
432    ) -> PyResult<Self> {
433        let options = if wrap_numerical {
434            CastOptions::Overflowing
435        } else if strict {
436            CastOptions::Strict
437        } else {
438            CastOptions::NonStrict
439        };
440
441        let dtype = dtype.0;
442        let out = py.allow_threads(|| self.series.cast_with_options(&dtype, options));
443        let out = out.map_err(PyPolarsErr::from)?;
444        Ok(out.into())
445    }
446
447    fn get_chunks(&self) -> PyResult<Vec<PyObject>> {
448        Python::with_gil(|py| {
449            let wrap_s = py_modules::polars(py).getattr(py, "wrap_s").unwrap();
450            flatten_series(&self.series)
451                .into_iter()
452                .map(|s| wrap_s.call1(py, (Self::new(s),)))
453                .collect()
454        })
455    }
456
457    fn is_sorted(&self, py: Python, descending: bool, nulls_last: bool) -> PyResult<bool> {
458        let options = SortOptions {
459            descending,
460            nulls_last,
461            multithreaded: true,
462            maintain_order: false,
463            limit: None,
464        };
465        Ok(py
466            .allow_threads(|| self.series.is_sorted(options))
467            .map_err(PyPolarsErr::from)?)
468    }
469
470    fn clear(&self) -> Self {
471        self.series.clear().into()
472    }
473
474    fn head(&self, py: Python, n: usize) -> Self {
475        py.allow_threads(|| self.series.head(Some(n))).into()
476    }
477
478    fn tail(&self, py: Python, n: usize) -> Self {
479        py.allow_threads(|| self.series.tail(Some(n))).into()
480    }
481
482    fn value_counts(
483        &self,
484        py: Python,
485        sort: bool,
486        parallel: bool,
487        name: String,
488        normalize: bool,
489    ) -> PyResult<PyDataFrame> {
490        let out = py
491            .allow_threads(|| {
492                self.series
493                    .value_counts(sort, parallel, name.into(), normalize)
494            })
495            .map_err(PyPolarsErr::from)?;
496        Ok(out.into())
497    }
498
499    #[pyo3(signature = (offset, length=None))]
500    fn slice(&self, offset: i64, length: Option<usize>) -> Self {
501        let length = length.unwrap_or_else(|| self.series.len());
502        self.series.slice(offset, length).into()
503    }
504
505    pub fn not_(&self, py: Python) -> PyResult<Self> {
506        let out = py
507            .allow_threads(|| polars_ops::series::negate_bitwise(&self.series))
508            .map_err(PyPolarsErr::from)?;
509        Ok(out.into())
510    }
511
512    /// Internal utility function to allow direct access to the row encoding from python.
513    #[pyo3(signature = (dtypes, opts))]
514    fn _row_decode<'py>(
515        &'py self,
516        py: Python<'py>,
517        dtypes: Vec<(String, Wrap<DataType>)>,
518        opts: Vec<(bool, bool, bool)>,
519    ) -> PyResult<PyDataFrame> {
520        py.allow_threads(|| {
521            assert_eq!(dtypes.len(), opts.len());
522
523            let opts = opts
524                .into_iter()
525                .map(|(descending, nulls_last, no_order)| {
526                    let mut opt = RowEncodingOptions::default();
527
528                    opt.set(RowEncodingOptions::DESCENDING, descending);
529                    opt.set(RowEncodingOptions::NULLS_LAST, nulls_last);
530                    opt.set(RowEncodingOptions::NO_ORDER, no_order);
531
532                    opt
533                })
534                .collect::<Vec<_>>();
535
536            // The polars-row crate expects the physical arrow types.
537            let arrow_dtypes = dtypes
538                .iter()
539                .map(|(_, dtype)| dtype.0.to_physical().to_arrow(CompatLevel::newest()))
540                .collect::<Vec<_>>();
541
542            let dicts = dtypes
543                .iter()
544                .map(|(_, dtype)| get_row_encoding_context(&dtype.0))
545                .collect::<Vec<_>>();
546
547            // Get the BinaryOffset array.
548            let arr = self.series.rechunk();
549            let arr = arr.binary_offset().map_err(PyPolarsErr::from)?;
550            assert_eq!(arr.chunks().len(), 1);
551            let mut values = arr
552                .downcast_iter()
553                .next()
554                .unwrap()
555                .values_iter()
556                .collect::<Vec<&[u8]>>();
557
558            let columns = PyResult::Ok(unsafe {
559                polars_row::decode::decode_rows(&mut values, &opts, &dicts, &arrow_dtypes)
560            })?;
561
562            // Construct a DataFrame from the result.
563            let columns = columns
564                .into_iter()
565                .zip(dtypes)
566                .map(|(arr, (name, dtype))| unsafe {
567                    Series::from_chunks_and_dtype_unchecked(
568                        PlSmallStr::from(name),
569                        vec![arr],
570                        &dtype.0.to_physical(),
571                    )
572                    .into_column()
573                    .from_physical_unchecked(&dtype.0)
574                })
575                .collect::<PolarsResult<Vec<_>>>()
576                .map_err(PyPolarsErr::from)?;
577            Ok(DataFrame::new(columns).map_err(PyPolarsErr::from)?.into())
578        })
579    }
580}
581
582macro_rules! impl_set_with_mask {
583    ($name:ident, $native:ty, $cast:ident, $variant:ident) => {
584        fn $name(
585            series: &Series,
586            filter: &PySeries,
587            value: Option<$native>,
588        ) -> PolarsResult<Series> {
589            let mask = filter.series.bool()?;
590            let ca = series.$cast()?;
591            let new = ca.set(mask, value)?;
592            Ok(new.into_series())
593        }
594
595        #[pymethods]
596        impl PySeries {
597            #[pyo3(signature = (filter, value))]
598            fn $name(
599                &self,
600                py: Python,
601                filter: &PySeries,
602                value: Option<$native>,
603            ) -> PyResult<Self> {
604                let series = py
605                    .allow_threads(|| $name(&self.series, filter, value))
606                    .map_err(PyPolarsErr::from)?;
607                Ok(Self::new(series))
608            }
609        }
610    };
611}
612
613impl_set_with_mask!(set_with_mask_str, &str, str, String);
614impl_set_with_mask!(set_with_mask_f64, f64, f64, Float64);
615impl_set_with_mask!(set_with_mask_f32, f32, f32, Float32);
616impl_set_with_mask!(set_with_mask_u8, u8, u8, UInt8);
617impl_set_with_mask!(set_with_mask_u16, u16, u16, UInt16);
618impl_set_with_mask!(set_with_mask_u32, u32, u32, UInt32);
619impl_set_with_mask!(set_with_mask_u64, u64, u64, UInt64);
620impl_set_with_mask!(set_with_mask_i8, i8, i8, Int8);
621impl_set_with_mask!(set_with_mask_i16, i16, i16, Int16);
622impl_set_with_mask!(set_with_mask_i32, i32, i32, Int32);
623impl_set_with_mask!(set_with_mask_i64, i64, i64, Int64);
624impl_set_with_mask!(set_with_mask_bool, bool, bool, Boolean);
625
626macro_rules! impl_get {
627    ($name:ident, $series_variant:ident, $type:ty) => {
628        #[pymethods]
629        impl PySeries {
630            fn $name(&self, index: i64) -> Option<$type> {
631                if let Ok(ca) = self.series.$series_variant() {
632                    let index = if index < 0 {
633                        (ca.len() as i64 + index) as usize
634                    } else {
635                        index as usize
636                    };
637                    ca.get(index)
638                } else {
639                    None
640                }
641            }
642        }
643    };
644}
645
646impl_get!(get_f32, f32, f32);
647impl_get!(get_f64, f64, f64);
648impl_get!(get_u8, u8, u8);
649impl_get!(get_u16, u16, u16);
650impl_get!(get_u32, u32, u32);
651impl_get!(get_u64, u64, u64);
652impl_get!(get_i8, i8, i8);
653impl_get!(get_i16, i16, i16);
654impl_get!(get_i32, i32, i32);
655impl_get!(get_i64, i64, i64);
656impl_get!(get_str, str, &str);
657impl_get!(get_date, date, i32);
658impl_get!(get_datetime, datetime, i64);
659impl_get!(get_duration, duration, i64);
660
#[cfg(test)]
mod test {
    use super::*;
    use crate::series::ToSeries;

    /// A `PySeries` can be reinterpreted as a `Series`, and `ToSeries` exposes the wrapped
    /// Series of a `Vec<PySeries>`.
    #[test]
    fn transmute_to_series() {
        // NOTE: This is only possible because PySeries is #[repr(transparent)]
        // https://doc.rust-lang.org/reference/type-layout.html
        let py_series = PySeries {
            series: [1i32, 2, 3].iter().collect(),
        };

        let raw = unsafe { std::mem::transmute::<PySeries, Series>(py_series.clone()) };
        assert_eq!(raw.sum::<i32>().unwrap(), 6);

        let collection = vec![py_series];
        let unwrapped = collection.to_series();
        let sums = unwrapped
            .iter()
            .map(|s| s.sum::<i32>().unwrap())
            .collect::<Vec<_>>();
        assert_eq!(sums, vec![6]);
    }
}
686}