polars_python/series/
general.rs

1use polars_core::chunked_array::cast::CastOptions;
2use polars_core::series::IsSorted;
3use polars_core::utils::flatten::flatten_series;
4use polars_row::RowEncodingOptions;
5use pyo3::exceptions::{PyIndexError, PyRuntimeError, PyValueError};
6use pyo3::prelude::*;
7use pyo3::types::PyBytes;
8use pyo3::{IntoPyObjectExt, Python};
9
10use self::row_encode::get_row_encoding_context;
11use super::PySeries;
12use crate::dataframe::PyDataFrame;
13use crate::error::PyPolarsErr;
14use crate::prelude::*;
15use crate::py_modules::polars;
16use crate::utils::EnterPolarsExt;
17
18#[pymethods]
19impl PySeries {
20    fn struct_unnest(&self, py: Python) -> PyResult<PyDataFrame> {
21        py.enter_polars_df(|| Ok(self.series.struct_()?.clone().unnest()))
22    }
23
24    fn struct_fields(&self) -> PyResult<Vec<&str>> {
25        let ca = self.series.struct_().map_err(PyPolarsErr::from)?;
26        Ok(ca
27            .struct_fields()
28            .iter()
29            .map(|s| s.name().as_str())
30            .collect())
31    }
32
33    fn is_sorted_ascending_flag(&self) -> bool {
34        matches!(self.series.is_sorted_flag(), IsSorted::Ascending)
35    }
36
37    fn is_sorted_descending_flag(&self) -> bool {
38        matches!(self.series.is_sorted_flag(), IsSorted::Descending)
39    }
40
41    fn can_fast_explode_flag(&self) -> bool {
42        match self.series.list() {
43            Err(_) => false,
44            Ok(list) => list._can_fast_explode(),
45        }
46    }
47
48    pub fn cat_uses_lexical_ordering(&self) -> PyResult<bool> {
49        Ok(true)
50    }
51
52    pub fn cat_is_local(&self) -> PyResult<bool> {
53        Ok(false)
54    }
55
56    pub fn cat_to_local(&self, _py: Python) -> PyResult<Self> {
57        Ok(self.clone())
58    }
59
60    fn estimated_size(&self) -> usize {
61        self.series.estimated_size()
62    }
63
64    #[cfg(feature = "object")]
65    fn get_object<'py>(&self, py: Python<'py>, index: usize) -> PyResult<Bound<'py, PyAny>> {
66        if matches!(self.series.dtype(), DataType::Object(_)) {
67            let obj: Option<&ObjectValue> = self.series.get_object(index).map(|any| any.into());
68            Ok(obj.into_pyobject(py)?)
69        } else {
70            Ok(py.None().into_bound(py))
71        }
72    }
73
74    #[cfg(feature = "dtype-array")]
75    fn reshape(&self, py: Python<'_>, dims: Vec<i64>) -> PyResult<Self> {
76        let dims = dims
77            .into_iter()
78            .map(ReshapeDimension::new)
79            .collect::<Vec<_>>();
80
81        py.enter_polars_series(|| self.series.reshape_array(&dims))
82    }
83
84    /// Returns the string format of a single element of the Series.
85    fn get_fmt(&self, index: usize, str_len_limit: usize) -> String {
86        let v = format!("{}", self.series.get(index).unwrap());
87        if let DataType::String | DataType::Categorical(_, _) | DataType::Enum(_, _) =
88            self.series.dtype()
89        {
90            let v_no_quotes = &v[1..v.len() - 1];
91            let v_trunc = &v_no_quotes[..v_no_quotes
92                .char_indices()
93                .take(str_len_limit)
94                .last()
95                .map(|(i, c)| i + c.len_utf8())
96                .unwrap_or(0)];
97            if v_no_quotes == v_trunc {
98                v
99            } else {
100                format!("\"{v_trunc}…")
101            }
102        } else {
103            v
104        }
105    }
106
107    pub fn rechunk(&mut self, py: Python<'_>, in_place: bool) -> PyResult<Option<Self>> {
108        let series = py.enter_polars_ok(|| self.series.rechunk())?;
109        if in_place {
110            self.series = series;
111            Ok(None)
112        } else {
113            Ok(Some(series.into()))
114        }
115    }
116
117    /// Get a value by index.
118    fn get_index(&self, py: Python<'_>, index: usize) -> PyResult<PyObject> {
119        let av = match self.series.get(index) {
120            Ok(v) => v,
121            Err(PolarsError::OutOfBounds(err)) => {
122                return Err(PyIndexError::new_err(err.to_string()));
123            },
124            Err(e) => return Err(PyPolarsErr::from(e).into()),
125        };
126
127        match av {
128            AnyValue::List(s) | AnyValue::Array(s, _) => {
129                let pyseries = PySeries::new(s);
130                polars(py).getattr(py, "wrap_s")?.call1(py, (pyseries,))
131            },
132            _ => Wrap(av).into_py_any(py),
133        }
134    }
135
136    /// Get a value by index, allowing negative indices.
137    fn get_index_signed(&self, py: Python<'_>, index: isize) -> PyResult<PyObject> {
138        let index = if index < 0 {
139            match self.len().checked_sub(index.unsigned_abs()) {
140                Some(v) => v,
141                None => {
142                    return Err(PyIndexError::new_err(
143                        polars_err!(oob = index, self.len()).to_string(),
144                    ));
145                },
146            }
147        } else {
148            usize::try_from(index).unwrap()
149        };
150        self.get_index(py, index)
151    }
152
153    fn bitand(&self, py: Python<'_>, other: &PySeries) -> PyResult<Self> {
154        py.enter_polars_series(|| &self.series & &other.series)
155    }
156
157    fn bitor(&self, py: Python<'_>, other: &PySeries) -> PyResult<Self> {
158        py.enter_polars_series(|| &self.series | &other.series)
159    }
160
161    fn bitxor(&self, py: Python<'_>, other: &PySeries) -> PyResult<Self> {
162        py.enter_polars_series(|| &self.series ^ &other.series)
163    }
164
165    fn chunk_lengths(&self) -> Vec<usize> {
166        self.series.chunk_lengths().collect()
167    }
168
169    pub fn name(&self) -> &str {
170        self.series.name().as_str()
171    }
172
173    fn rename(&mut self, name: &str) {
174        self.series.rename(name.into());
175    }
176
177    fn dtype<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
178        Wrap(self.series.dtype().clone()).into_pyobject(py)
179    }
180
181    fn set_sorted_flag(&self, descending: bool) -> Self {
182        let mut out = self.series.clone();
183        if descending {
184            out.set_sorted_flag(IsSorted::Descending);
185        } else {
186            out.set_sorted_flag(IsSorted::Ascending)
187        }
188        out.into()
189    }
190
191    fn n_chunks(&self) -> usize {
192        self.series.n_chunks()
193    }
194
195    fn append(&mut self, other: &PySeries) -> PyResult<()> {
196        self.series
197            .append(&other.series)
198            .map_err(PyPolarsErr::from)?;
199        Ok(())
200    }
201
202    fn extend(&mut self, py: Python<'_>, other: &PySeries) -> PyResult<()> {
203        py.enter_polars(|| {
204            self.series.extend(&other.series)?;
205            PolarsResult::Ok(())
206        })
207    }
208
209    fn new_from_index(&self, py: Python<'_>, index: usize, length: usize) -> PyResult<Self> {
210        if index >= self.series.len() {
211            Err(PyValueError::new_err("index is out of bounds"))
212        } else {
213            py.enter_polars_series(|| Ok(self.series.new_from_index(index, length)))
214        }
215    }
216
217    fn filter(&self, py: Python<'_>, filter: &PySeries) -> PyResult<Self> {
218        let filter_series = &filter.series;
219        if let Ok(ca) = filter_series.bool() {
220            py.enter_polars_series(|| self.series.filter(ca))
221        } else {
222            Err(PyRuntimeError::new_err("Expected a boolean mask"))
223        }
224    }
225
226    fn sort(
227        &mut self,
228        py: Python<'_>,
229        descending: bool,
230        nulls_last: bool,
231        multithreaded: bool,
232    ) -> PyResult<Self> {
233        py.enter_polars_series(|| {
234            self.series.sort(
235                SortOptions::default()
236                    .with_order_descending(descending)
237                    .with_nulls_last(nulls_last)
238                    .with_multithreaded(multithreaded),
239            )
240        })
241    }
242
243    fn gather_with_series(&self, py: Python<'_>, indices: &PySeries) -> PyResult<Self> {
244        py.enter_polars_series(|| self.series.take(indices.series.idx()?))
245    }
246
247    fn null_count(&self) -> PyResult<usize> {
248        Ok(self.series.null_count())
249    }
250
251    fn has_nulls(&self) -> bool {
252        self.series.has_nulls()
253    }
254
255    fn equals(
256        &self,
257        py: Python<'_>,
258        other: &PySeries,
259        check_dtypes: bool,
260        check_names: bool,
261        null_equal: bool,
262    ) -> PyResult<bool> {
263        if check_dtypes && (self.series.dtype() != other.series.dtype()) {
264            return Ok(false);
265        }
266        if check_names && (self.series.name() != other.series.name()) {
267            return Ok(false);
268        }
269        if null_equal {
270            py.enter_polars_ok(|| self.series.equals_missing(&other.series))
271        } else {
272            py.enter_polars_ok(|| self.series.equals(&other.series))
273        }
274    }
275
276    fn as_str(&self) -> PyResult<String> {
277        Ok(format!("{:?}", self.series))
278    }
279
280    #[allow(clippy::len_without_is_empty)]
281    pub fn len(&self) -> usize {
282        self.series.len()
283    }
284
285    /// Rechunk and return a pointer to the start of the Series.
286    /// Only implemented for numeric types
287    fn as_single_ptr(&mut self, py: Python) -> PyResult<usize> {
288        py.enter_polars(|| self.series.as_single_ptr())
289    }
290
291    fn clone(&self) -> Self {
292        self.series.clone().into()
293    }
294
295    fn zip_with(&self, py: Python<'_>, mask: &PySeries, other: &PySeries) -> PyResult<Self> {
296        let mask = mask.series.bool().map_err(PyPolarsErr::from)?;
297        py.enter_polars_series(|| self.series.zip_with(mask, &other.series))
298    }
299
300    #[pyo3(signature = (separator, drop_first=false, drop_nulls=false))]
301    fn to_dummies(
302        &self,
303        py: Python<'_>,
304        separator: Option<&str>,
305        drop_first: bool,
306        drop_nulls: bool,
307    ) -> PyResult<PyDataFrame> {
308        py.enter_polars_df(|| self.series.to_dummies(separator, drop_first, drop_nulls))
309    }
310
311    fn get_list(&self, index: usize) -> Option<Self> {
312        let ca = self.series.list().ok()?;
313        Some(ca.get_as_series(index)?.into())
314    }
315
316    fn n_unique(&self, py: Python) -> PyResult<usize> {
317        py.enter_polars(|| self.series.n_unique())
318    }
319
320    fn floor(&self, py: Python) -> PyResult<Self> {
321        py.enter_polars_series(|| self.series.floor())
322    }
323
324    fn shrink_to_fit(&mut self, py: Python) -> PyResult<()> {
325        py.enter_polars_ok(|| self.series.shrink_to_fit())
326    }
327
328    fn dot<'py>(&self, other: &PySeries, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
329        let lhs_dtype = self.series.dtype();
330        let rhs_dtype = other.series.dtype();
331
332        if !lhs_dtype.is_primitive_numeric() {
333            return Err(PyPolarsErr::from(polars_err!(opq = dot, lhs_dtype)).into());
334        };
335        if !rhs_dtype.is_primitive_numeric() {
336            return Err(PyPolarsErr::from(polars_err!(opq = dot, rhs_dtype)).into());
337        }
338
339        let result: AnyValue = if lhs_dtype.is_float() || rhs_dtype.is_float() {
340            py.enter_polars(|| (&self.series * &other.series)?.sum::<f64>())?
341                .into()
342        } else {
343            py.enter_polars(|| (&self.series * &other.series)?.sum::<i64>())?
344                .into()
345        };
346
347        Wrap(result).into_pyobject(py)
348    }
349
350    fn __getstate__<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyBytes>> {
351        // Used in pickle/pickling
352        Ok(PyBytes::new(
353            py,
354            &py.enter_polars(|| self.series.serialize_to_bytes())?,
355        ))
356    }
357
358    fn __setstate__(&mut self, py: Python<'_>, state: PyObject) -> PyResult<()> {
359        // Used in pickle/pickling
360
361        use pyo3::pybacked::PyBackedBytes;
362        match state.extract::<PyBackedBytes>(py) {
363            Ok(s) => py.enter_polars(|| {
364                let s = Series::deserialize_from_reader(&mut &*s)?;
365                self.series = s;
366                PolarsResult::Ok(())
367            }),
368            Err(e) => Err(e),
369        }
370    }
371
372    fn skew(&self, py: Python<'_>, bias: bool) -> PyResult<Option<f64>> {
373        py.enter_polars(|| self.series.skew(bias))
374    }
375
376    fn kurtosis(&self, py: Python<'_>, fisher: bool, bias: bool) -> PyResult<Option<f64>> {
377        py.enter_polars(|| self.series.kurtosis(fisher, bias))
378    }
379
380    fn cast(
381        &self,
382        py: Python<'_>,
383        dtype: Wrap<DataType>,
384        strict: bool,
385        wrap_numerical: bool,
386    ) -> PyResult<Self> {
387        let options = if wrap_numerical {
388            CastOptions::Overflowing
389        } else if strict {
390            CastOptions::Strict
391        } else {
392            CastOptions::NonStrict
393        };
394        py.enter_polars_series(|| self.series.cast_with_options(&dtype.0, options))
395    }
396
397    fn get_chunks(&self) -> PyResult<Vec<PyObject>> {
398        Python::with_gil(|py| {
399            let wrap_s = py_modules::polars(py).getattr(py, "wrap_s").unwrap();
400            flatten_series(&self.series)
401                .into_iter()
402                .map(|s| wrap_s.call1(py, (Self::new(s),)))
403                .collect()
404        })
405    }
406
407    fn is_sorted(&self, py: Python<'_>, descending: bool, nulls_last: bool) -> PyResult<bool> {
408        let options = SortOptions {
409            descending,
410            nulls_last,
411            multithreaded: true,
412            maintain_order: false,
413            limit: None,
414        };
415        py.enter_polars(|| self.series.is_sorted(options))
416    }
417
418    fn clear(&self) -> Self {
419        self.series.clear().into()
420    }
421
422    fn head(&self, py: Python<'_>, n: usize) -> PyResult<Self> {
423        py.enter_polars_series(|| Ok(self.series.head(Some(n))))
424    }
425
426    fn tail(&self, py: Python<'_>, n: usize) -> PyResult<Self> {
427        py.enter_polars_series(|| Ok(self.series.tail(Some(n))))
428    }
429
430    fn value_counts(
431        &self,
432        py: Python<'_>,
433        sort: bool,
434        parallel: bool,
435        name: String,
436        normalize: bool,
437    ) -> PyResult<PyDataFrame> {
438        py.enter_polars_df(|| {
439            self.series
440                .value_counts(sort, parallel, name.into(), normalize)
441        })
442    }
443
444    #[pyo3(signature = (offset, length=None))]
445    fn slice(&self, offset: i64, length: Option<usize>) -> Self {
446        let length = length.unwrap_or_else(|| self.series.len());
447        self.series.slice(offset, length).into()
448    }
449
450    pub fn not_(&self, py: Python) -> PyResult<Self> {
451        py.enter_polars_series(|| polars_ops::series::negate_bitwise(&self.series))
452    }
453
454    /// Internal utility function to allow direct access to the row encoding from python.
455    #[pyo3(signature = (dtypes, opts))]
456    fn _row_decode(
457        &self,
458        py: Python<'_>,
459        dtypes: Vec<(String, Wrap<DataType>)>,
460        opts: Vec<(bool, bool, bool)>,
461    ) -> PyResult<PyDataFrame> {
462        py.enter_polars_df(|| {
463            assert_eq!(dtypes.len(), opts.len());
464
465            let opts = opts
466                .into_iter()
467                .map(|(descending, nulls_last, no_order)| {
468                    let mut opt = RowEncodingOptions::default();
469
470                    opt.set(RowEncodingOptions::DESCENDING, descending);
471                    opt.set(RowEncodingOptions::NULLS_LAST, nulls_last);
472                    opt.set(RowEncodingOptions::NO_ORDER, no_order);
473
474                    opt
475                })
476                .collect::<Vec<_>>();
477
478            // The polars-row crate expects the physical arrow types.
479            let arrow_dtypes = dtypes
480                .iter()
481                .map(|(_, dtype)| dtype.0.to_physical().to_arrow(CompatLevel::newest()))
482                .collect::<Vec<_>>();
483
484            let dicts = dtypes
485                .iter()
486                .map(|(_, dtype)| get_row_encoding_context(&dtype.0))
487                .collect::<Vec<_>>();
488
489            // Get the BinaryOffset array.
490            let arr = self.series.rechunk();
491            let arr = arr.binary_offset()?;
492            assert_eq!(arr.chunks().len(), 1);
493            let mut values = arr
494                .downcast_iter()
495                .next()
496                .unwrap()
497                .values_iter()
498                .collect::<Vec<&[u8]>>();
499
500            let columns = unsafe {
501                polars_row::decode::decode_rows(&mut values, &opts, &dicts, &arrow_dtypes)
502            };
503
504            // Construct a DataFrame from the result.
505            let columns = columns
506                .into_iter()
507                .zip(dtypes)
508                .map(|(arr, (name, dtype))| unsafe {
509                    Series::from_chunks_and_dtype_unchecked(
510                        PlSmallStr::from(name),
511                        vec![arr],
512                        &dtype.0.to_physical(),
513                    )
514                    .into_column()
515                    .from_physical_unchecked(&dtype.0)
516                })
517                .collect::<PolarsResult<Vec<_>>>()?;
518            DataFrame::new(columns)
519        })
520    }
521}
522
523macro_rules! impl_set_with_mask {
524    ($name:ident, $native:ty, $cast:ident, $variant:ident) => {
525        fn $name(
526            series: &Series,
527            filter: &PySeries,
528            value: Option<$native>,
529        ) -> PolarsResult<Series> {
530            let mask = filter.series.bool()?;
531            let ca = series.$cast()?;
532            let new = ca.set(mask, value)?;
533            Ok(new.into_series())
534        }
535
536        #[pymethods]
537        impl PySeries {
538            #[pyo3(signature = (filter, value))]
539            fn $name(
540                &self,
541                py: Python<'_>,
542                filter: &PySeries,
543                value: Option<$native>,
544            ) -> PyResult<Self> {
545                py.enter_polars_series(|| $name(&self.series, filter, value))
546            }
547        }
548    };
549}
550
551impl_set_with_mask!(set_with_mask_str, &str, str, String);
552impl_set_with_mask!(set_with_mask_f64, f64, f64, Float64);
553impl_set_with_mask!(set_with_mask_f32, f32, f32, Float32);
554impl_set_with_mask!(set_with_mask_u8, u8, u8, UInt8);
555impl_set_with_mask!(set_with_mask_u16, u16, u16, UInt16);
556impl_set_with_mask!(set_with_mask_u32, u32, u32, UInt32);
557impl_set_with_mask!(set_with_mask_u64, u64, u64, UInt64);
558impl_set_with_mask!(set_with_mask_i8, i8, i8, Int8);
559impl_set_with_mask!(set_with_mask_i16, i16, i16, Int16);
560impl_set_with_mask!(set_with_mask_i32, i32, i32, Int32);
561impl_set_with_mask!(set_with_mask_i64, i64, i64, Int64);
562impl_set_with_mask!(set_with_mask_bool, bool, bool, Boolean);
563
564macro_rules! impl_get {
565    ($name:ident, $series_variant:ident, $type:ty) => {
566        #[pymethods]
567        impl PySeries {
568            fn $name(&self, index: i64) -> Option<$type> {
569                if let Ok(ca) = self.series.$series_variant() {
570                    let index = if index < 0 {
571                        (ca.len() as i64 + index) as usize
572                    } else {
573                        index as usize
574                    };
575                    ca.get(index)
576                } else {
577                    None
578                }
579            }
580        }
581    };
582}
583
584impl_get!(get_f32, f32, f32);
585impl_get!(get_f64, f64, f64);
586impl_get!(get_u8, u8, u8);
587impl_get!(get_u16, u16, u16);
588impl_get!(get_u32, u32, u32);
589impl_get!(get_u64, u64, u64);
590impl_get!(get_i8, i8, i8);
591impl_get!(get_i16, i16, i16);
592impl_get!(get_i32, i32, i32);
593impl_get!(get_i64, i64, i64);
594impl_get!(get_str, str, &str);
595
596macro_rules! impl_get_phys {
597    ($name:ident, $series_variant:ident, $type:ty) => {
598        #[pymethods]
599        impl PySeries {
600            fn $name(&self, index: i64) -> Option<$type> {
601                if let Ok(ca) = self.series.$series_variant() {
602                    let index = if index < 0 {
603                        (ca.len() as i64 + index) as usize
604                    } else {
605                        index as usize
606                    };
607                    ca.physical().get(index)
608                } else {
609                    None
610                }
611            }
612        }
613    };
614}
615
616impl_get_phys!(get_date, date, i32);
617impl_get_phys!(get_datetime, datetime, i64);
618impl_get_phys!(get_duration, duration, i64);
619
#[cfg(test)]
mod test {
    use super::*;
    use crate::series::ToSeries;

    /// Checks that a `PySeries` can be reinterpreted as a `Series`, both
    /// through a direct transmute and through `ToSeries` on a vector.
    #[test]
    fn transmute_to_series() {
        // NOTE: This is only possible because PySeries is #[repr(transparent)]
        // https://doc.rust-lang.org/reference/type-layout.html
        let wrapped = PySeries {
            series: [1i32, 2, 3].iter().collect(),
        };

        let transmuted = unsafe { std::mem::transmute::<PySeries, Series>(wrapped.clone()) };
        assert_eq!(transmuted.sum::<i32>().unwrap(), 6);

        let collection = vec![wrapped];
        let converted = collection.to_series();
        let sums = converted
            .iter()
            .map(|s| s.sum::<i32>().unwrap())
            .collect::<Vec<_>>();
        assert_eq!(sums, vec![6]);
    }
}