1use polars_core::chunked_array::cast::CastOptions;
2use polars_core::series::IsSorted;
3use polars_core::utils::flatten::flatten_series;
4use polars_row::RowEncodingOptions;
5use pyo3::exceptions::{PyIndexError, PyRuntimeError, PyValueError};
6use pyo3::prelude::*;
7use pyo3::types::PyBytes;
8use pyo3::{IntoPyObjectExt, Python};
9
10use self::row_encode::get_row_encoding_context;
11use super::PySeries;
12use crate::dataframe::PyDataFrame;
13use crate::error::PyPolarsErr;
14use crate::prelude::*;
15use crate::py_modules::polars;
16
17#[pymethods]
18impl PySeries {
19 fn struct_unnest(&self, py: Python) -> PyResult<PyDataFrame> {
20 let ca = self.series.struct_().map_err(PyPolarsErr::from)?;
21 let df: DataFrame = py.allow_threads(|| ca.clone().unnest());
22 Ok(df.into())
23 }
24
25 fn struct_fields(&self) -> PyResult<Vec<&str>> {
26 let ca = self.series.struct_().map_err(PyPolarsErr::from)?;
27 Ok(ca
28 .struct_fields()
29 .iter()
30 .map(|s| s.name().as_str())
31 .collect())
32 }
33
34 fn is_sorted_ascending_flag(&self) -> bool {
35 matches!(self.series.is_sorted_flag(), IsSorted::Ascending)
36 }
37
38 fn is_sorted_descending_flag(&self) -> bool {
39 matches!(self.series.is_sorted_flag(), IsSorted::Descending)
40 }
41
42 fn can_fast_explode_flag(&self) -> bool {
43 match self.series.list() {
44 Err(_) => false,
45 Ok(list) => list._can_fast_explode(),
46 }
47 }
48
49 pub fn cat_uses_lexical_ordering(&self) -> PyResult<bool> {
50 let ca = self.series.categorical().map_err(PyPolarsErr::from)?;
51 Ok(ca.uses_lexical_ordering())
52 }
53
54 pub fn cat_is_local(&self) -> PyResult<bool> {
55 let ca = self.series.categorical().map_err(PyPolarsErr::from)?;
56 Ok(ca.get_rev_map().is_local())
57 }
58
59 pub fn cat_to_local(&self, py: Python) -> PyResult<Self> {
60 let ca = self.series.categorical().map_err(PyPolarsErr::from)?;
61 Ok(py.allow_threads(|| ca.to_local().into_series().into()))
62 }
63
64 fn estimated_size(&self) -> usize {
65 self.series.estimated_size()
66 }
67
68 #[cfg(feature = "object")]
69 fn get_object<'py>(&self, py: Python<'py>, index: usize) -> PyResult<Bound<'py, PyAny>> {
70 if matches!(self.series.dtype(), DataType::Object(_, _)) {
71 let obj: Option<&ObjectValue> = self.series.get_object(index).map(|any| any.into());
72 Ok(obj.into_pyobject(py)?)
73 } else {
74 Ok(py.None().into_bound(py))
75 }
76 }
77
78 #[cfg(feature = "dtype-array")]
79 fn reshape(&self, py: Python, dims: Vec<i64>) -> PyResult<Self> {
80 let dims = dims
81 .into_iter()
82 .map(ReshapeDimension::new)
83 .collect::<Vec<_>>();
84
85 let out = py
86 .allow_threads(|| self.series.reshape_array(&dims))
87 .map_err(PyPolarsErr::from)?;
88 Ok(out.into())
89 }
90
91 fn get_fmt(&self, index: usize, str_len_limit: usize) -> String {
93 let v = format!("{}", self.series.get(index).unwrap());
94 if let DataType::String | DataType::Categorical(_, _) | DataType::Enum(_, _) =
95 self.series.dtype()
96 {
97 let v_no_quotes = &v[1..v.len() - 1];
98 let v_trunc = &v_no_quotes[..v_no_quotes
99 .char_indices()
100 .take(str_len_limit)
101 .last()
102 .map(|(i, c)| i + c.len_utf8())
103 .unwrap_or(0)];
104 if v_no_quotes == v_trunc {
105 v
106 } else {
107 format!("\"{v_trunc}…")
108 }
109 } else {
110 v
111 }
112 }
113
114 pub fn rechunk(&mut self, py: Python, in_place: bool) -> Option<Self> {
115 let series = py.allow_threads(|| self.series.rechunk());
116 if in_place {
117 self.series = series;
118 None
119 } else {
120 Some(series.into())
121 }
122 }
123
124 fn get_index(&self, py: Python, index: usize) -> PyResult<PyObject> {
126 let av = match self.series.get(index) {
127 Ok(v) => v,
128 Err(PolarsError::OutOfBounds(err)) => {
129 return Err(PyIndexError::new_err(err.to_string()))
130 },
131 Err(e) => return Err(PyPolarsErr::from(e).into()),
132 };
133
134 match av {
135 AnyValue::List(s) | AnyValue::Array(s, _) => {
136 let pyseries = PySeries::new(s);
137 polars(py).getattr(py, "wrap_s")?.call1(py, (pyseries,))
138 },
139 _ => Wrap(av).into_py_any(py),
140 }
141 }
142
143 fn get_index_signed(&self, py: Python, index: isize) -> PyResult<PyObject> {
145 let index = if index < 0 {
146 match self.len().checked_sub(index.unsigned_abs()) {
147 Some(v) => v,
148 None => {
149 return Err(PyIndexError::new_err(
150 polars_err!(oob = index, self.len()).to_string(),
151 ));
152 },
153 }
154 } else {
155 usize::try_from(index).unwrap()
156 };
157 self.get_index(py, index)
158 }
159
160 fn bitand(&self, py: Python, other: &PySeries) -> PyResult<Self> {
161 let out = py
162 .allow_threads(|| &self.series & &other.series)
163 .map_err(PyPolarsErr::from)?;
164 Ok(out.into())
165 }
166
167 fn bitor(&self, py: Python, other: &PySeries) -> PyResult<Self> {
168 let out = py
169 .allow_threads(|| &self.series | &other.series)
170 .map_err(PyPolarsErr::from)?;
171 Ok(out.into())
172 }
173 fn bitxor(&self, py: Python, other: &PySeries) -> PyResult<Self> {
174 let out = py
175 .allow_threads(|| &self.series ^ &other.series)
176 .map_err(PyPolarsErr::from)?;
177 Ok(out.into())
178 }
179
180 fn chunk_lengths(&self) -> Vec<usize> {
181 self.series.chunk_lengths().collect()
182 }
183
184 pub fn name(&self) -> &str {
185 self.series.name().as_str()
186 }
187
188 fn rename(&mut self, name: &str) {
189 self.series.rename(name.into());
190 }
191
192 fn dtype<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
193 Wrap(self.series.dtype().clone()).into_pyobject(py)
194 }
195
196 fn set_sorted_flag(&self, descending: bool) -> Self {
197 let mut out = self.series.clone();
198 if descending {
199 out.set_sorted_flag(IsSorted::Descending);
200 } else {
201 out.set_sorted_flag(IsSorted::Ascending)
202 }
203 out.into()
204 }
205
206 fn n_chunks(&self) -> usize {
207 self.series.n_chunks()
208 }
209
210 fn append(&mut self, other: &PySeries) -> PyResult<()> {
211 self.series
212 .append(&other.series)
213 .map_err(PyPolarsErr::from)?;
214 Ok(())
215 }
216
217 fn extend(&mut self, py: Python, other: &PySeries) -> PyResult<()> {
218 py.allow_threads(|| self.series.extend(&other.series))
219 .map_err(PyPolarsErr::from)?;
220 Ok(())
221 }
222
223 fn new_from_index(&self, py: Python, index: usize, length: usize) -> PyResult<Self> {
224 if index >= self.series.len() {
225 Err(PyValueError::new_err("index is out of bounds"))
226 } else {
227 Ok(py.allow_threads(|| self.series.new_from_index(index, length).into()))
228 }
229 }
230
231 fn filter(&self, py: Python, filter: &PySeries) -> PyResult<Self> {
232 let filter_series = &filter.series;
233 if let Ok(ca) = filter_series.bool() {
234 let series = py
235 .allow_threads(|| self.series.filter(ca))
236 .map_err(PyPolarsErr::from)?;
237 Ok(PySeries { series })
238 } else {
239 Err(PyRuntimeError::new_err("Expected a boolean mask"))
240 }
241 }
242
243 fn sort(
244 &mut self,
245 py: Python,
246 descending: bool,
247 nulls_last: bool,
248 multithreaded: bool,
249 ) -> PyResult<Self> {
250 Ok(py
251 .allow_threads(|| {
252 self.series.sort(
253 SortOptions::default()
254 .with_order_descending(descending)
255 .with_nulls_last(nulls_last)
256 .with_multithreaded(multithreaded),
257 )
258 })
259 .map_err(PyPolarsErr::from)?
260 .into())
261 }
262
263 fn gather_with_series(&self, py: Python, indices: &PySeries) -> PyResult<Self> {
264 py.allow_threads(|| {
265 let indices = indices.series.idx().map_err(PyPolarsErr::from)?;
266 let s = self.series.take(indices).map_err(PyPolarsErr::from)?;
267 Ok(s.into())
268 })
269 }
270
271 fn null_count(&self) -> PyResult<usize> {
272 Ok(self.series.null_count())
273 }
274
275 fn has_nulls(&self) -> bool {
276 self.series.has_nulls()
277 }
278
279 fn equals(
280 &self,
281 py: Python,
282 other: &PySeries,
283 check_dtypes: bool,
284 check_names: bool,
285 null_equal: bool,
286 ) -> bool {
287 if check_dtypes && (self.series.dtype() != other.series.dtype()) {
288 return false;
289 }
290 if check_names && (self.series.name() != other.series.name()) {
291 return false;
292 }
293 if null_equal {
294 py.allow_threads(|| self.series.equals_missing(&other.series))
295 } else {
296 py.allow_threads(|| self.series.equals(&other.series))
297 }
298 }
299
300 fn as_str(&self) -> PyResult<String> {
301 Ok(format!("{:?}", self.series))
302 }
303
304 #[allow(clippy::len_without_is_empty)]
305 pub fn len(&self) -> usize {
306 self.series.len()
307 }
308
309 fn as_single_ptr(&mut self, py: Python) -> PyResult<usize> {
312 let ptr = py
313 .allow_threads(|| self.series.as_single_ptr())
314 .map_err(PyPolarsErr::from)?;
315 Ok(ptr)
316 }
317
318 fn clone(&self) -> Self {
319 self.series.clone().into()
320 }
321
322 fn zip_with(&self, py: Python, mask: &PySeries, other: &PySeries) -> PyResult<Self> {
323 let mask = mask.series.bool().map_err(PyPolarsErr::from)?;
324 let s = py
325 .allow_threads(|| self.series.zip_with(mask, &other.series))
326 .map_err(PyPolarsErr::from)?;
327 Ok(s.into())
328 }
329
330 #[pyo3(signature = (separator, drop_first=false))]
331 fn to_dummies(
332 &self,
333 py: Python,
334 separator: Option<&str>,
335 drop_first: bool,
336 ) -> PyResult<PyDataFrame> {
337 let df = py
338 .allow_threads(|| self.series.to_dummies(separator, drop_first))
339 .map_err(PyPolarsErr::from)?;
340 Ok(df.into())
341 }
342
343 fn get_list(&self, index: usize) -> Option<Self> {
344 let ca = self.series.list().ok()?;
345 Some(ca.get_as_series(index)?.into())
346 }
347
348 fn n_unique(&self, py: Python) -> PyResult<usize> {
349 let n = py
350 .allow_threads(|| self.series.n_unique())
351 .map_err(PyPolarsErr::from)?;
352 Ok(n)
353 }
354
355 fn floor(&self, py: Python) -> PyResult<Self> {
356 let s = py
357 .allow_threads(|| self.series.floor())
358 .map_err(PyPolarsErr::from)?;
359 Ok(s.into())
360 }
361
362 fn shrink_to_fit(&mut self, py: Python) {
363 py.allow_threads(|| self.series.shrink_to_fit());
364 }
365
366 fn dot<'py>(&self, other: &PySeries, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
367 let lhs_dtype = self.series.dtype();
368 let rhs_dtype = other.series.dtype();
369
370 if !lhs_dtype.is_primitive_numeric() {
371 return Err(PyPolarsErr::from(polars_err!(opq = dot, lhs_dtype)).into());
372 };
373 if !rhs_dtype.is_primitive_numeric() {
374 return Err(PyPolarsErr::from(polars_err!(opq = dot, rhs_dtype)).into());
375 }
376
377 let result: AnyValue = if lhs_dtype.is_float() || rhs_dtype.is_float() {
378 py.allow_threads(|| (&self.series * &other.series)?.sum::<f64>())
379 .map_err(PyPolarsErr::from)?
380 .into()
381 } else {
382 py.allow_threads(|| (&self.series * &other.series)?.sum::<i64>())
383 .map_err(PyPolarsErr::from)?
384 .into()
385 };
386
387 Wrap(result).into_pyobject(py)
388 }
389
390 fn __getstate__<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyBytes>> {
391 Ok(PyBytes::new(
393 py,
394 &py.allow_threads(|| self.series.serialize_to_bytes().map_err(PyPolarsErr::from))?,
395 ))
396 }
397
398 fn __setstate__(&mut self, py: Python, state: PyObject) -> PyResult<()> {
399 use pyo3::pybacked::PyBackedBytes;
402 match state.extract::<PyBackedBytes>(py) {
403 Ok(s) => py.allow_threads(|| {
404 let s = Series::deserialize_from_reader(&mut &*s).map_err(PyPolarsErr::from)?;
405 self.series = s;
406 Ok(())
407 }),
408 Err(e) => Err(e),
409 }
410 }
411
412 fn skew(&self, py: Python, bias: bool) -> PyResult<Option<f64>> {
413 let out = py
414 .allow_threads(|| self.series.skew(bias))
415 .map_err(PyPolarsErr::from)?;
416 Ok(out)
417 }
418
419 fn kurtosis(&self, py: Python, fisher: bool, bias: bool) -> PyResult<Option<f64>> {
420 let out = py
421 .allow_threads(|| self.series.kurtosis(fisher, bias))
422 .map_err(PyPolarsErr::from)?;
423 Ok(out)
424 }
425
426 fn cast(
427 &self,
428 py: Python,
429 dtype: Wrap<DataType>,
430 strict: bool,
431 wrap_numerical: bool,
432 ) -> PyResult<Self> {
433 let options = if wrap_numerical {
434 CastOptions::Overflowing
435 } else if strict {
436 CastOptions::Strict
437 } else {
438 CastOptions::NonStrict
439 };
440
441 let dtype = dtype.0;
442 let out = py.allow_threads(|| self.series.cast_with_options(&dtype, options));
443 let out = out.map_err(PyPolarsErr::from)?;
444 Ok(out.into())
445 }
446
447 fn get_chunks(&self) -> PyResult<Vec<PyObject>> {
448 Python::with_gil(|py| {
449 let wrap_s = py_modules::polars(py).getattr(py, "wrap_s").unwrap();
450 flatten_series(&self.series)
451 .into_iter()
452 .map(|s| wrap_s.call1(py, (Self::new(s),)))
453 .collect()
454 })
455 }
456
457 fn is_sorted(&self, py: Python, descending: bool, nulls_last: bool) -> PyResult<bool> {
458 let options = SortOptions {
459 descending,
460 nulls_last,
461 multithreaded: true,
462 maintain_order: false,
463 limit: None,
464 };
465 Ok(py
466 .allow_threads(|| self.series.is_sorted(options))
467 .map_err(PyPolarsErr::from)?)
468 }
469
470 fn clear(&self) -> Self {
471 self.series.clear().into()
472 }
473
474 fn head(&self, py: Python, n: usize) -> Self {
475 py.allow_threads(|| self.series.head(Some(n))).into()
476 }
477
478 fn tail(&self, py: Python, n: usize) -> Self {
479 py.allow_threads(|| self.series.tail(Some(n))).into()
480 }
481
482 fn value_counts(
483 &self,
484 py: Python,
485 sort: bool,
486 parallel: bool,
487 name: String,
488 normalize: bool,
489 ) -> PyResult<PyDataFrame> {
490 let out = py
491 .allow_threads(|| {
492 self.series
493 .value_counts(sort, parallel, name.into(), normalize)
494 })
495 .map_err(PyPolarsErr::from)?;
496 Ok(out.into())
497 }
498
499 #[pyo3(signature = (offset, length=None))]
500 fn slice(&self, offset: i64, length: Option<usize>) -> Self {
501 let length = length.unwrap_or_else(|| self.series.len());
502 self.series.slice(offset, length).into()
503 }
504
505 pub fn not_(&self, py: Python) -> PyResult<Self> {
506 let out = py
507 .allow_threads(|| polars_ops::series::negate_bitwise(&self.series))
508 .map_err(PyPolarsErr::from)?;
509 Ok(out.into())
510 }
511
512 #[pyo3(signature = (dtypes, opts))]
514 fn _row_decode<'py>(
515 &'py self,
516 py: Python<'py>,
517 dtypes: Vec<(String, Wrap<DataType>)>,
518 opts: Vec<(bool, bool, bool)>,
519 ) -> PyResult<PyDataFrame> {
520 py.allow_threads(|| {
521 assert_eq!(dtypes.len(), opts.len());
522
523 let opts = opts
524 .into_iter()
525 .map(|(descending, nulls_last, no_order)| {
526 let mut opt = RowEncodingOptions::default();
527
528 opt.set(RowEncodingOptions::DESCENDING, descending);
529 opt.set(RowEncodingOptions::NULLS_LAST, nulls_last);
530 opt.set(RowEncodingOptions::NO_ORDER, no_order);
531
532 opt
533 })
534 .collect::<Vec<_>>();
535
536 let arrow_dtypes = dtypes
538 .iter()
539 .map(|(_, dtype)| dtype.0.to_physical().to_arrow(CompatLevel::newest()))
540 .collect::<Vec<_>>();
541
542 let dicts = dtypes
543 .iter()
544 .map(|(_, dtype)| get_row_encoding_context(&dtype.0))
545 .collect::<Vec<_>>();
546
547 let arr = self.series.rechunk();
549 let arr = arr.binary_offset().map_err(PyPolarsErr::from)?;
550 assert_eq!(arr.chunks().len(), 1);
551 let mut values = arr
552 .downcast_iter()
553 .next()
554 .unwrap()
555 .values_iter()
556 .collect::<Vec<&[u8]>>();
557
558 let columns = PyResult::Ok(unsafe {
559 polars_row::decode::decode_rows(&mut values, &opts, &dicts, &arrow_dtypes)
560 })?;
561
562 let columns = columns
564 .into_iter()
565 .zip(dtypes)
566 .map(|(arr, (name, dtype))| unsafe {
567 Series::from_chunks_and_dtype_unchecked(
568 PlSmallStr::from(name),
569 vec![arr],
570 &dtype.0.to_physical(),
571 )
572 .into_column()
573 .from_physical_unchecked(&dtype.0)
574 })
575 .collect::<PolarsResult<Vec<_>>>()
576 .map_err(PyPolarsErr::from)?;
577 Ok(DataFrame::new(columns).map_err(PyPolarsErr::from)?.into())
578 })
579 }
580}
581
582macro_rules! impl_set_with_mask {
583 ($name:ident, $native:ty, $cast:ident, $variant:ident) => {
584 fn $name(
585 series: &Series,
586 filter: &PySeries,
587 value: Option<$native>,
588 ) -> PolarsResult<Series> {
589 let mask = filter.series.bool()?;
590 let ca = series.$cast()?;
591 let new = ca.set(mask, value)?;
592 Ok(new.into_series())
593 }
594
595 #[pymethods]
596 impl PySeries {
597 #[pyo3(signature = (filter, value))]
598 fn $name(
599 &self,
600 py: Python,
601 filter: &PySeries,
602 value: Option<$native>,
603 ) -> PyResult<Self> {
604 let series = py
605 .allow_threads(|| $name(&self.series, filter, value))
606 .map_err(PyPolarsErr::from)?;
607 Ok(Self::new(series))
608 }
609 }
610 };
611}
612
613impl_set_with_mask!(set_with_mask_str, &str, str, String);
614impl_set_with_mask!(set_with_mask_f64, f64, f64, Float64);
615impl_set_with_mask!(set_with_mask_f32, f32, f32, Float32);
616impl_set_with_mask!(set_with_mask_u8, u8, u8, UInt8);
617impl_set_with_mask!(set_with_mask_u16, u16, u16, UInt16);
618impl_set_with_mask!(set_with_mask_u32, u32, u32, UInt32);
619impl_set_with_mask!(set_with_mask_u64, u64, u64, UInt64);
620impl_set_with_mask!(set_with_mask_i8, i8, i8, Int8);
621impl_set_with_mask!(set_with_mask_i16, i16, i16, Int16);
622impl_set_with_mask!(set_with_mask_i32, i32, i32, Int32);
623impl_set_with_mask!(set_with_mask_i64, i64, i64, Int64);
624impl_set_with_mask!(set_with_mask_bool, bool, bool, Boolean);
625
626macro_rules! impl_get {
627 ($name:ident, $series_variant:ident, $type:ty) => {
628 #[pymethods]
629 impl PySeries {
630 fn $name(&self, index: i64) -> Option<$type> {
631 if let Ok(ca) = self.series.$series_variant() {
632 let index = if index < 0 {
633 (ca.len() as i64 + index) as usize
634 } else {
635 index as usize
636 };
637 ca.get(index)
638 } else {
639 None
640 }
641 }
642 }
643 };
644}
645
646impl_get!(get_f32, f32, f32);
647impl_get!(get_f64, f64, f64);
648impl_get!(get_u8, u8, u8);
649impl_get!(get_u16, u16, u16);
650impl_get!(get_u32, u32, u32);
651impl_get!(get_u64, u64, u64);
652impl_get!(get_i8, i8, i8);
653impl_get!(get_i16, i16, i16);
654impl_get!(get_i32, i32, i32);
655impl_get!(get_i64, i64, i64);
656impl_get!(get_str, str, &str);
657impl_get!(get_date, date, i32);
658impl_get!(get_datetime, datetime, i64);
659impl_get!(get_duration, duration, i64);
660
#[cfg(test)]
mod test {
    use super::*;
    use crate::series::ToSeries;

    /// A `PySeries` can be reinterpreted as a `Series`, and a collection of
    /// `PySeries` converts back through `ToSeries`.
    #[test]
    fn transmute_to_series() {
        let values = [1i32, 2, 3];
        let py_series = PySeries {
            series: values.iter().collect(),
        };

        // NOTE(review): presumably sound only because `PySeries` has the same
        // layout as `Series` (e.g. `#[repr(transparent)]`) - confirm at the
        // type definition.
        let reinterpreted =
            unsafe { std::mem::transmute::<PySeries, Series>(py_series.clone()) };
        assert_eq!(reinterpreted.sum::<i32>().unwrap(), 6);

        let collection = vec![py_series];
        let converted = collection.to_series();
        let sums = converted
            .iter()
            .map(|s| s.sum::<i32>().unwrap())
            .collect::<Vec<_>>();
        assert_eq!(sums, vec![6]);
    }
}