1#![allow(ambiguous_glob_reexports)]
2#[cfg(feature = "dtype-categorical")]
4pub mod cat;
5
6#[cfg(feature = "dtype-categorical")]
7pub use cat::*;
8#[cfg(feature = "rolling_window_by")]
9pub(crate) use polars_time::prelude::*;
10
11mod arithmetic;
12mod arity;
13#[cfg(feature = "dtype-array")]
14mod array;
15pub mod binary;
16#[cfg(feature = "bitwise")]
17mod bitwise;
18mod builder_dsl;
19pub use builder_dsl::*;
20#[cfg(feature = "temporal")]
21pub mod dt;
22mod expr;
23mod expr_dyn_fn;
24mod format;
25mod from;
26pub mod function_expr;
27pub mod functions;
28mod list;
29#[cfg(feature = "meta")]
30mod meta;
31mod name;
32mod options;
33#[cfg(feature = "python")]
34pub mod python_dsl;
35#[cfg(feature = "random")]
36mod random;
37mod scan_sources;
38mod selector;
39mod statistics;
40#[cfg(feature = "strings")]
41pub mod string;
42#[cfg(feature = "dtype-struct")]
43mod struct_;
44pub mod udf;
45
46use std::fmt::Debug;
47use std::sync::Arc;
48
49mod plan;
50pub use arity::*;
51#[cfg(feature = "dtype-array")]
52pub use array::*;
53pub use expr::*;
54pub use function_expr::schema::FieldsMapper;
55pub use function_expr::*;
56pub use functions::*;
57pub use list::*;
58#[cfg(feature = "meta")]
59pub use meta::*;
60pub use name::*;
61pub use options::*;
62pub use plan::*;
63use polars_compute::rolling::QuantileMethod;
64use polars_core::chunked_array::cast::CastOptions;
65use polars_core::error::feature_gated;
66use polars_core::prelude::*;
67use polars_core::series::IsSorted;
68#[cfg(feature = "diff")]
69use polars_core::series::ops::NullBehavior;
70#[cfg(any(feature = "search_sorted", feature = "is_between"))]
71use polars_core::utils::SuperTypeFlags;
72use polars_core::utils::{SuperTypeOptions, try_get_supertype};
73pub use selector::Selector;
74#[cfg(feature = "dtype-struct")]
75pub use struct_::*;
76pub use udf::UserDefinedFunction;
77mod file_scan;
78pub use file_scan::*;
79pub use scan_sources::{ScanSource, ScanSourceIter, ScanSourceRef, ScanSources};
80
81pub use crate::plans::lit;
82use crate::prelude::*;
83
84impl Expr {
85 pub(crate) fn with_function_options<F>(self, func: F) -> Expr
87 where
88 F: Fn(FunctionOptions) -> FunctionOptions,
89 {
90 match self {
91 Self::AnonymousFunction {
92 input,
93 function,
94 output_type,
95 mut options,
96 } => {
97 options = func(options);
98 Self::AnonymousFunction {
99 input,
100 function,
101 output_type,
102 options,
103 }
104 },
105 Self::Function {
106 input,
107 function,
108 mut options,
109 } => {
110 options = func(options);
111 Self::Function {
112 input,
113 function,
114 options,
115 }
116 },
117 _ => {
118 panic!("implementation error")
119 },
120 }
121 }
122
123 #[doc(hidden)]
126 pub fn with_fmt(self, name: &'static str) -> Expr {
127 self.with_function_options(|mut options| {
128 options.fmt_str = name;
129 options
130 })
131 }
132
133 pub fn eq<E: Into<Expr>>(self, other: E) -> Expr {
135 binary_expr(self, Operator::Eq, other.into())
136 }
137
138 pub fn eq_missing<E: Into<Expr>>(self, other: E) -> Expr {
140 binary_expr(self, Operator::EqValidity, other.into())
141 }
142
143 pub fn neq<E: Into<Expr>>(self, other: E) -> Expr {
145 binary_expr(self, Operator::NotEq, other.into())
146 }
147
148 pub fn neq_missing<E: Into<Expr>>(self, other: E) -> Expr {
150 binary_expr(self, Operator::NotEqValidity, other.into())
151 }
152
153 pub fn lt<E: Into<Expr>>(self, other: E) -> Expr {
155 binary_expr(self, Operator::Lt, other.into())
156 }
157
158 pub fn gt<E: Into<Expr>>(self, other: E) -> Expr {
160 binary_expr(self, Operator::Gt, other.into())
161 }
162
163 pub fn gt_eq<E: Into<Expr>>(self, other: E) -> Expr {
165 binary_expr(self, Operator::GtEq, other.into())
166 }
167
168 pub fn lt_eq<E: Into<Expr>>(self, other: E) -> Expr {
170 binary_expr(self, Operator::LtEq, other.into())
171 }
172
173 #[allow(clippy::should_implement_trait)]
175 pub fn not(self) -> Expr {
176 self.map_unary(BooleanFunction::Not)
177 }
178
179 pub fn alias<S>(self, name: S) -> Expr
181 where
182 S: Into<PlSmallStr>,
183 {
184 Expr::Alias(Arc::new(self), name.into())
185 }
186
187 #[allow(clippy::wrong_self_convention)]
189 pub fn is_null(self) -> Self {
190 self.map_unary(BooleanFunction::IsNull)
191 }
192
193 #[allow(clippy::wrong_self_convention)]
195 pub fn is_not_null(self) -> Self {
196 self.map_unary(BooleanFunction::IsNotNull)
197 }
198
199 pub fn drop_nulls(self) -> Self {
201 self.map_unary(FunctionExpr::DropNulls)
202 }
203
204 pub fn drop_nans(self) -> Self {
206 self.map_unary(FunctionExpr::DropNans)
207 }
208
209 pub fn n_unique(self) -> Self {
211 AggExpr::NUnique(Arc::new(self)).into()
212 }
213
214 pub fn first(self) -> Self {
216 AggExpr::First(Arc::new(self)).into()
217 }
218
219 pub fn last(self) -> Self {
221 AggExpr::Last(Arc::new(self)).into()
222 }
223
224 pub fn implode(self) -> Self {
226 AggExpr::Implode(Arc::new(self)).into()
227 }
228
229 pub fn quantile(self, quantile: Expr, method: QuantileMethod) -> Self {
231 AggExpr::Quantile {
232 expr: Arc::new(self),
233 quantile: Arc::new(quantile),
234 method,
235 }
236 .into()
237 }
238
239 pub fn agg_groups(self) -> Self {
241 AggExpr::AggGroups(Arc::new(self)).into()
242 }
243
244 pub fn flatten(self) -> Self {
246 self.explode()
247 }
248
249 pub fn explode(self) -> Self {
251 Expr::Explode {
252 input: Arc::new(self),
253 skip_empty: false,
254 }
255 }
256
257 pub fn slice<E: Into<Expr>, F: Into<Expr>>(self, offset: E, length: F) -> Self {
260 Expr::Slice {
261 input: Arc::new(self),
262 offset: Arc::new(offset.into()),
263 length: Arc::new(length.into()),
264 }
265 }
266
267 pub fn append<E: Into<Expr>>(self, other: E, upcast: bool) -> Self {
269 let output_type = if upcast {
270 GetOutput::super_type()
271 } else {
272 GetOutput::same_type()
273 };
274
275 apply_binary(
276 self,
277 other.into(),
278 move |mut a, mut b| {
279 if upcast {
280 let dtype = try_get_supertype(a.dtype(), b.dtype())?;
281 a = a.cast(&dtype)?;
282 b = b.cast(&dtype)?;
283 }
284 a.append(&b)?;
285 Ok(Some(a))
286 },
287 output_type,
288 )
289 }
290
291 pub fn head(self, length: Option<usize>) -> Self {
293 self.slice(lit(0), lit(length.unwrap_or(10) as u64))
294 }
295
296 pub fn tail(self, length: Option<usize>) -> Self {
298 let len = length.unwrap_or(10);
299 self.slice(lit(-(len as i64)), lit(len as u64))
300 }
301
302 pub fn unique(self) -> Self {
304 self.map_unary(FunctionExpr::Unique(false))
305 }
306
307 pub fn unique_stable(self) -> Self {
310 self.map_unary(FunctionExpr::Unique(true))
311 }
312
313 pub fn arg_unique(self) -> Self {
315 self.map_unary(FunctionExpr::ArgUnique)
316 }
317
318 pub fn arg_min(self) -> Self {
320 let options = FunctionOptions::aggregation().with_fmt_str("arg_min");
321 self.function_with_options(
322 move |c: Column| {
323 Ok(Some(Column::new(
324 c.name().clone(),
325 &[c.as_materialized_series().arg_min().map(|idx| idx as u32)],
326 )))
327 },
328 GetOutput::from_type(IDX_DTYPE),
329 options,
330 )
331 }
332
333 pub fn arg_max(self) -> Self {
335 let options = FunctionOptions::aggregation().with_fmt_str("arg_max");
336 self.function_with_options(
337 move |c: Column| {
338 Ok(Some(Column::new(
339 c.name().clone(),
340 &[c.as_materialized_series()
341 .arg_max()
342 .map(|idx| idx as IdxSize)],
343 )))
344 },
345 GetOutput::from_type(IDX_DTYPE),
346 options,
347 )
348 }
349
350 pub fn arg_sort(self, sort_options: SortOptions) -> Self {
352 let options = FunctionOptions::groupwise().with_fmt_str("arg_sort");
353 self.function_with_options(
354 move |c: Column| {
355 Ok(Some(
356 c.as_materialized_series()
357 .arg_sort(sort_options)
358 .into_column(),
359 ))
360 },
361 GetOutput::from_type(IDX_DTYPE),
362 options,
363 )
364 }
365
/// Apply `FunctionExpr::IndexOf` with `element` as the second input.
#[cfg(feature = "index_of")]
pub fn index_of<E: Into<Expr>>(self, element: E) -> Expr {
    let needle: Expr = element.into();
    self.map_binary(FunctionExpr::IndexOf, needle)
}
371
/// Apply `FunctionExpr::SearchSorted(side)` with `element` as the second input.
#[cfg(feature = "search_sorted")]
pub fn search_sorted<E: Into<Expr>>(self, element: E, side: SearchSortedSide) -> Expr {
    let function = FunctionExpr::SearchSorted(side);
    let needle: Expr = element.into();
    self.map_binary(function, needle)
}
377
378 pub fn strict_cast(self, dtype: DataType) -> Self {
382 Expr::Cast {
383 expr: Arc::new(self),
384 dtype,
385 options: CastOptions::Strict,
386 }
387 }
388
389 pub fn cast(self, dtype: DataType) -> Self {
391 Expr::Cast {
392 expr: Arc::new(self),
393 dtype,
394 options: CastOptions::NonStrict,
395 }
396 }
397
398 pub fn cast_with_options(self, dtype: DataType, cast_options: CastOptions) -> Self {
400 Expr::Cast {
401 expr: Arc::new(self),
402 dtype,
403 options: cast_options,
404 }
405 }
406
407 pub fn gather<E: Into<Expr>>(self, idx: E) -> Self {
409 Expr::Gather {
410 expr: Arc::new(self),
411 idx: Arc::new(idx.into()),
412 returns_scalar: false,
413 }
414 }
415
416 pub fn get<E: Into<Expr>>(self, idx: E) -> Self {
418 Expr::Gather {
419 expr: Arc::new(self),
420 idx: Arc::new(idx.into()),
421 returns_scalar: true,
422 }
423 }
424
425 pub fn sort(self, options: SortOptions) -> Self {
455 Expr::Sort {
456 expr: Arc::new(self),
457 options,
458 }
459 }
460
/// Apply `FunctionExpr::TopK` with `descending: false`, passing `k` as the second input.
#[cfg(feature = "top_k")]
pub fn top_k(self, k: Expr) -> Self {
    let function = FunctionExpr::TopK { descending: false };
    self.map_binary(function, k)
}
468
/// Apply `FunctionExpr::TopKBy` with the given per-key `descending` flags.
///
/// The function inputs are `self`, then `k`, then every expression in `by`
/// (each cloned and converted).
#[cfg(feature = "top_k")]
pub fn top_k_by<K: Into<Expr>, E: AsRef<[IE]>, IE: Into<Expr> + Clone>(
    self,
    k: K,
    by: E,
    descending: Vec<bool>,
) -> Self {
    let function = FunctionExpr::TopKBy { descending };
    let k: Expr = k.into();
    let keys = by.as_ref().iter().map(|e| -> Expr { e.clone().into() });
    self.map_n_ary(function, [k].into_iter().chain(keys))
}
486
/// Apply `FunctionExpr::TopK` with `descending: true`, passing `k` as the second input.
#[cfg(feature = "top_k")]
pub fn bottom_k(self, k: Expr) -> Self {
    let function = FunctionExpr::TopK { descending: true };
    self.map_binary(function, k)
}
494
/// Apply `FunctionExpr::TopKBy` with every `descending` flag inverted.
///
/// The function inputs are `self`, then `k`, then every expression in `by`
/// (each cloned and converted).
#[cfg(feature = "top_k")]
pub fn bottom_k_by<K: Into<Expr>, E: AsRef<[IE]>, IE: Into<Expr> + Clone>(
    self,
    k: K,
    by: E,
    descending: Vec<bool>,
) -> Self {
    // "Bottom k" is expressed as "top k" with each sort direction flipped.
    let flipped: Vec<bool> = descending.iter().map(|&flag| !flag).collect();
    let function = FunctionExpr::TopKBy {
        descending: flipped,
    };
    let k: Expr = k.into();
    let keys = by.as_ref().iter().map(|e| -> Expr { e.clone().into() });
    self.map_n_ary(function, [k].into_iter().chain(keys))
}
514
515 pub fn reverse(self) -> Self {
517 self.map_unary(FunctionExpr::Reverse)
518 }
519
520 pub fn map<F>(self, function: F, output_type: GetOutput) -> Self
530 where
531 F: Fn(Column) -> PolarsResult<Option<Column>> + 'static + Send + Sync,
532 {
533 let f = move |c: &mut [Column]| function(std::mem::take(&mut c[0]));
534
535 Expr::AnonymousFunction {
536 input: vec![self],
537 function: new_column_udf(f),
538 output_type,
539 options: FunctionOptions::elementwise()
540 .with_fmt_str("map")
541 .with_flags(|f| f | FunctionFlags::OPTIONAL_RE_ENTRANT),
542 }
543 }
544
545 pub fn map_many<F>(self, function: F, arguments: &[Expr], output_type: GetOutput) -> Self
549 where
550 F: Fn(&mut [Column]) -> PolarsResult<Option<Column>> + 'static + Send + Sync,
551 {
552 let mut input = vec![self];
553 input.extend_from_slice(arguments);
554
555 Expr::AnonymousFunction {
556 input,
557 function: new_column_udf(function),
558 output_type,
559 options: FunctionOptions::elementwise().with_fmt_str(""),
560 }
561 }
562
563 pub fn function_with_options<F>(
565 self,
566 function: F,
567 output_type: GetOutput,
568 options: FunctionOptions,
569 ) -> Self
570 where
571 F: Fn(Column) -> PolarsResult<Option<Column>> + 'static + Send + Sync,
572 {
573 let f = move |c: &mut [Column]| function(std::mem::take(&mut c[0]));
574
575 Expr::AnonymousFunction {
576 input: vec![self],
577 function: new_column_udf(f),
578 output_type,
579 options,
580 }
581 }
582
583 pub fn apply<F>(self, function: F, output_type: GetOutput) -> Self
593 where
594 F: Fn(Column) -> PolarsResult<Option<Column>> + 'static + Send + Sync,
595 {
596 let f = move |c: &mut [Column]| function(std::mem::take(&mut c[0]));
597
598 Expr::AnonymousFunction {
599 input: vec![self],
600 function: new_column_udf(f),
601 output_type,
602 options: FunctionOptions::groupwise().with_fmt_str(""),
603 }
604 }
605
606 pub fn apply_many<F>(self, function: F, arguments: &[Expr], output_type: GetOutput) -> Self
610 where
611 F: Fn(&mut [Column]) -> PolarsResult<Option<Column>> + 'static + Send + Sync,
612 {
613 let mut input = vec![self];
614 input.extend_from_slice(arguments);
615
616 Expr::AnonymousFunction {
617 input,
618 function: new_column_udf(function),
619 output_type,
620 options: FunctionOptions::groupwise().with_fmt_str(""),
621 }
622 }
623
624 #[allow(clippy::wrong_self_convention)]
626 pub fn is_finite(self) -> Self {
627 self.map_unary(BooleanFunction::IsFinite)
628 }
629
630 #[allow(clippy::wrong_self_convention)]
632 pub fn is_infinite(self) -> Self {
633 self.map_unary(BooleanFunction::IsInfinite)
634 }
635
636 pub fn is_nan(self) -> Self {
638 self.map_unary(BooleanFunction::IsNan)
639 }
640
641 pub fn is_not_nan(self) -> Self {
643 self.map_unary(BooleanFunction::IsNotNan)
644 }
645
646 pub fn shift(self, n: Expr) -> Self {
648 self.map_binary(FunctionExpr::Shift, n)
649 }
650
651 pub fn shift_and_fill<E: Into<Expr>, IE: Into<Expr>>(self, n: E, fill_value: IE) -> Self {
653 self.map_ternary(FunctionExpr::ShiftAndFill, n.into(), fill_value.into())
654 }
655
/// Apply `FunctionExpr::CumCount`, forwarding the `reverse` flag.
#[cfg(feature = "cum_agg")]
pub fn cum_count(self, reverse: bool) -> Self {
    let function = FunctionExpr::CumCount { reverse };
    self.map_unary(function)
}
661
/// Apply `FunctionExpr::CumSum`, forwarding the `reverse` flag.
#[cfg(feature = "cum_agg")]
pub fn cum_sum(self, reverse: bool) -> Self {
    let function = FunctionExpr::CumSum { reverse };
    self.map_unary(function)
}
667
/// Apply `FunctionExpr::CumProd`, forwarding the `reverse` flag.
#[cfg(feature = "cum_agg")]
pub fn cum_prod(self, reverse: bool) -> Self {
    let function = FunctionExpr::CumProd { reverse };
    self.map_unary(function)
}
673
/// Apply `FunctionExpr::CumMin`, forwarding the `reverse` flag.
#[cfg(feature = "cum_agg")]
pub fn cum_min(self, reverse: bool) -> Self {
    let function = FunctionExpr::CumMin { reverse };
    self.map_unary(function)
}
679
/// Apply `FunctionExpr::CumMax`, forwarding the `reverse` flag.
#[cfg(feature = "cum_agg")]
pub fn cum_max(self, reverse: bool) -> Self {
    let function = FunctionExpr::CumMax { reverse };
    self.map_unary(function)
}
685
686 pub fn product(self) -> Self {
688 let options = FunctionOptions::aggregation().with_fmt_str("product");
689 self.function_with_options(
690 move |c: Column| {
691 Some(
692 c.product()
693 .map(|sc| sc.into_series(c.name().clone()).into_column()),
694 )
695 .transpose()
696 },
697 GetOutput::map_dtype(|dt| {
698 use DataType as T;
699 Ok(match dt {
700 T::Float32 => T::Float32,
701 T::Float64 => T::Float64,
702 T::UInt64 => T::UInt64,
703 #[cfg(feature = "dtype-i128")]
704 T::Int128 => T::Int128,
705 _ => T::Int64,
706 })
707 }),
708 options,
709 )
710 }
711
/// Apply `FunctionExpr::Round` with the given `decimals` and rounding `mode`.
#[cfg(feature = "round_series")]
pub fn round(self, decimals: u32, mode: RoundMode) -> Self {
    let function = FunctionExpr::Round { decimals, mode };
    self.map_unary(function)
}
717
/// Apply `FunctionExpr::RoundSF` with the given number of `digits`.
#[cfg(feature = "round_series")]
pub fn round_sig_figs(self, digits: i32) -> Self {
    let function = FunctionExpr::RoundSF { digits };
    self.map_unary(function)
}
723
/// Apply `FunctionExpr::Floor` to this expression.
#[cfg(feature = "round_series")]
pub fn floor(self) -> Self {
    let function = FunctionExpr::Floor;
    self.map_unary(function)
}
729
/// Literal expression holding `std::f64::consts::PI`.
#[cfg(feature = "round_series")]
pub fn pi() -> Self {
    let value = std::f64::consts::PI;
    lit(value)
}
735
/// Apply `FunctionExpr::Ceil` to this expression.
#[cfg(feature = "round_series")]
pub fn ceil(self) -> Self {
    let function = FunctionExpr::Ceil;
    self.map_unary(function)
}
741
/// Apply `FunctionExpr::Clip` with both bounds; inputs are `self`, `min`, `max`.
#[cfg(feature = "round_series")]
pub fn clip(self, min: Expr, max: Expr) -> Self {
    let function = FunctionExpr::Clip {
        has_min: true,
        has_max: true,
    };
    self.map_ternary(function, min, max)
}
754
/// Apply `FunctionExpr::Clip` with only an upper bound; inputs are `self`, `max`.
#[cfg(feature = "round_series")]
pub fn clip_max(self, max: Expr) -> Self {
    let function = FunctionExpr::Clip {
        has_min: false,
        has_max: true,
    };
    self.map_binary(function, max)
}
766
/// Apply `FunctionExpr::Clip` with only a lower bound; inputs are `self`, `min`.
#[cfg(feature = "round_series")]
pub fn clip_min(self, min: Expr) -> Self {
    let function = FunctionExpr::Clip {
        has_min: true,
        has_max: false,
    };
    self.map_binary(function, min)
}
778
/// Apply `FunctionExpr::Abs` to this expression.
#[cfg(feature = "abs")]
pub fn abs(self) -> Self {
    let function = FunctionExpr::Abs;
    self.map_unary(function)
}
784
785 pub fn over<E: AsRef<[IE]>, IE: Into<Expr> + Clone>(self, partition_by: E) -> Self {
836 self.over_with_options(partition_by, None, Default::default())
837 }
838
839 pub fn over_with_options<E: AsRef<[IE]>, IE: Into<Expr> + Clone>(
840 self,
841 partition_by: E,
842 order_by: Option<(E, SortOptions)>,
843 options: WindowMapping,
844 ) -> Self {
845 let partition_by = partition_by
846 .as_ref()
847 .iter()
848 .map(|e| e.clone().into())
849 .collect();
850
851 let order_by = order_by.map(|(e, options)| {
852 let e = e.as_ref();
853 let e = if e.len() == 1 {
854 Arc::new(e[0].clone().into())
855 } else {
856 feature_gated!["dtype-struct", {
857 let e = e.iter().map(|e| e.clone().into()).collect::<Vec<_>>();
858 Arc::new(as_struct(e))
859 }]
860 };
861 (e, options)
862 });
863
864 Expr::Window {
865 function: Arc::new(self),
866 partition_by,
867 order_by,
868 options: options.into(),
869 }
870 }
871
/// Build an `Expr::Window` of type `WindowType::Rolling(options)`.
///
/// The column named by `options.index_column` is used as the sole
/// `partition_by` expression; no `order_by` is set.
#[cfg(feature = "dynamic_group_by")]
pub fn rolling(self, options: RollingGroupOptions) -> Self {
    let partition_by = vec![col(options.index_column.clone())];
    Expr::Window {
        function: Arc::new(self),
        partition_by,
        order_by: None,
        options: WindowType::Rolling(options),
    }
}
884
885 fn fill_null_impl(self, fill_value: Expr) -> Self {
886 self.map_binary(FunctionExpr::FillNull, fill_value)
887 }
888
889 pub fn fill_null<E: Into<Expr>>(self, fill_value: E) -> Self {
891 self.fill_null_impl(fill_value.into())
892 }
893
894 pub fn fill_null_with_strategy(self, strategy: FillNullStrategy) -> Self {
895 self.map_unary(FunctionExpr::FillNullWithStrategy(strategy))
896 }
897
898 pub fn fill_nan<E: Into<Expr>>(self, fill_value: E) -> Self {
900 when(self.clone().is_not_nan().or(self.clone().is_null()))
904 .then(self)
905 .otherwise(fill_value.into())
906 }
907 pub fn count(self) -> Self {
911 AggExpr::Count(Arc::new(self), false).into()
912 }
913
914 pub fn len(self) -> Self {
915 AggExpr::Count(Arc::new(self), true).into()
916 }
917
/// Apply `BooleanFunction::IsDuplicated` to this expression.
#[allow(clippy::wrong_self_convention)]
#[cfg(feature = "is_unique")]
pub fn is_duplicated(self) -> Self {
    let predicate = BooleanFunction::IsDuplicated;
    self.map_unary(predicate)
}
924
/// Apply `BooleanFunction::IsBetween` with `lower` and `upper` as extra inputs.
///
/// `closed` is stored on the function unchanged.
#[allow(clippy::wrong_self_convention)]
#[cfg(feature = "is_between")]
pub fn is_between<E: Into<Expr>>(self, lower: E, upper: E, closed: ClosedInterval) -> Self {
    let predicate = BooleanFunction::IsBetween { closed };
    let lower: Expr = lower.into();
    let upper: Expr = upper.into();
    self.map_ternary(predicate, lower, upper)
}
934
/// Apply `BooleanFunction::IsUnique` to this expression.
#[allow(clippy::wrong_self_convention)]
#[cfg(feature = "is_unique")]
pub fn is_unique(self) -> Self {
    let predicate = BooleanFunction::IsUnique;
    self.map_unary(predicate)
}
941
/// Apply `FunctionExpr::ApproxNUnique` to this expression.
#[cfg(feature = "approx_unique")]
pub fn approx_n_unique(self) -> Self {
    let function = FunctionExpr::ApproxNUnique;
    self.map_unary(function)
}
947
948 pub fn and<E: Into<Expr>>(self, expr: E) -> Self {
950 binary_expr(self, Operator::And, expr.into())
951 }
952
953 pub fn xor<E: Into<Expr>>(self, expr: E) -> Self {
955 binary_expr(self, Operator::Xor, expr.into())
956 }
957
958 pub fn or<E: Into<Expr>>(self, expr: E) -> Self {
960 binary_expr(self, Operator::Or, expr.into())
961 }
962
963 pub fn logical_or<E: Into<Expr>>(self, expr: E) -> Self {
965 binary_expr(self, Operator::LogicalOr, expr.into())
966 }
967
968 pub fn logical_and<E: Into<Expr>>(self, expr: E) -> Self {
970 binary_expr(self, Operator::LogicalAnd, expr.into())
971 }
972
973 pub fn filter<E: Into<Expr>>(self, predicate: E) -> Self {
978 if has_expr(&self, |e| matches!(e, Expr::Wildcard)) {
979 panic!("filter '*' not allowed, use LazyFrame::filter")
980 };
981 Expr::Filter {
982 input: Arc::new(self),
983 by: Arc::new(predicate.into()),
984 }
985 }
986
/// Build an `Expr::Function` for `BooleanFunction::IsIn` with inputs `self` and `other`.
///
/// The function's options come from `BooleanFunction::function_options`;
/// `nulls_equal` is stored on the function unchanged.
#[allow(clippy::wrong_self_convention)]
#[cfg(feature = "is_in")]
pub fn is_in<E: Into<Expr>>(self, other: E, nulls_equal: bool) -> Self {
    let haystack = other.into();
    let membership = BooleanFunction::IsIn { nulls_equal };
    // Options must be read before the boolean function is converted away.
    let options = membership.function_options();
    Expr::Function {
        input: vec![self, haystack],
        function: membership.into(),
        options,
    }
}
1001
1002 pub fn sort_by<E: AsRef<[IE]>, IE: Into<Expr> + Clone>(
1029 self,
1030 by: E,
1031 sort_options: SortMultipleOptions,
1032 ) -> Expr {
1033 let by = by.as_ref().iter().map(|e| e.clone().into()).collect();
1034 Expr::SortBy {
1035 expr: Arc::new(self),
1036 by,
1037 sort_options,
1038 }
1039 }
1040
/// Apply `FunctionExpr::RepeatBy` with `by` as the second input.
#[cfg(feature = "repeat_by")]
pub fn repeat_by<E: Into<Expr>>(self, by: E) -> Expr {
    let times: Expr = by.into();
    self.map_binary(FunctionExpr::RepeatBy, times)
}
1047
/// Apply `BooleanFunction::IsFirstDistinct` to this expression.
#[cfg(feature = "is_first_distinct")]
#[allow(clippy::wrong_self_convention)]
pub fn is_first_distinct(self) -> Expr {
    let predicate = BooleanFunction::IsFirstDistinct;
    self.map_unary(predicate)
}
1054
/// Apply `BooleanFunction::IsLastDistinct` to this expression.
#[cfg(feature = "is_last_distinct")]
#[allow(clippy::wrong_self_convention)]
pub fn is_last_distinct(self) -> Expr {
    let predicate = BooleanFunction::IsLastDistinct;
    self.map_unary(predicate)
}
1061
1062 fn dot_impl(self, other: Expr) -> Expr {
1063 (self * other).sum()
1064 }
1065
1066 pub fn dot<E: Into<Expr>>(self, other: E) -> Expr {
1068 self.dot_impl(other.into())
1069 }
1070
/// Apply `FunctionExpr::Mode` to this expression.
#[cfg(feature = "mode")]
pub fn mode(self) -> Expr {
    let function = FunctionExpr::Mode;
    self.map_unary(function)
}
1076
1077 pub fn exclude(self, columns: impl IntoVec<PlSmallStr>) -> Expr {
1081 let v = columns.into_vec().into_iter().map(Excluded::Name).collect();
1082 Expr::Exclude(Arc::new(self), v)
1083 }
1084
1085 pub fn exclude_dtype<D: AsRef<[DataType]>>(self, dtypes: D) -> Expr {
1086 let v = dtypes
1087 .as_ref()
1088 .iter()
1089 .map(|dt| Excluded::Dtype(dt.clone()))
1090 .collect();
1091 Expr::Exclude(Arc::new(self), v)
1092 }
1093
/// Apply `FunctionExpr::Interpolate` with the given `method`.
#[cfg(feature = "interpolate")]
pub fn interpolate(self, method: InterpolationMethod) -> Expr {
    let function = FunctionExpr::Interpolate(method);
    self.map_unary(function)
}
1100
/// Shared builder for the `rolling_*_by` methods.
///
/// `rolling_function_by` turns `options` into a `RollingFunctionBy`, which is
/// wrapped in `FunctionExpr::RollingExprBy` and applied with `by` as the
/// second input.
#[cfg(feature = "rolling_window_by")]
#[allow(clippy::type_complexity)]
fn finish_rolling_by(
    self,
    by: Expr,
    options: RollingOptionsDynamicWindow,
    rolling_function_by: fn(RollingOptionsDynamicWindow) -> RollingFunctionBy,
) -> Expr {
    let rolling = rolling_function_by(options);
    let function = FunctionExpr::RollingExprBy(rolling);
    self.map_binary(function, by)
}
1114
/// Apply `FunctionExpr::InterpolateBy` with `by` as the second input.
#[cfg(feature = "interpolate_by")]
pub fn interpolate_by(self, by: Expr) -> Expr {
    let function = FunctionExpr::InterpolateBy;
    self.map_binary(function, by)
}
1122
/// Shared builder for the fixed-window `rolling_*` methods.
///
/// `rolling_function` turns `options` into a `RollingFunction`, which is
/// wrapped in `FunctionExpr::RollingExpr` and applied to this expression.
#[cfg(feature = "rolling_window")]
#[allow(clippy::type_complexity)]
fn finish_rolling(
    self,
    options: RollingOptionsFixedWindow,
    rolling_function: fn(RollingOptionsFixedWindow) -> RollingFunction,
) -> Expr {
    let rolling = rolling_function(options);
    let function = FunctionExpr::RollingExpr(rolling);
    self.map_unary(function)
}
1132
/// Rolling minimum over a window driven by `by` (`RollingFunctionBy::MinBy`).
#[cfg(feature = "rolling_window_by")]
pub fn rolling_min_by(self, by: Expr, options: RollingOptionsDynamicWindow) -> Expr {
    let kind = RollingFunctionBy::MinBy;
    self.finish_rolling_by(by, options, kind)
}
1138
/// Rolling maximum over a window driven by `by` (`RollingFunctionBy::MaxBy`).
#[cfg(feature = "rolling_window_by")]
pub fn rolling_max_by(self, by: Expr, options: RollingOptionsDynamicWindow) -> Expr {
    let kind = RollingFunctionBy::MaxBy;
    self.finish_rolling_by(by, options, kind)
}
1144
/// Rolling mean over a window driven by `by` (`RollingFunctionBy::MeanBy`).
#[cfg(feature = "rolling_window_by")]
pub fn rolling_mean_by(self, by: Expr, options: RollingOptionsDynamicWindow) -> Expr {
    let kind = RollingFunctionBy::MeanBy;
    self.finish_rolling_by(by, options, kind)
}
1150
/// Rolling sum over a window driven by `by` (`RollingFunctionBy::SumBy`).
#[cfg(feature = "rolling_window_by")]
pub fn rolling_sum_by(self, by: Expr, options: RollingOptionsDynamicWindow) -> Expr {
    let kind = RollingFunctionBy::SumBy;
    self.finish_rolling_by(by, options, kind)
}
1156
/// Rolling quantile over a window driven by `by` (`RollingFunctionBy::QuantileBy`).
///
/// `quantile` and `method` are stored in `options.fn_params`, overwriting
/// whatever the caller had set there.
#[cfg(feature = "rolling_window_by")]
pub fn rolling_quantile_by(
    self,
    by: Expr,
    method: QuantileMethod,
    quantile: f64,
    mut options: RollingOptionsDynamicWindow,
) -> Expr {
    use polars_compute::rolling::{RollingFnParams, RollingQuantileParams};

    let params = RollingQuantileParams {
        prob: quantile,
        method,
    };
    options.fn_params = Some(RollingFnParams::Quantile(params));

    self.finish_rolling_by(by, options, RollingFunctionBy::QuantileBy)
}
1174
/// Rolling variance over a window driven by `by` (`RollingFunctionBy::VarBy`).
#[cfg(feature = "rolling_window_by")]
pub fn rolling_var_by(self, by: Expr, options: RollingOptionsDynamicWindow) -> Expr {
    let kind = RollingFunctionBy::VarBy;
    self.finish_rolling_by(by, options, kind)
}
1180
/// Rolling standard deviation over a window driven by `by` (`RollingFunctionBy::StdBy`).
#[cfg(feature = "rolling_window_by")]
pub fn rolling_std_by(self, by: Expr, options: RollingOptionsDynamicWindow) -> Expr {
    let kind = RollingFunctionBy::StdBy;
    self.finish_rolling_by(by, options, kind)
}
1186
/// Rolling median over a window driven by `by`.
///
/// Implemented as `rolling_quantile_by` at quantile `0.5` with
/// `QuantileMethod::Linear`.
#[cfg(feature = "rolling_window_by")]
pub fn rolling_median_by(self, by: Expr, options: RollingOptionsDynamicWindow) -> Expr {
    let method = QuantileMethod::Linear;
    let median: f64 = 0.5;
    self.rolling_quantile_by(by, method, median, options)
}
1192
/// Fixed-window rolling minimum (`RollingFunction::Min`).
#[cfg(feature = "rolling_window")]
pub fn rolling_min(self, options: RollingOptionsFixedWindow) -> Expr {
    let kind = RollingFunction::Min;
    self.finish_rolling(options, kind)
}
1200
/// Fixed-window rolling maximum (`RollingFunction::Max`).
#[cfg(feature = "rolling_window")]
pub fn rolling_max(self, options: RollingOptionsFixedWindow) -> Expr {
    let kind = RollingFunction::Max;
    self.finish_rolling(options, kind)
}
1208
/// Fixed-window rolling mean (`RollingFunction::Mean`).
#[cfg(feature = "rolling_window")]
pub fn rolling_mean(self, options: RollingOptionsFixedWindow) -> Expr {
    let kind = RollingFunction::Mean;
    self.finish_rolling(options, kind)
}
1216
/// Fixed-window rolling sum (`RollingFunction::Sum`).
#[cfg(feature = "rolling_window")]
pub fn rolling_sum(self, options: RollingOptionsFixedWindow) -> Expr {
    let kind = RollingFunction::Sum;
    self.finish_rolling(options, kind)
}
1224
/// Fixed-window rolling median.
///
/// Implemented as `rolling_quantile` at quantile `0.5` with
/// `QuantileMethod::Linear`.
#[cfg(feature = "rolling_window")]
pub fn rolling_median(self, options: RollingOptionsFixedWindow) -> Expr {
    let method = QuantileMethod::Linear;
    let median: f64 = 0.5;
    self.rolling_quantile(method, median, options)
}
1232
/// Fixed-window rolling quantile (`RollingFunction::Quantile`).
///
/// `quantile` and `method` are stored in `options.fn_params`, overwriting
/// whatever the caller had set there.
#[cfg(feature = "rolling_window")]
pub fn rolling_quantile(
    self,
    method: QuantileMethod,
    quantile: f64,
    mut options: RollingOptionsFixedWindow,
) -> Expr {
    use polars_compute::rolling::{RollingFnParams, RollingQuantileParams};

    let params = RollingQuantileParams {
        prob: quantile,
        method,
    };
    options.fn_params = Some(RollingFnParams::Quantile(params));

    self.finish_rolling(options, RollingFunction::Quantile)
}
1252
/// Fixed-window rolling variance (`RollingFunction::Var`).
#[cfg(feature = "rolling_window")]
pub fn rolling_var(self, options: RollingOptionsFixedWindow) -> Expr {
    let kind = RollingFunction::Var;
    self.finish_rolling(options, kind)
}
1258
/// Fixed-window rolling standard deviation (`RollingFunction::Std`).
#[cfg(feature = "rolling_window")]
pub fn rolling_std(self, options: RollingOptionsFixedWindow) -> Expr {
    let kind = RollingFunction::Std;
    self.finish_rolling(options, kind)
}
1264
/// Fixed-window rolling skew (`RollingFunction::Skew`).
///
/// Requires both the `rolling_window` and `moment` features.
#[cfg(feature = "rolling_window")]
#[cfg(feature = "moment")]
pub fn rolling_skew(self, options: RollingOptionsFixedWindow) -> Expr {
    let kind = RollingFunction::Skew;
    self.finish_rolling(options, kind)
}
1271
/// Fixed-window rolling kurtosis (`RollingFunction::Kurtosis`).
///
/// Requires both the `rolling_window` and `moment` features.
#[cfg(feature = "rolling_window")]
#[cfg(feature = "moment")]
pub fn rolling_kurtosis(self, options: RollingOptionsFixedWindow) -> Expr {
    let kind = RollingFunction::Kurtosis;
    self.finish_rolling(options, kind)
}
1278
/// Apply the custom function `f` over a rolling window.
///
/// Runs through `apply` on the materialized series, cloning `options` on each
/// invocation, and labels the resulting expression `"rolling_map"`.
#[cfg(feature = "rolling_window")]
pub fn rolling_map(
    self,
    f: Arc<dyn Fn(&Series) -> Series + Send + Sync>,
    output_type: GetOutput,
    options: RollingOptionsFixedWindow,
) -> Expr {
    let mapped = self.apply(
        move |c: Column| {
            let rolled = c
                .as_materialized_series()
                .rolling_map(f.as_ref(), options.clone());
            rolled.map(|series| Some(Column::from(series)))
        },
        output_type,
    );
    mapped.with_fmt("rolling_map")
}
1299
/// Apply the custom function `f` over a rolling window of `Float64` values.
///
/// Non-`Float64` input is cast to `Float64` first. `Float32` input is cast
/// back to `Float32` afterwards; every other dtype yields `Float64`. The
/// resulting expression is labelled `"rolling_map_float"`.
#[cfg(feature = "rolling_window")]
pub fn rolling_map_float<F>(self, window_size: usize, f: F) -> Expr
where
    F: 'static + FnMut(&mut Float64Chunked) -> Option<f64> + Send + Sync + Copy,
{
    let mapped = self.apply(
        move |c: Column| {
            let rolled = match c.dtype() {
                DataType::Float64 => c.f64().unwrap().rolling_map_float(window_size, f),
                _ => c
                    .cast(&DataType::Float64)?
                    .f64()
                    .unwrap()
                    .rolling_map_float(window_size, f),
            }?;
            let out = rolled.into_column();
            // Preserve the narrower float type of the input.
            match c.dtype() {
                DataType::Float32 => out.cast(&DataType::Float32).map(Some),
                _ => Ok(Some(out)),
            }
        },
        GetOutput::map_field(|field| {
            let dtype = match field.dtype() {
                DataType::Float64 => return Ok(field.clone()),
                DataType::Float32 => DataType::Float32,
                _ => DataType::Float64,
            };
            Ok(Field::new(field.name().clone(), dtype))
        }),
    );
    mapped.with_fmt("rolling_map_float")
}
1339
/// Apply `FunctionExpr::PeakMin` to this expression.
#[cfg(feature = "peaks")]
pub fn peak_min(self) -> Expr {
    let function = FunctionExpr::PeakMin;
    self.map_unary(function)
}
1344
/// Apply `FunctionExpr::PeakMax` to this expression.
#[cfg(feature = "peaks")]
pub fn peak_max(self) -> Expr {
    let function = FunctionExpr::PeakMax;
    self.map_unary(function)
}
1349
/// Apply `FunctionExpr::Rank` with the given `options` and optional `seed`.
#[cfg(feature = "rank")]
pub fn rank(self, options: RankOptions, seed: Option<u64>) -> Expr {
    let function = FunctionExpr::Rank { options, seed };
    self.map_unary(function)
}
1355
/// Apply `FunctionExpr::Replace` with `old` and `new` as the extra inputs, in that order.
#[cfg(feature = "replace")]
pub fn replace<E: Into<Expr>>(self, old: E, new: E) -> Expr {
    let args: [Expr; 2] = [old.into(), new.into()];
    self.map_n_ary(FunctionExpr::Replace, args)
}
1363
/// Apply `FunctionExpr::ReplaceStrict` with `old`, `new` and, if given, `default`.
///
/// `default` is appended as a third extra input only when it is `Some`;
/// `return_dtype` is stored on the function unchanged.
#[cfg(feature = "replace")]
pub fn replace_strict<E: Into<Expr>>(
    self,
    old: E,
    new: E,
    default: Option<E>,
    return_dtype: Option<DataType>,
) -> Expr {
    let args: Vec<Expr> = [old, new]
        .into_iter()
        .chain(default)
        .map(Into::into)
        .collect();
    self.map_n_ary(FunctionExpr::ReplaceStrict { return_dtype }, args)
}
1379
/// Apply `FunctionExpr::Cut` with the given break points and options.
///
/// `labels`, when present, is converted with `into_vec`; all other arguments
/// are stored unchanged.
#[cfg(feature = "cutqcut")]
pub fn cut(
    self,
    breaks: Vec<f64>,
    labels: Option<impl IntoVec<PlSmallStr>>,
    left_closed: bool,
    include_breaks: bool,
) -> Expr {
    let labels = labels.map(|names| names.into_vec());
    let function = FunctionExpr::Cut {
        breaks,
        labels,
        left_closed,
        include_breaks,
    };
    self.map_unary(function)
}
1396
/// Apply `FunctionExpr::QCut` with the given probabilities and options.
///
/// `labels`, when present, is converted with `into_vec`; all other arguments
/// are stored unchanged.
#[cfg(feature = "cutqcut")]
pub fn qcut(
    self,
    probs: Vec<f64>,
    labels: Option<impl IntoVec<PlSmallStr>>,
    left_closed: bool,
    allow_duplicates: bool,
    include_breaks: bool,
) -> Expr {
    let labels = labels.map(|names| names.into_vec());
    let function = FunctionExpr::QCut {
        probs,
        labels,
        left_closed,
        allow_duplicates,
        include_breaks,
    };
    self.map_unary(function)
}
1415
/// Apply `FunctionExpr::QCut` with `n_bins` evenly spaced probabilities.
///
/// The probabilities are `b / n_bins` for `b` in `1..n_bins`, so `n_bins` of
/// 0 or 1 produces an empty list. `labels`, when present, is converted with
/// `into_vec`.
#[cfg(feature = "cutqcut")]
pub fn qcut_uniform(
    self,
    n_bins: usize,
    labels: Option<impl IntoVec<PlSmallStr>>,
    left_closed: bool,
    allow_duplicates: bool,
    include_breaks: bool,
) -> Expr {
    let total = n_bins as f64;
    let probs: Vec<f64> = (1..n_bins).map(|edge| edge as f64 / total).collect();
    let labels = labels.map(|names| names.into_vec());
    let function = FunctionExpr::QCut {
        probs,
        labels,
        left_closed,
        allow_duplicates,
        include_breaks,
    };
    self.map_unary(function)
}
1435
/// Apply `FunctionExpr::RLE` to this expression.
#[cfg(feature = "rle")]
pub fn rle(self) -> Expr {
    let function = FunctionExpr::RLE;
    self.map_unary(function)
}
1441
/// Apply `FunctionExpr::RLEID` to this expression.
#[cfg(feature = "rle")]
pub fn rle_id(self) -> Expr {
    let function = FunctionExpr::RLEID;
    self.map_unary(function)
}
1447
/// Apply `FunctionExpr::Diff(null_behavior)` with the expression `n` as the second input.
#[cfg(feature = "diff")]
pub fn diff(self, n: Expr, null_behavior: NullBehavior) -> Expr {
    let function = FunctionExpr::Diff(null_behavior);
    self.map_binary(function, n)
}
1453
/// Apply `FunctionExpr::PctChange` with the expression `n` as the second input.
#[cfg(feature = "pct_change")]
pub fn pct_change(self, n: Expr) -> Expr {
    let function = FunctionExpr::PctChange;
    self.map_binary(function, n)
}
1459
/// Apply `FunctionExpr::Skew`, forwarding the `bias` flag.
#[cfg(feature = "moment")]
pub fn skew(self, bias: bool) -> Expr {
    let function = FunctionExpr::Skew(bias);
    self.map_unary(function)
}
1473
/// Apply `FunctionExpr::Kurtosis`, forwarding the `fisher` and `bias` flags in that order.
#[cfg(feature = "moment")]
pub fn kurtosis(self, fisher: bool, bias: bool) -> Expr {
    let function = FunctionExpr::Kurtosis(fisher, bias);
    self.map_unary(function)
}
1485
1486 pub fn upper_bound(self) -> Expr {
1488 self.map_unary(FunctionExpr::UpperBound)
1489 }
1490
1491 pub fn lower_bound(self) -> Expr {
1493 self.map_unary(FunctionExpr::LowerBound)
1494 }
1495
/// Apply `FunctionExpr::Reshape` with the given `dimensions`.
///
/// Each `i64` is converted with `ReshapeDimension::new`.
#[cfg(feature = "dtype-array")]
pub fn reshape(self, dimensions: &[i64]) -> Self {
    let dims = dimensions
        .iter()
        .copied()
        .map(ReshapeDimension::new)
        .collect();
    self.map_unary(FunctionExpr::Reshape(dims))
}
1504
/// Apply `FunctionExpr::EwmMean` with the given `options`.
#[cfg(feature = "ewma")]
pub fn ewm_mean(self, options: EWMOptions) -> Self {
    let function = FunctionExpr::EwmMean { options };
    self.map_unary(function)
}
1510
1511 #[cfg(feature = "ewma_by")]
1512 pub fn ewm_mean_by(self, times: Expr, half_life: Duration) -> Self {
1514 self.map_binary(FunctionExpr::EwmMeanBy { half_life }, times)
1515 }
1516
1517 #[cfg(feature = "ewma")]
1518 pub fn ewm_std(self, options: EWMOptions) -> Self {
1520 self.map_unary(FunctionExpr::EwmStd { options })
1521 }
1522
1523 #[cfg(feature = "ewma")]
1524 pub fn ewm_var(self, options: EWMOptions) -> Self {
1526 self.map_unary(FunctionExpr::EwmVar { options })
1527 }
1528
1529 pub fn any(self, ignore_nulls: bool) -> Self {
1537 self.map_unary(BooleanFunction::Any { ignore_nulls })
1538 }
1539
1540 pub fn all(self, ignore_nulls: bool) -> Self {
1548 self.map_unary(BooleanFunction::All { ignore_nulls })
1549 }
1550
1551 pub fn shrink_dtype(self) -> Self {
1555 self.map_unary(FunctionExpr::ShrinkType)
1556 }
1557
1558 #[cfg(feature = "dtype-struct")]
1559 pub fn value_counts(self, sort: bool, parallel: bool, name: &str, normalize: bool) -> Self {
1563 self.map_unary(FunctionExpr::ValueCounts {
1564 sort,
1565 parallel,
1566 name: name.into(),
1567 normalize,
1568 })
1569 }
1570
1571 #[cfg(feature = "unique_counts")]
1572 pub fn unique_counts(self) -> Self {
1576 self.map_unary(FunctionExpr::UniqueCounts)
1577 }
1578
1579 #[cfg(feature = "log")]
1580 pub fn log(self, base: f64) -> Self {
1582 self.map_unary(FunctionExpr::Log { base })
1583 }
1584
1585 #[cfg(feature = "log")]
1586 pub fn log1p(self) -> Self {
1588 self.map_unary(FunctionExpr::Log1p)
1589 }
1590
1591 #[cfg(feature = "log")]
1592 pub fn exp(self) -> Self {
1594 self.map_unary(FunctionExpr::Exp)
1595 }
1596
1597 #[cfg(feature = "log")]
1598 pub fn entropy(self, base: f64, normalize: bool) -> Self {
1601 self.map_unary(FunctionExpr::Entropy { base, normalize })
1602 }
1603 pub fn null_count(self) -> Expr {
1605 self.map_unary(FunctionExpr::NullCount)
1606 }
1607
1608 pub fn set_sorted_flag(self, sorted: IsSorted) -> Expr {
1614 self.map_unary(FunctionExpr::SetSortedFlag(sorted))
1616 }
1617
1618 #[cfg(feature = "row_hash")]
1619 pub fn hash(self, k0: u64, k1: u64, k2: u64, k3: u64) -> Expr {
1621 self.map_unary(FunctionExpr::Hash(k0, k1, k2, k3))
1622 }
1623
1624 pub fn to_physical(self) -> Expr {
1625 self.map_unary(FunctionExpr::ToPhysical)
1626 }
1627
1628 pub fn gather_every(self, n: usize, offset: usize) -> Expr {
1629 self.map_unary(FunctionExpr::GatherEvery { n, offset })
1630 }
1631
1632 #[cfg(feature = "reinterpret")]
1633 pub fn reinterpret(self, signed: bool) -> Expr {
1634 self.map_unary(FunctionExpr::Reinterpret(signed))
1635 }
1636
1637 pub fn extend_constant(self, value: Expr, n: Expr) -> Expr {
1638 self.map_ternary(FunctionExpr::ExtendConstant, value, n)
1639 }
1640
1641 #[cfg(feature = "strings")]
1642 pub fn str(self) -> string::StringNameSpace {
1644 string::StringNameSpace(self)
1645 }
1646
1647 pub fn binary(self) -> binary::BinaryNameSpace {
1649 binary::BinaryNameSpace(self)
1650 }
1651
1652 #[cfg(feature = "temporal")]
1653 pub fn dt(self) -> dt::DateLikeNameSpace {
1655 dt::DateLikeNameSpace(self)
1656 }
1657
1658 pub fn list(self) -> list::ListNameSpace {
1660 list::ListNameSpace(self)
1661 }
1662
1663 pub fn name(self) -> name::ExprNameNameSpace {
1665 name::ExprNameNameSpace(self)
1666 }
1667
1668 #[cfg(feature = "dtype-array")]
1670 pub fn arr(self) -> array::ArrayNameSpace {
1671 array::ArrayNameSpace(self)
1672 }
1673
1674 #[cfg(feature = "dtype-categorical")]
1676 pub fn cat(self) -> cat::CategoricalNameSpace {
1677 cat::CategoricalNameSpace(self)
1678 }
1679
1680 #[cfg(feature = "dtype-struct")]
1682 pub fn struct_(self) -> struct_::StructNameSpace {
1683 struct_::StructNameSpace(self)
1684 }
1685
1686 #[cfg(feature = "meta")]
1688 pub fn meta(self) -> meta::MetaNameSpace {
1689 meta::MetaNameSpace(self)
1690 }
1691}
1692
1693pub fn map_multiple<F, E>(function: F, expr: E, output_type: GetOutput) -> Expr
1703where
1704 F: Fn(&mut [Column]) -> PolarsResult<Option<Column>> + 'static + Send + Sync,
1705 E: AsRef<[Expr]>,
1706{
1707 let input = expr.as_ref().to_vec();
1708
1709 Expr::AnonymousFunction {
1710 input,
1711 function: new_column_udf(function),
1712 output_type,
1713 options: FunctionOptions::elementwise().with_fmt_str(""),
1714 }
1715}
1716
1717pub fn apply_multiple<F, E>(
1727 function: F,
1728 expr: E,
1729 output_type: GetOutput,
1730 returns_scalar: bool,
1731) -> Expr
1732where
1733 F: Fn(&mut [Column]) -> PolarsResult<Option<Column>> + 'static + Send + Sync,
1734 E: AsRef<[Expr]>,
1735{
1736 let input = expr.as_ref().to_vec();
1737 Expr::AnonymousFunction {
1738 input,
1739 function: new_column_udf(function),
1740 output_type,
1741 options: FunctionOptions::groupwise()
1742 .with_fmt_str("")
1743 .with_flags(|mut f| {
1744 f.set(FunctionFlags::RETURNS_SCALAR, returns_scalar);
1745 f
1746 }),
1747 }
1748}
1749
/// Return the `Expr::Len` expression.
pub fn len() -> Expr {
    Expr::Len
}
1754
1755pub fn first() -> Expr {
1757 Expr::Nth(0)
1758}
1759
1760pub fn last() -> Expr {
1762 Expr::Nth(-1)
1763}
1764
/// Return the `Expr::Nth` expression carrying index `n`.
///
/// Negative values are accepted by the signature; `last` in this module
/// uses `-1`.
pub fn nth(n: i64) -> Expr {
    Expr::Nth(n)
}