polars_plan/dsl/
mod.rs

1#![allow(ambiguous_glob_reexports)]
2//! Domain specific language for the Lazy API.
3#[cfg(feature = "dtype-categorical")]
4pub mod cat;
5
6#[cfg(feature = "dtype-categorical")]
7pub use cat::*;
8#[cfg(feature = "rolling_window_by")]
9pub(crate) use polars_time::prelude::*;
10
11mod arithmetic;
12mod arity;
13#[cfg(feature = "dtype-array")]
14mod array;
15pub mod binary;
16#[cfg(feature = "bitwise")]
17mod bitwise;
18mod builder_dsl;
19pub use builder_dsl::*;
20#[cfg(feature = "temporal")]
21pub mod dt;
22mod expr;
23mod expr_dyn_fn;
24mod format;
25mod from;
26pub mod function_expr;
27pub mod functions;
28mod list;
29#[cfg(feature = "meta")]
30mod meta;
31mod name;
32mod options;
33#[cfg(feature = "python")]
34pub mod python_dsl;
35#[cfg(feature = "random")]
36mod random;
37mod scan_sources;
38mod selector;
39mod statistics;
40#[cfg(feature = "strings")]
41pub mod string;
42#[cfg(feature = "dtype-struct")]
43mod struct_;
44pub mod udf;
45
46use std::fmt::Debug;
47use std::sync::Arc;
48
49mod plan;
50pub use arity::*;
51#[cfg(feature = "dtype-array")]
52pub use array::*;
53pub use expr::*;
54pub use function_expr::schema::FieldsMapper;
55pub use function_expr::*;
56pub use functions::*;
57pub use list::*;
58#[cfg(feature = "meta")]
59pub use meta::*;
60pub use name::*;
61pub use options::*;
62pub use plan::*;
63use polars_compute::rolling::QuantileMethod;
64use polars_core::chunked_array::cast::CastOptions;
65use polars_core::error::feature_gated;
66use polars_core::prelude::*;
67use polars_core::series::IsSorted;
68#[cfg(feature = "diff")]
69use polars_core::series::ops::NullBehavior;
70#[cfg(any(feature = "search_sorted", feature = "is_between"))]
71use polars_core::utils::SuperTypeFlags;
72use polars_core::utils::{SuperTypeOptions, try_get_supertype};
73pub use selector::Selector;
74#[cfg(feature = "dtype-struct")]
75pub use struct_::*;
76pub use udf::UserDefinedFunction;
77mod file_scan;
78pub use file_scan::*;
79pub use scan_sources::{ScanSource, ScanSourceIter, ScanSourceRef, ScanSources};
80
81pub use crate::plans::lit;
82use crate::prelude::*;
83
84impl Expr {
85    /// Modify the Options passed to the `Function` node.
86    pub(crate) fn with_function_options<F>(self, func: F) -> Expr
87    where
88        F: Fn(FunctionOptions) -> FunctionOptions,
89    {
90        match self {
91            Self::AnonymousFunction {
92                input,
93                function,
94                output_type,
95                mut options,
96            } => {
97                options = func(options);
98                Self::AnonymousFunction {
99                    input,
100                    function,
101                    output_type,
102                    options,
103                }
104            },
105            Self::Function {
106                input,
107                function,
108                mut options,
109            } => {
110                options = func(options);
111                Self::Function {
112                    input,
113                    function,
114                    options,
115                }
116            },
117            _ => {
118                panic!("implementation error")
119            },
120        }
121    }
122
123    /// Overwrite the function name used for formatting.
124    /// (this is not intended to be used).
125    #[doc(hidden)]
126    pub fn with_fmt(self, name: &'static str) -> Expr {
127        self.with_function_options(|mut options| {
128            options.fmt_str = name;
129            options
130        })
131    }
132
133    /// Compare `Expr` with other `Expr` on equality.
134    pub fn eq<E: Into<Expr>>(self, other: E) -> Expr {
135        binary_expr(self, Operator::Eq, other.into())
136    }
137
138    /// Compare `Expr` with other `Expr` on equality where `None == None`.
139    pub fn eq_missing<E: Into<Expr>>(self, other: E) -> Expr {
140        binary_expr(self, Operator::EqValidity, other.into())
141    }
142
143    /// Compare `Expr` with other `Expr` on non-equality.
144    pub fn neq<E: Into<Expr>>(self, other: E) -> Expr {
145        binary_expr(self, Operator::NotEq, other.into())
146    }
147
148    /// Compare `Expr` with other `Expr` on non-equality where `None == None`.
149    pub fn neq_missing<E: Into<Expr>>(self, other: E) -> Expr {
150        binary_expr(self, Operator::NotEqValidity, other.into())
151    }
152
153    /// Check if `Expr` < `Expr`.
154    pub fn lt<E: Into<Expr>>(self, other: E) -> Expr {
155        binary_expr(self, Operator::Lt, other.into())
156    }
157
158    /// Check if `Expr` > `Expr`.
159    pub fn gt<E: Into<Expr>>(self, other: E) -> Expr {
160        binary_expr(self, Operator::Gt, other.into())
161    }
162
163    /// Check if `Expr` >= `Expr`.
164    pub fn gt_eq<E: Into<Expr>>(self, other: E) -> Expr {
165        binary_expr(self, Operator::GtEq, other.into())
166    }
167
168    /// Check if `Expr` <= `Expr`.
169    pub fn lt_eq<E: Into<Expr>>(self, other: E) -> Expr {
170        binary_expr(self, Operator::LtEq, other.into())
171    }
172
173    /// Negate `Expr`.
174    #[allow(clippy::should_implement_trait)]
175    pub fn not(self) -> Expr {
176        self.map_unary(BooleanFunction::Not)
177    }
178
179    /// Rename Column.
180    pub fn alias<S>(self, name: S) -> Expr
181    where
182        S: Into<PlSmallStr>,
183    {
184        Expr::Alias(Arc::new(self), name.into())
185    }
186
187    /// Run is_null operation on `Expr`.
188    #[allow(clippy::wrong_self_convention)]
189    pub fn is_null(self) -> Self {
190        self.map_unary(BooleanFunction::IsNull)
191    }
192
193    /// Run is_not_null operation on `Expr`.
194    #[allow(clippy::wrong_self_convention)]
195    pub fn is_not_null(self) -> Self {
196        self.map_unary(BooleanFunction::IsNotNull)
197    }
198
199    /// Drop null values.
200    pub fn drop_nulls(self) -> Self {
201        self.map_unary(FunctionExpr::DropNulls)
202    }
203
204    /// Drop NaN values.
205    pub fn drop_nans(self) -> Self {
206        self.map_unary(FunctionExpr::DropNans)
207    }
208
209    /// Get the number of unique values in the groups.
210    pub fn n_unique(self) -> Self {
211        AggExpr::NUnique(Arc::new(self)).into()
212    }
213
214    /// Get the first value in the group.
215    pub fn first(self) -> Self {
216        AggExpr::First(Arc::new(self)).into()
217    }
218
219    /// Get the last value in the group.
220    pub fn last(self) -> Self {
221        AggExpr::Last(Arc::new(self)).into()
222    }
223
224    /// GroupBy the group to a Series.
225    pub fn implode(self) -> Self {
226        AggExpr::Implode(Arc::new(self)).into()
227    }
228
229    /// Compute the quantile per group.
230    pub fn quantile(self, quantile: Expr, method: QuantileMethod) -> Self {
231        AggExpr::Quantile {
232            expr: Arc::new(self),
233            quantile: Arc::new(quantile),
234            method,
235        }
236        .into()
237    }
238
239    /// Get the group indexes of the group by operation.
240    pub fn agg_groups(self) -> Self {
241        AggExpr::AggGroups(Arc::new(self)).into()
242    }
243
244    /// Alias for `explode`.
245    pub fn flatten(self) -> Self {
246        self.explode()
247    }
248
249    /// Explode the String/List column.
250    pub fn explode(self) -> Self {
251        Expr::Explode {
252            input: Arc::new(self),
253            skip_empty: false,
254        }
255    }
256
257    /// Slice the Series.
258    /// `offset` may be negative.
259    pub fn slice<E: Into<Expr>, F: Into<Expr>>(self, offset: E, length: F) -> Self {
260        Expr::Slice {
261            input: Arc::new(self),
262            offset: Arc::new(offset.into()),
263            length: Arc::new(length.into()),
264        }
265    }
266
267    /// Append expressions. This is done by adding the chunks of `other` to this [`Series`].
268    pub fn append<E: Into<Expr>>(self, other: E, upcast: bool) -> Self {
269        let output_type = if upcast {
270            GetOutput::super_type()
271        } else {
272            GetOutput::same_type()
273        };
274
275        apply_binary(
276            self,
277            other.into(),
278            move |mut a, mut b| {
279                if upcast {
280                    let dtype = try_get_supertype(a.dtype(), b.dtype())?;
281                    a = a.cast(&dtype)?;
282                    b = b.cast(&dtype)?;
283                }
284                a.append(&b)?;
285                Ok(Some(a))
286            },
287            output_type,
288        )
289    }
290
291    /// Get the first `n` elements of the Expr result.
292    pub fn head(self, length: Option<usize>) -> Self {
293        self.slice(lit(0), lit(length.unwrap_or(10) as u64))
294    }
295
296    /// Get the last `n` elements of the Expr result.
297    pub fn tail(self, length: Option<usize>) -> Self {
298        let len = length.unwrap_or(10);
299        self.slice(lit(-(len as i64)), lit(len as u64))
300    }
301
302    /// Get unique values of this expression.
303    pub fn unique(self) -> Self {
304        self.map_unary(FunctionExpr::Unique(false))
305    }
306
307    /// Get unique values of this expression, while maintaining order.
308    /// This requires more work than [`Expr::unique`].
309    pub fn unique_stable(self) -> Self {
310        self.map_unary(FunctionExpr::Unique(true))
311    }
312
313    /// Get the first index of unique values of this expression.
314    pub fn arg_unique(self) -> Self {
315        self.map_unary(FunctionExpr::ArgUnique)
316    }
317
318    /// Get the index value that has the minimum value.
319    pub fn arg_min(self) -> Self {
320        let options = FunctionOptions::aggregation().with_fmt_str("arg_min");
321        self.function_with_options(
322            move |c: Column| {
323                Ok(Some(Column::new(
324                    c.name().clone(),
325                    &[c.as_materialized_series().arg_min().map(|idx| idx as u32)],
326                )))
327            },
328            GetOutput::from_type(IDX_DTYPE),
329            options,
330        )
331    }
332
333    /// Get the index value that has the maximum value.
334    pub fn arg_max(self) -> Self {
335        let options = FunctionOptions::aggregation().with_fmt_str("arg_max");
336        self.function_with_options(
337            move |c: Column| {
338                Ok(Some(Column::new(
339                    c.name().clone(),
340                    &[c.as_materialized_series()
341                        .arg_max()
342                        .map(|idx| idx as IdxSize)],
343                )))
344            },
345            GetOutput::from_type(IDX_DTYPE),
346            options,
347        )
348    }
349
350    /// Get the index values that would sort this expression.
351    pub fn arg_sort(self, sort_options: SortOptions) -> Self {
352        let options = FunctionOptions::groupwise().with_fmt_str("arg_sort");
353        self.function_with_options(
354            move |c: Column| {
355                Ok(Some(
356                    c.as_materialized_series()
357                        .arg_sort(sort_options)
358                        .into_column(),
359                ))
360            },
361            GetOutput::from_type(IDX_DTYPE),
362            options,
363        )
364    }
365
366    #[cfg(feature = "index_of")]
367    /// Find the index of a value.
368    pub fn index_of<E: Into<Expr>>(self, element: E) -> Expr {
369        self.map_binary(FunctionExpr::IndexOf, element.into())
370    }
371
372    #[cfg(feature = "search_sorted")]
373    /// Find indices where elements should be inserted to maintain order.
374    pub fn search_sorted<E: Into<Expr>>(self, element: E, side: SearchSortedSide) -> Expr {
375        self.map_binary(FunctionExpr::SearchSorted(side), element.into())
376    }
377
378    /// Cast expression to another data type.
379    /// Throws an error if conversion had overflows.
380    /// Returns an Error if cast is invalid on rows after predicates are pushed down.
381    pub fn strict_cast(self, dtype: DataType) -> Self {
382        Expr::Cast {
383            expr: Arc::new(self),
384            dtype,
385            options: CastOptions::Strict,
386        }
387    }
388
389    /// Cast expression to another data type.
390    pub fn cast(self, dtype: DataType) -> Self {
391        Expr::Cast {
392            expr: Arc::new(self),
393            dtype,
394            options: CastOptions::NonStrict,
395        }
396    }
397
398    /// Cast expression to another data type.
399    pub fn cast_with_options(self, dtype: DataType, cast_options: CastOptions) -> Self {
400        Expr::Cast {
401            expr: Arc::new(self),
402            dtype,
403            options: cast_options,
404        }
405    }
406
407    /// Take the values by idx.
408    pub fn gather<E: Into<Expr>>(self, idx: E) -> Self {
409        Expr::Gather {
410            expr: Arc::new(self),
411            idx: Arc::new(idx.into()),
412            returns_scalar: false,
413        }
414    }
415
416    /// Take the values by a single index.
417    pub fn get<E: Into<Expr>>(self, idx: E) -> Self {
418        Expr::Gather {
419            expr: Arc::new(self),
420            idx: Arc::new(idx.into()),
421            returns_scalar: true,
422        }
423    }
424
425    /// Sort with given options.
426    ///
427    /// # Example
428    ///
429    /// ```rust
430    /// # use polars_core::prelude::*;
431    /// # use polars_lazy::prelude::*;
432    /// # fn main() -> PolarsResult<()> {
433    /// let lf = df! {
434    ///    "a" => [Some(5), Some(4), Some(3), Some(2), None]
435    /// }?
436    /// .lazy();
437    ///
438    /// let sorted = lf
439    ///     .select(
440    ///         vec![col("a").sort(SortOptions::default())],
441    ///     )
442    ///     .collect()?;
443    ///
444    /// assert_eq!(
445    ///     sorted,
446    ///     df! {
447    ///         "a" => [None, Some(2), Some(3), Some(4), Some(5)]
448    ///     }?
449    /// );
450    /// # Ok(())
451    /// # }
452    /// ```
453    /// See [`SortOptions`] for more options.
454    pub fn sort(self, options: SortOptions) -> Self {
455        Expr::Sort {
456            expr: Arc::new(self),
457            options,
458        }
459    }
460
461    /// Returns the `k` largest elements.
462    ///
463    /// This has time complexity `O(n + k log(n))`.
464    #[cfg(feature = "top_k")]
465    pub fn top_k(self, k: Expr) -> Self {
466        self.map_binary(FunctionExpr::TopK { descending: false }, k)
467    }
468
469    /// Returns the `k` largest rows by given column.
470    ///
471    /// For single column, use [`Expr::top_k`].
472    #[cfg(feature = "top_k")]
473    pub fn top_k_by<K: Into<Expr>, E: AsRef<[IE]>, IE: Into<Expr> + Clone>(
474        self,
475        k: K,
476        by: E,
477        descending: Vec<bool>,
478    ) -> Self {
479        self.map_n_ary(
480            FunctionExpr::TopKBy { descending },
481            [k.into()]
482                .into_iter()
483                .chain(by.as_ref().iter().map(|e| -> Expr { e.clone().into() })),
484        )
485    }
486
487    /// Returns the `k` smallest elements.
488    ///
489    /// This has time complexity `O(n + k log(n))`.
490    #[cfg(feature = "top_k")]
491    pub fn bottom_k(self, k: Expr) -> Self {
492        self.map_binary(FunctionExpr::TopK { descending: true }, k)
493    }
494
495    /// Returns the `k` smallest rows by given column.
496    ///
497    /// For single column, use [`Expr::bottom_k`].
498    // #[cfg(feature = "top_k")]
499    #[cfg(feature = "top_k")]
500    pub fn bottom_k_by<K: Into<Expr>, E: AsRef<[IE]>, IE: Into<Expr> + Clone>(
501        self,
502        k: K,
503        by: E,
504        descending: Vec<bool>,
505    ) -> Self {
506        let descending = descending.into_iter().map(|x| !x).collect();
507        self.map_n_ary(
508            FunctionExpr::TopKBy { descending },
509            [k.into()]
510                .into_iter()
511                .chain(by.as_ref().iter().map(|e| -> Expr { e.clone().into() })),
512        )
513    }
514
515    /// Reverse column
516    pub fn reverse(self) -> Self {
517        self.map_unary(FunctionExpr::Reverse)
518    }
519
520    /// Apply a function/closure once the logical plan get executed.
521    ///
522    /// This function is very similar to [`Expr::apply`], but differs in how it handles aggregations.
523    ///
524    ///  * `map` should be used for operations that are independent of groups, e.g. `multiply * 2`, or `raise to the power`
525    ///  * `apply` should be used for operations that work on a group of data. e.g. `sum`, `count`, etc.
526    ///
527    /// It is the responsibility of the caller that the schema is correct by giving
528    /// the correct output_type. If None given the output type of the input expr is used.
529    pub fn map<F>(self, function: F, output_type: GetOutput) -> Self
530    where
531        F: Fn(Column) -> PolarsResult<Option<Column>> + 'static + Send + Sync,
532    {
533        let f = move |c: &mut [Column]| function(std::mem::take(&mut c[0]));
534
535        Expr::AnonymousFunction {
536            input: vec![self],
537            function: new_column_udf(f),
538            output_type,
539            options: FunctionOptions::elementwise()
540                .with_fmt_str("map")
541                .with_flags(|f| f | FunctionFlags::OPTIONAL_RE_ENTRANT),
542        }
543    }
544
545    /// Apply a function/closure once the logical plan get executed with many arguments.
546    ///
547    /// See the [`Expr::map`] function for the differences between [`map`](Expr::map) and [`apply`](Expr::apply).
548    pub fn map_many<F>(self, function: F, arguments: &[Expr], output_type: GetOutput) -> Self
549    where
550        F: Fn(&mut [Column]) -> PolarsResult<Option<Column>> + 'static + Send + Sync,
551    {
552        let mut input = vec![self];
553        input.extend_from_slice(arguments);
554
555        Expr::AnonymousFunction {
556            input,
557            function: new_column_udf(function),
558            output_type,
559            options: FunctionOptions::elementwise().with_fmt_str(""),
560        }
561    }
562
563    /// A function that cannot be expressed with `map` or `apply` and requires extra settings.
564    pub fn function_with_options<F>(
565        self,
566        function: F,
567        output_type: GetOutput,
568        options: FunctionOptions,
569    ) -> Self
570    where
571        F: Fn(Column) -> PolarsResult<Option<Column>> + 'static + Send + Sync,
572    {
573        let f = move |c: &mut [Column]| function(std::mem::take(&mut c[0]));
574
575        Expr::AnonymousFunction {
576            input: vec![self],
577            function: new_column_udf(f),
578            output_type,
579            options,
580        }
581    }
582
583    /// Apply a function/closure over the groups. This should only be used in a group_by aggregation.
584    ///
585    /// It is the responsibility of the caller that the schema is correct by giving
586    /// the correct output_type. If None given the output type of the input expr is used.
587    ///
588    /// This difference with [map](Self::map) is that `apply` will create a separate `Series` per group.
589    ///
590    /// * `map` should be used for operations that are independent of groups, e.g. `multiply * 2`, or `raise to the power`
591    /// * `apply` should be used for operations that work on a group of data. e.g. `sum`, `count`, etc.
592    pub fn apply<F>(self, function: F, output_type: GetOutput) -> Self
593    where
594        F: Fn(Column) -> PolarsResult<Option<Column>> + 'static + Send + Sync,
595    {
596        let f = move |c: &mut [Column]| function(std::mem::take(&mut c[0]));
597
598        Expr::AnonymousFunction {
599            input: vec![self],
600            function: new_column_udf(f),
601            output_type,
602            options: FunctionOptions::groupwise().with_fmt_str(""),
603        }
604    }
605
606    /// Apply a function/closure over the groups with many arguments. This should only be used in a group_by aggregation.
607    ///
608    /// See the [`Expr::apply`] function for the differences between [`map`](Expr::map) and [`apply`](Expr::apply).
609    pub fn apply_many<F>(self, function: F, arguments: &[Expr], output_type: GetOutput) -> Self
610    where
611        F: Fn(&mut [Column]) -> PolarsResult<Option<Column>> + 'static + Send + Sync,
612    {
613        let mut input = vec![self];
614        input.extend_from_slice(arguments);
615
616        Expr::AnonymousFunction {
617            input,
618            function: new_column_udf(function),
619            output_type,
620            options: FunctionOptions::groupwise().with_fmt_str(""),
621        }
622    }
623
624    /// Get mask of finite values if dtype is Float.
625    #[allow(clippy::wrong_self_convention)]
626    pub fn is_finite(self) -> Self {
627        self.map_unary(BooleanFunction::IsFinite)
628    }
629
630    /// Get mask of infinite values if dtype is Float.
631    #[allow(clippy::wrong_self_convention)]
632    pub fn is_infinite(self) -> Self {
633        self.map_unary(BooleanFunction::IsInfinite)
634    }
635
636    /// Get mask of NaN values if dtype is Float.
637    pub fn is_nan(self) -> Self {
638        self.map_unary(BooleanFunction::IsNan)
639    }
640
641    /// Get inverse mask of NaN values if dtype is Float.
642    pub fn is_not_nan(self) -> Self {
643        self.map_unary(BooleanFunction::IsNotNan)
644    }
645
646    /// Shift the values in the array by some period. See [the eager implementation](polars_core::series::SeriesTrait::shift).
647    pub fn shift(self, n: Expr) -> Self {
648        self.map_binary(FunctionExpr::Shift, n)
649    }
650
651    /// Shift the values in the array by some period and fill the resulting empty values.
652    pub fn shift_and_fill<E: Into<Expr>, IE: Into<Expr>>(self, n: E, fill_value: IE) -> Self {
653        self.map_ternary(FunctionExpr::ShiftAndFill, n.into(), fill_value.into())
654    }
655
656    /// Cumulatively count values from 0 to len.
657    #[cfg(feature = "cum_agg")]
658    pub fn cum_count(self, reverse: bool) -> Self {
659        self.map_unary(FunctionExpr::CumCount { reverse })
660    }
661
662    /// Get an array with the cumulative sum computed at every element.
663    #[cfg(feature = "cum_agg")]
664    pub fn cum_sum(self, reverse: bool) -> Self {
665        self.map_unary(FunctionExpr::CumSum { reverse })
666    }
667
668    /// Get an array with the cumulative product computed at every element.
669    #[cfg(feature = "cum_agg")]
670    pub fn cum_prod(self, reverse: bool) -> Self {
671        self.map_unary(FunctionExpr::CumProd { reverse })
672    }
673
674    /// Get an array with the cumulative min computed at every element.
675    #[cfg(feature = "cum_agg")]
676    pub fn cum_min(self, reverse: bool) -> Self {
677        self.map_unary(FunctionExpr::CumMin { reverse })
678    }
679
680    /// Get an array with the cumulative max computed at every element.
681    #[cfg(feature = "cum_agg")]
682    pub fn cum_max(self, reverse: bool) -> Self {
683        self.map_unary(FunctionExpr::CumMax { reverse })
684    }
685
686    /// Get the product aggregation of an expression.
687    pub fn product(self) -> Self {
688        let options = FunctionOptions::aggregation().with_fmt_str("product");
689        self.function_with_options(
690            move |c: Column| {
691                Some(
692                    c.product()
693                        .map(|sc| sc.into_series(c.name().clone()).into_column()),
694                )
695                .transpose()
696            },
697            GetOutput::map_dtype(|dt| {
698                use DataType as T;
699                Ok(match dt {
700                    T::Float32 => T::Float32,
701                    T::Float64 => T::Float64,
702                    T::UInt64 => T::UInt64,
703                    #[cfg(feature = "dtype-i128")]
704                    T::Int128 => T::Int128,
705                    _ => T::Int64,
706                })
707            }),
708            options,
709        )
710    }
711
712    /// Round underlying floating point array to given decimal numbers.
713    #[cfg(feature = "round_series")]
714    pub fn round(self, decimals: u32, mode: RoundMode) -> Self {
715        self.map_unary(FunctionExpr::Round { decimals, mode })
716    }
717
718    /// Round to a number of significant figures.
719    #[cfg(feature = "round_series")]
720    pub fn round_sig_figs(self, digits: i32) -> Self {
721        self.map_unary(FunctionExpr::RoundSF { digits })
722    }
723
724    /// Floor underlying floating point array to the lowest integers smaller or equal to the float value.
725    #[cfg(feature = "round_series")]
726    pub fn floor(self) -> Self {
727        self.map_unary(FunctionExpr::Floor)
728    }
729
730    /// Constant Pi
731    #[cfg(feature = "round_series")]
732    pub fn pi() -> Self {
733        lit(std::f64::consts::PI)
734    }
735
736    /// Ceil underlying floating point array to the highest integers smaller or equal to the float value.
737    #[cfg(feature = "round_series")]
738    pub fn ceil(self) -> Self {
739        self.map_unary(FunctionExpr::Ceil)
740    }
741
742    /// Clip underlying values to a set boundary.
743    #[cfg(feature = "round_series")]
744    pub fn clip(self, min: Expr, max: Expr) -> Self {
745        self.map_ternary(
746            FunctionExpr::Clip {
747                has_min: true,
748                has_max: true,
749            },
750            min,
751            max,
752        )
753    }
754
755    /// Clip underlying values to a set boundary.
756    #[cfg(feature = "round_series")]
757    pub fn clip_max(self, max: Expr) -> Self {
758        self.map_binary(
759            FunctionExpr::Clip {
760                has_min: false,
761                has_max: true,
762            },
763            max,
764        )
765    }
766
767    /// Clip underlying values to a set boundary.
768    #[cfg(feature = "round_series")]
769    pub fn clip_min(self, min: Expr) -> Self {
770        self.map_binary(
771            FunctionExpr::Clip {
772                has_min: true,
773                has_max: false,
774            },
775            min,
776        )
777    }
778
779    /// Convert all values to their absolute/positive value.
780    #[cfg(feature = "abs")]
781    pub fn abs(self) -> Self {
782        self.map_unary(FunctionExpr::Abs)
783    }
784
785    /// Apply window function over a subgroup.
786    /// This is similar to a group_by + aggregation + self join.
787    /// Or similar to [window functions in Postgres](https://www.postgresql.org/docs/9.1/tutorial-window.html).
788    ///
789    /// # Example
790    ///
791    /// ``` rust
792    /// #[macro_use] extern crate polars_core;
793    /// use polars_core::prelude::*;
794    /// use polars_lazy::prelude::*;
795    ///
796    /// fn example() -> PolarsResult<()> {
797    ///     let df = df! {
798    ///             "groups" => &[1, 1, 2, 2, 1, 2, 3, 3, 1],
799    ///             "values" => &[1, 2, 3, 4, 5, 6, 7, 8, 8]
800    ///         }?;
801    ///
802    ///     let out = df
803    ///      .lazy()
804    ///      .select(&[
805    ///          col("groups"),
806    ///          sum("values").over([col("groups")]),
807    ///      ])
808    ///      .collect()?;
809    ///     println!("{}", &out);
810    ///     Ok(())
811    /// }
812    ///
813    /// ```
814    ///
815    /// Outputs:
816    ///
817    /// ``` text
818    /// ╭────────┬────────╮
819    /// │ groups ┆ values │
820    /// │ ---    ┆ ---    │
821    /// │ i32    ┆ i32    │
822    /// ╞════════╪════════╡
823    /// │ 1      ┆ 16     │
824    /// │ 1      ┆ 16     │
825    /// │ 2      ┆ 13     │
826    /// │ 2      ┆ 13     │
827    /// │ …      ┆ …      │
828    /// │ 1      ┆ 16     │
829    /// │ 2      ┆ 13     │
830    /// │ 3      ┆ 15     │
831    /// │ 3      ┆ 15     │
832    /// │ 1      ┆ 16     │
833    /// ╰────────┴────────╯
834    /// ```
835    pub fn over<E: AsRef<[IE]>, IE: Into<Expr> + Clone>(self, partition_by: E) -> Self {
836        self.over_with_options(partition_by, None, Default::default())
837    }
838
839    pub fn over_with_options<E: AsRef<[IE]>, IE: Into<Expr> + Clone>(
840        self,
841        partition_by: E,
842        order_by: Option<(E, SortOptions)>,
843        options: WindowMapping,
844    ) -> Self {
845        let partition_by = partition_by
846            .as_ref()
847            .iter()
848            .map(|e| e.clone().into())
849            .collect();
850
851        let order_by = order_by.map(|(e, options)| {
852            let e = e.as_ref();
853            let e = if e.len() == 1 {
854                Arc::new(e[0].clone().into())
855            } else {
856                feature_gated!["dtype-struct", {
857                    let e = e.iter().map(|e| e.clone().into()).collect::<Vec<_>>();
858                    Arc::new(as_struct(e))
859                }]
860            };
861            (e, options)
862        });
863
864        Expr::Window {
865            function: Arc::new(self),
866            partition_by,
867            order_by,
868            options: options.into(),
869        }
870    }
871
872    #[cfg(feature = "dynamic_group_by")]
873    pub fn rolling(self, options: RollingGroupOptions) -> Self {
874        // We add the index column as `partition expr` so that the optimizer will
875        // not ignore it.
876        let index_col = col(options.index_column.clone());
877        Expr::Window {
878            function: Arc::new(self),
879            partition_by: vec![index_col],
880            order_by: None,
881            options: WindowType::Rolling(options),
882        }
883    }
884
885    fn fill_null_impl(self, fill_value: Expr) -> Self {
886        self.map_binary(FunctionExpr::FillNull, fill_value)
887    }
888
889    /// Replace the null values by a value.
890    pub fn fill_null<E: Into<Expr>>(self, fill_value: E) -> Self {
891        self.fill_null_impl(fill_value.into())
892    }
893
894    pub fn fill_null_with_strategy(self, strategy: FillNullStrategy) -> Self {
895        self.map_unary(FunctionExpr::FillNullWithStrategy(strategy))
896    }
897
898    /// Replace the floating point `NaN` values by a value.
899    pub fn fill_nan<E: Into<Expr>>(self, fill_value: E) -> Self {
900        // we take the not branch so that self is truthy value of `when -> then -> otherwise`
901        // and that ensure we keep the name of `self`
902
903        when(self.clone().is_not_nan().or(self.clone().is_null()))
904            .then(self)
905            .otherwise(fill_value.into())
906    }
907    /// Count the values of the Series
908    /// or
909    /// Get counts of the group by operation.
910    pub fn count(self) -> Self {
911        AggExpr::Count(Arc::new(self), false).into()
912    }
913
914    pub fn len(self) -> Self {
915        AggExpr::Count(Arc::new(self), true).into()
916    }
917
918    /// Get a mask of duplicated values.
919    #[allow(clippy::wrong_self_convention)]
920    #[cfg(feature = "is_unique")]
921    pub fn is_duplicated(self) -> Self {
922        self.map_unary(BooleanFunction::IsDuplicated)
923    }
924
925    #[allow(clippy::wrong_self_convention)]
926    #[cfg(feature = "is_between")]
927    pub fn is_between<E: Into<Expr>>(self, lower: E, upper: E, closed: ClosedInterval) -> Self {
928        self.map_ternary(
929            BooleanFunction::IsBetween { closed },
930            lower.into(),
931            upper.into(),
932        )
933    }
934
935    /// Get a mask of unique values.
936    #[allow(clippy::wrong_self_convention)]
937    #[cfg(feature = "is_unique")]
938    pub fn is_unique(self) -> Self {
939        self.map_unary(BooleanFunction::IsUnique)
940    }
941
942    /// Get the approximate count of unique values.
943    #[cfg(feature = "approx_unique")]
944    pub fn approx_n_unique(self) -> Self {
945        self.map_unary(FunctionExpr::ApproxNUnique)
946    }
947
948    /// Bitwise "and" operation.
949    pub fn and<E: Into<Expr>>(self, expr: E) -> Self {
950        binary_expr(self, Operator::And, expr.into())
951    }
952
953    /// Bitwise "xor" operation.
954    pub fn xor<E: Into<Expr>>(self, expr: E) -> Self {
955        binary_expr(self, Operator::Xor, expr.into())
956    }
957
958    /// Bitwise "or" operation.
959    pub fn or<E: Into<Expr>>(self, expr: E) -> Self {
960        binary_expr(self, Operator::Or, expr.into())
961    }
962
963    /// Logical "or" operation.
964    pub fn logical_or<E: Into<Expr>>(self, expr: E) -> Self {
965        binary_expr(self, Operator::LogicalOr, expr.into())
966    }
967
968    /// Logical "and" operation.
969    pub fn logical_and<E: Into<Expr>>(self, expr: E) -> Self {
970        binary_expr(self, Operator::LogicalAnd, expr.into())
971    }
972
973    /// Filter a single column.
974    ///
975    /// Should be used in aggregation context. If you want to filter on a
976    /// DataFrame level, use `LazyFrame::filter`.
977    pub fn filter<E: Into<Expr>>(self, predicate: E) -> Self {
978        if has_expr(&self, |e| matches!(e, Expr::Wildcard)) {
979            panic!("filter '*' not allowed, use LazyFrame::filter")
980        };
981        Expr::Filter {
982            input: Arc::new(self),
983            by: Arc::new(predicate.into()),
984        }
985    }
986
987    /// Check if the values of the left expression are in the lists of the right expr.
988    #[allow(clippy::wrong_self_convention)]
989    #[cfg(feature = "is_in")]
990    pub fn is_in<E: Into<Expr>>(self, other: E, nulls_equal: bool) -> Self {
991        let other = other.into();
992        let function = BooleanFunction::IsIn { nulls_equal };
993        let options = function.function_options();
994        let function = function.into();
995        Expr::Function {
996            input: vec![self, other],
997            function,
998            options,
999        }
1000    }
1001
1002    /// Sort this column by the ordering of another column evaluated from given expr.
1003    /// Can also be used in a group_by context to sort the groups.
1004    ///
1005    /// # Example
1006    ///
1007    /// ```rust
1008    /// # use polars_core::prelude::*;
1009    /// # use polars_lazy::prelude::*;
1010    /// # fn main() -> PolarsResult<()> {
1011    /// let lf = df! {
1012    ///     "a" => [1, 2, 3, 4, 5],
1013    ///     "b" => [5, 4, 3, 2, 1]
1014    /// }?.lazy();
1015    ///
1016    /// let sorted = lf
1017    ///     .select(
1018    ///         vec![col("a").sort_by(col("b"), SortOptions::default())],
1019    ///     )
1020    ///     .collect()?;
1021    ///
1022    /// assert_eq!(
1023    ///     sorted,
1024    ///     df! { "a" => [5, 4, 3, 2, 1] }?
1025    /// );
1026    /// # Ok(())
1027    /// # }
1028    pub fn sort_by<E: AsRef<[IE]>, IE: Into<Expr> + Clone>(
1029        self,
1030        by: E,
1031        sort_options: SortMultipleOptions,
1032    ) -> Expr {
1033        let by = by.as_ref().iter().map(|e| e.clone().into()).collect();
1034        Expr::SortBy {
1035            expr: Arc::new(self),
1036            by,
1037            sort_options,
1038        }
1039    }
1040
1041    #[cfg(feature = "repeat_by")]
1042    /// Repeat the column `n` times, where `n` is determined by the values in `by`.
1043    /// This yields an `Expr` of dtype `List`.
1044    pub fn repeat_by<E: Into<Expr>>(self, by: E) -> Expr {
1045        self.map_binary(FunctionExpr::RepeatBy, by.into())
1046    }
1047
1048    #[cfg(feature = "is_first_distinct")]
1049    #[allow(clippy::wrong_self_convention)]
1050    /// Get a mask of the first unique value.
1051    pub fn is_first_distinct(self) -> Expr {
1052        self.map_unary(BooleanFunction::IsFirstDistinct)
1053    }
1054
1055    #[cfg(feature = "is_last_distinct")]
1056    #[allow(clippy::wrong_self_convention)]
1057    /// Get a mask of the last unique value.
1058    pub fn is_last_distinct(self) -> Expr {
1059        self.map_unary(BooleanFunction::IsLastDistinct)
1060    }
1061
1062    fn dot_impl(self, other: Expr) -> Expr {
1063        (self * other).sum()
1064    }
1065
1066    /// Compute the dot/inner product between two expressions.
1067    pub fn dot<E: Into<Expr>>(self, other: E) -> Expr {
1068        self.dot_impl(other.into())
1069    }
1070
1071    #[cfg(feature = "mode")]
1072    /// Compute the mode(s) of this column. This is the most occurring value.
1073    pub fn mode(self) -> Expr {
1074        self.map_unary(FunctionExpr::Mode)
1075    }
1076
1077    /// Exclude a column from a wildcard/regex selection.
1078    ///
1079    /// You may also use regexes in the exclude as long as they start with `^` and end with `$`.
1080    pub fn exclude(self, columns: impl IntoVec<PlSmallStr>) -> Expr {
1081        let v = columns.into_vec().into_iter().map(Excluded::Name).collect();
1082        Expr::Exclude(Arc::new(self), v)
1083    }
1084
1085    pub fn exclude_dtype<D: AsRef<[DataType]>>(self, dtypes: D) -> Expr {
1086        let v = dtypes
1087            .as_ref()
1088            .iter()
1089            .map(|dt| Excluded::Dtype(dt.clone()))
1090            .collect();
1091        Expr::Exclude(Arc::new(self), v)
1092    }
1093
1094    #[cfg(feature = "interpolate")]
1095    /// Interpolate intermediate values.
1096    /// Nulls at the beginning and end of the series remain null.
1097    pub fn interpolate(self, method: InterpolationMethod) -> Expr {
1098        self.map_unary(FunctionExpr::Interpolate(method))
1099    }
1100
1101    #[cfg(feature = "rolling_window_by")]
1102    #[allow(clippy::type_complexity)]
1103    fn finish_rolling_by(
1104        self,
1105        by: Expr,
1106        options: RollingOptionsDynamicWindow,
1107        rolling_function_by: fn(RollingOptionsDynamicWindow) -> RollingFunctionBy,
1108    ) -> Expr {
1109        self.map_binary(
1110            FunctionExpr::RollingExprBy(rolling_function_by(options)),
1111            by,
1112        )
1113    }
1114
1115    #[cfg(feature = "interpolate_by")]
1116    /// Interpolate intermediate values.
1117    /// Nulls at the beginning and end of the series remain null.
1118    /// The `by` column provides the x-coordinates for interpolation and must not contain nulls.
1119    pub fn interpolate_by(self, by: Expr) -> Expr {
1120        self.map_binary(FunctionExpr::InterpolateBy, by)
1121    }
1122
1123    #[cfg(feature = "rolling_window")]
1124    #[allow(clippy::type_complexity)]
1125    fn finish_rolling(
1126        self,
1127        options: RollingOptionsFixedWindow,
1128        rolling_function: fn(RollingOptionsFixedWindow) -> RollingFunction,
1129    ) -> Expr {
1130        self.map_unary(FunctionExpr::RollingExpr(rolling_function(options)))
1131    }
1132
1133    /// Apply a rolling minimum based on another column.
1134    #[cfg(feature = "rolling_window_by")]
1135    pub fn rolling_min_by(self, by: Expr, options: RollingOptionsDynamicWindow) -> Expr {
1136        self.finish_rolling_by(by, options, RollingFunctionBy::MinBy)
1137    }
1138
1139    /// Apply a rolling maximum based on another column.
1140    #[cfg(feature = "rolling_window_by")]
1141    pub fn rolling_max_by(self, by: Expr, options: RollingOptionsDynamicWindow) -> Expr {
1142        self.finish_rolling_by(by, options, RollingFunctionBy::MaxBy)
1143    }
1144
1145    /// Apply a rolling mean based on another column.
1146    #[cfg(feature = "rolling_window_by")]
1147    pub fn rolling_mean_by(self, by: Expr, options: RollingOptionsDynamicWindow) -> Expr {
1148        self.finish_rolling_by(by, options, RollingFunctionBy::MeanBy)
1149    }
1150
1151    /// Apply a rolling sum based on another column.
1152    #[cfg(feature = "rolling_window_by")]
1153    pub fn rolling_sum_by(self, by: Expr, options: RollingOptionsDynamicWindow) -> Expr {
1154        self.finish_rolling_by(by, options, RollingFunctionBy::SumBy)
1155    }
1156
1157    /// Apply a rolling quantile based on another column.
1158    #[cfg(feature = "rolling_window_by")]
1159    pub fn rolling_quantile_by(
1160        self,
1161        by: Expr,
1162        method: QuantileMethod,
1163        quantile: f64,
1164        mut options: RollingOptionsDynamicWindow,
1165    ) -> Expr {
1166        use polars_compute::rolling::{RollingFnParams, RollingQuantileParams};
1167        options.fn_params = Some(RollingFnParams::Quantile(RollingQuantileParams {
1168            prob: quantile,
1169            method,
1170        }));
1171
1172        self.finish_rolling_by(by, options, RollingFunctionBy::QuantileBy)
1173    }
1174
1175    /// Apply a rolling variance based on another column.
1176    #[cfg(feature = "rolling_window_by")]
1177    pub fn rolling_var_by(self, by: Expr, options: RollingOptionsDynamicWindow) -> Expr {
1178        self.finish_rolling_by(by, options, RollingFunctionBy::VarBy)
1179    }
1180
1181    /// Apply a rolling std-dev based on another column.
1182    #[cfg(feature = "rolling_window_by")]
1183    pub fn rolling_std_by(self, by: Expr, options: RollingOptionsDynamicWindow) -> Expr {
1184        self.finish_rolling_by(by, options, RollingFunctionBy::StdBy)
1185    }
1186
1187    /// Apply a rolling median based on another column.
1188    #[cfg(feature = "rolling_window_by")]
1189    pub fn rolling_median_by(self, by: Expr, options: RollingOptionsDynamicWindow) -> Expr {
1190        self.rolling_quantile_by(by, QuantileMethod::Linear, 0.5, options)
1191    }
1192
1193    /// Apply a rolling minimum.
1194    ///
1195    /// See: [`RollingAgg::rolling_min`]
1196    #[cfg(feature = "rolling_window")]
1197    pub fn rolling_min(self, options: RollingOptionsFixedWindow) -> Expr {
1198        self.finish_rolling(options, RollingFunction::Min)
1199    }
1200
1201    /// Apply a rolling maximum.
1202    ///
1203    /// See: [`RollingAgg::rolling_max`]
1204    #[cfg(feature = "rolling_window")]
1205    pub fn rolling_max(self, options: RollingOptionsFixedWindow) -> Expr {
1206        self.finish_rolling(options, RollingFunction::Max)
1207    }
1208
1209    /// Apply a rolling mean.
1210    ///
1211    /// See: [`RollingAgg::rolling_mean`]
1212    #[cfg(feature = "rolling_window")]
1213    pub fn rolling_mean(self, options: RollingOptionsFixedWindow) -> Expr {
1214        self.finish_rolling(options, RollingFunction::Mean)
1215    }
1216
1217    /// Apply a rolling sum.
1218    ///
1219    /// See: [`RollingAgg::rolling_sum`]
1220    #[cfg(feature = "rolling_window")]
1221    pub fn rolling_sum(self, options: RollingOptionsFixedWindow) -> Expr {
1222        self.finish_rolling(options, RollingFunction::Sum)
1223    }
1224
1225    /// Apply a rolling median.
1226    ///
1227    /// See: [`RollingAgg::rolling_median`]
1228    #[cfg(feature = "rolling_window")]
1229    pub fn rolling_median(self, options: RollingOptionsFixedWindow) -> Expr {
1230        self.rolling_quantile(QuantileMethod::Linear, 0.5, options)
1231    }
1232
1233    /// Apply a rolling quantile.
1234    ///
1235    /// See: [`RollingAgg::rolling_quantile`]
1236    #[cfg(feature = "rolling_window")]
1237    pub fn rolling_quantile(
1238        self,
1239        method: QuantileMethod,
1240        quantile: f64,
1241        mut options: RollingOptionsFixedWindow,
1242    ) -> Expr {
1243        use polars_compute::rolling::{RollingFnParams, RollingQuantileParams};
1244
1245        options.fn_params = Some(RollingFnParams::Quantile(RollingQuantileParams {
1246            prob: quantile,
1247            method,
1248        }));
1249
1250        self.finish_rolling(options, RollingFunction::Quantile)
1251    }
1252
1253    /// Apply a rolling variance.
1254    #[cfg(feature = "rolling_window")]
1255    pub fn rolling_var(self, options: RollingOptionsFixedWindow) -> Expr {
1256        self.finish_rolling(options, RollingFunction::Var)
1257    }
1258
1259    /// Apply a rolling std-dev.
1260    #[cfg(feature = "rolling_window")]
1261    pub fn rolling_std(self, options: RollingOptionsFixedWindow) -> Expr {
1262        self.finish_rolling(options, RollingFunction::Std)
1263    }
1264
1265    /// Apply a rolling skew.
1266    #[cfg(feature = "rolling_window")]
1267    #[cfg(feature = "moment")]
1268    pub fn rolling_skew(self, options: RollingOptionsFixedWindow) -> Expr {
1269        self.finish_rolling(options, RollingFunction::Skew)
1270    }
1271
1272    /// Apply a rolling skew.
1273    #[cfg(feature = "rolling_window")]
1274    #[cfg(feature = "moment")]
1275    pub fn rolling_kurtosis(self, options: RollingOptionsFixedWindow) -> Expr {
1276        self.finish_rolling(options, RollingFunction::Kurtosis)
1277    }
1278
1279    #[cfg(feature = "rolling_window")]
1280    /// Apply a custom function over a rolling/ moving window of the array.
1281    /// This has quite some dynamic dispatch, so prefer rolling_min, max, mean, sum over this.
1282    pub fn rolling_map(
1283        self,
1284        f: Arc<dyn Fn(&Series) -> Series + Send + Sync>,
1285        output_type: GetOutput,
1286        options: RollingOptionsFixedWindow,
1287    ) -> Expr {
1288        self.apply(
1289            move |c: Column| {
1290                c.as_materialized_series()
1291                    .rolling_map(f.as_ref(), options.clone())
1292                    .map(Column::from)
1293                    .map(Some)
1294            },
1295            output_type,
1296        )
1297        .with_fmt("rolling_map")
1298    }
1299
1300    #[cfg(feature = "rolling_window")]
1301    /// Apply a custom function over a rolling/ moving window of the array.
1302    /// Prefer this over rolling_apply in case of floating point numbers as this is faster.
1303    /// This has quite some dynamic dispatch, so prefer rolling_min, max, mean, sum over this.
1304    pub fn rolling_map_float<F>(self, window_size: usize, f: F) -> Expr
1305    where
1306        F: 'static + FnMut(&mut Float64Chunked) -> Option<f64> + Send + Sync + Copy,
1307    {
1308        self.apply(
1309            move |c: Column| {
1310                let out = match c.dtype() {
1311                    DataType::Float64 => c
1312                        .f64()
1313                        .unwrap()
1314                        .rolling_map_float(window_size, f)
1315                        .map(|ca| ca.into_column()),
1316                    _ => c
1317                        .cast(&DataType::Float64)?
1318                        .f64()
1319                        .unwrap()
1320                        .rolling_map_float(window_size, f)
1321                        .map(|ca| ca.into_column()),
1322                }?;
1323                if let DataType::Float32 = c.dtype() {
1324                    out.cast(&DataType::Float32).map(Some)
1325                } else {
1326                    Ok(Some(out))
1327                }
1328            },
1329            GetOutput::map_field(|field| {
1330                Ok(match field.dtype() {
1331                    DataType::Float64 => field.clone(),
1332                    DataType::Float32 => Field::new(field.name().clone(), DataType::Float32),
1333                    _ => Field::new(field.name().clone(), DataType::Float64),
1334                })
1335            }),
1336        )
1337        .with_fmt("rolling_map_float")
1338    }
1339
1340    #[cfg(feature = "peaks")]
1341    pub fn peak_min(self) -> Expr {
1342        self.map_unary(FunctionExpr::PeakMin)
1343    }
1344
1345    #[cfg(feature = "peaks")]
1346    pub fn peak_max(self) -> Expr {
1347        self.map_unary(FunctionExpr::PeakMax)
1348    }
1349
1350    #[cfg(feature = "rank")]
1351    /// Assign ranks to data, dealing with ties appropriately.
1352    pub fn rank(self, options: RankOptions, seed: Option<u64>) -> Expr {
1353        self.map_unary(FunctionExpr::Rank { options, seed })
1354    }
1355
1356    #[cfg(feature = "replace")]
1357    /// Replace the given values with other values.
1358    pub fn replace<E: Into<Expr>>(self, old: E, new: E) -> Expr {
1359        let old = old.into();
1360        let new = new.into();
1361        self.map_n_ary(FunctionExpr::Replace, [old, new])
1362    }
1363
1364    #[cfg(feature = "replace")]
1365    /// Replace the given values with other values.
1366    pub fn replace_strict<E: Into<Expr>>(
1367        self,
1368        old: E,
1369        new: E,
1370        default: Option<E>,
1371        return_dtype: Option<DataType>,
1372    ) -> Expr {
1373        let old = old.into();
1374        let new = new.into();
1375        let mut args = vec![old, new];
1376        args.extend(default.map(Into::into));
1377        self.map_n_ary(FunctionExpr::ReplaceStrict { return_dtype }, args)
1378    }
1379
1380    #[cfg(feature = "cutqcut")]
1381    /// Bin continuous values into discrete categories.
1382    pub fn cut(
1383        self,
1384        breaks: Vec<f64>,
1385        labels: Option<impl IntoVec<PlSmallStr>>,
1386        left_closed: bool,
1387        include_breaks: bool,
1388    ) -> Expr {
1389        self.map_unary(FunctionExpr::Cut {
1390            breaks,
1391            labels: labels.map(|x| x.into_vec()),
1392            left_closed,
1393            include_breaks,
1394        })
1395    }
1396
1397    #[cfg(feature = "cutqcut")]
1398    /// Bin continuous values into discrete categories based on their quantiles.
1399    pub fn qcut(
1400        self,
1401        probs: Vec<f64>,
1402        labels: Option<impl IntoVec<PlSmallStr>>,
1403        left_closed: bool,
1404        allow_duplicates: bool,
1405        include_breaks: bool,
1406    ) -> Expr {
1407        self.map_unary(FunctionExpr::QCut {
1408            probs,
1409            labels: labels.map(|x| x.into_vec()),
1410            left_closed,
1411            allow_duplicates,
1412            include_breaks,
1413        })
1414    }
1415
1416    #[cfg(feature = "cutqcut")]
1417    /// Bin continuous values into discrete categories using uniform quantile probabilities.
1418    pub fn qcut_uniform(
1419        self,
1420        n_bins: usize,
1421        labels: Option<impl IntoVec<PlSmallStr>>,
1422        left_closed: bool,
1423        allow_duplicates: bool,
1424        include_breaks: bool,
1425    ) -> Expr {
1426        let probs = (1..n_bins).map(|b| b as f64 / n_bins as f64).collect();
1427        self.map_unary(FunctionExpr::QCut {
1428            probs,
1429            labels: labels.map(|x| x.into_vec()),
1430            left_closed,
1431            allow_duplicates,
1432            include_breaks,
1433        })
1434    }
1435
1436    #[cfg(feature = "rle")]
1437    /// Get the lengths of runs of identical values.
1438    pub fn rle(self) -> Expr {
1439        self.map_unary(FunctionExpr::RLE)
1440    }
1441
1442    #[cfg(feature = "rle")]
1443    /// Similar to `rle`, but maps values to run IDs.
1444    pub fn rle_id(self) -> Expr {
1445        self.map_unary(FunctionExpr::RLEID)
1446    }
1447
1448    #[cfg(feature = "diff")]
1449    /// Calculate the n-th discrete difference between values.
1450    pub fn diff(self, n: Expr, null_behavior: NullBehavior) -> Expr {
1451        self.map_binary(FunctionExpr::Diff(null_behavior), n)
1452    }
1453
1454    #[cfg(feature = "pct_change")]
1455    /// Computes percentage change between values.
1456    pub fn pct_change(self, n: Expr) -> Expr {
1457        self.map_binary(FunctionExpr::PctChange, n)
1458    }
1459
1460    #[cfg(feature = "moment")]
1461    /// Compute the sample skewness of a data set.
1462    ///
1463    /// For normally distributed data, the skewness should be about zero. For
1464    /// uni-modal continuous distributions, a skewness value greater than zero means
1465    /// that there is more weight in the right tail of the distribution. The
1466    /// function `skewtest` can be used to determine if the skewness value
1467    /// is close enough to zero, statistically speaking.
1468    ///
1469    /// see: [scipy](https://github.com/scipy/scipy/blob/47bb6febaa10658c72962b9615d5d5aa2513fa3a/scipy/stats/stats.py#L1024)
1470    pub fn skew(self, bias: bool) -> Expr {
1471        self.map_unary(FunctionExpr::Skew(bias))
1472    }
1473
1474    #[cfg(feature = "moment")]
1475    /// Compute the kurtosis (Fisher or Pearson).
1476    ///
1477    /// Kurtosis is the fourth central moment divided by the square of the
1478    /// variance. If Fisher's definition is used, then 3.0 is subtracted from
1479    /// the result to give 0.0 for a normal distribution.
1480    /// If bias is False then the kurtosis is calculated using k statistics to
1481    /// eliminate bias coming from biased moment estimators.
1482    pub fn kurtosis(self, fisher: bool, bias: bool) -> Expr {
1483        self.map_unary(FunctionExpr::Kurtosis(fisher, bias))
1484    }
1485
1486    /// Get maximal value that could be hold by this dtype.
1487    pub fn upper_bound(self) -> Expr {
1488        self.map_unary(FunctionExpr::UpperBound)
1489    }
1490
1491    /// Get minimal value that could be hold by this dtype.
1492    pub fn lower_bound(self) -> Expr {
1493        self.map_unary(FunctionExpr::LowerBound)
1494    }
1495
1496    #[cfg(feature = "dtype-array")]
1497    pub fn reshape(self, dimensions: &[i64]) -> Self {
1498        let dimensions = dimensions
1499            .iter()
1500            .map(|&v| ReshapeDimension::new(v))
1501            .collect();
1502        self.map_unary(FunctionExpr::Reshape(dimensions))
1503    }
1504
1505    #[cfg(feature = "ewma")]
1506    /// Calculate the exponentially-weighted moving average.
1507    pub fn ewm_mean(self, options: EWMOptions) -> Self {
1508        self.map_unary(FunctionExpr::EwmMean { options })
1509    }
1510
1511    #[cfg(feature = "ewma_by")]
1512    /// Calculate the exponentially-weighted moving average by a time column.
1513    pub fn ewm_mean_by(self, times: Expr, half_life: Duration) -> Self {
1514        self.map_binary(FunctionExpr::EwmMeanBy { half_life }, times)
1515    }
1516
1517    #[cfg(feature = "ewma")]
1518    /// Calculate the exponentially-weighted moving standard deviation.
1519    pub fn ewm_std(self, options: EWMOptions) -> Self {
1520        self.map_unary(FunctionExpr::EwmStd { options })
1521    }
1522
1523    #[cfg(feature = "ewma")]
1524    /// Calculate the exponentially-weighted moving variance.
1525    pub fn ewm_var(self, options: EWMOptions) -> Self {
1526        self.map_unary(FunctionExpr::EwmVar { options })
1527    }
1528
1529    /// Returns whether any of the values in the column are `true`.
1530    ///
1531    /// If `ignore_nulls` is `False`, [Kleene logic] is used to deal with nulls:
1532    /// if the column contains any null values and no `true` values, the output
1533    /// is null.
1534    ///
1535    /// [Kleene logic]: https://en.wikipedia.org/wiki/Three-valued_logic
1536    pub fn any(self, ignore_nulls: bool) -> Self {
1537        self.map_unary(BooleanFunction::Any { ignore_nulls })
1538    }
1539
1540    /// Returns whether all values in the column are `true`.
1541    ///
1542    /// If `ignore_nulls` is `False`, [Kleene logic] is used to deal with nulls:
1543    /// if the column contains any null values and no `false` values, the output
1544    /// is null.
1545    ///
1546    /// [Kleene logic]: https://en.wikipedia.org/wiki/Three-valued_logic
1547    pub fn all(self, ignore_nulls: bool) -> Self {
1548        self.map_unary(BooleanFunction::All { ignore_nulls })
1549    }
1550
1551    /// Shrink numeric columns to the minimal required datatype
1552    /// needed to fit the extrema of this [`Series`].
1553    /// This can be used to reduce memory pressure.
1554    pub fn shrink_dtype(self) -> Self {
1555        self.map_unary(FunctionExpr::ShrinkType)
1556    }
1557
1558    #[cfg(feature = "dtype-struct")]
1559    /// Count all unique values and create a struct mapping value to count.
1560    /// (Note that it is better to turn parallel off in the aggregation context).
1561    /// The name of the struct field with the counts is given by the parameter `name`.
1562    pub fn value_counts(self, sort: bool, parallel: bool, name: &str, normalize: bool) -> Self {
1563        self.map_unary(FunctionExpr::ValueCounts {
1564            sort,
1565            parallel,
1566            name: name.into(),
1567            normalize,
1568        })
1569    }
1570
1571    #[cfg(feature = "unique_counts")]
1572    /// Returns a count of the unique values in the order of appearance.
1573    /// This method differs from [`Expr::value_counts`] in that it does not return the
1574    /// values, only the counts and might be faster.
1575    pub fn unique_counts(self) -> Self {
1576        self.map_unary(FunctionExpr::UniqueCounts)
1577    }
1578
1579    #[cfg(feature = "log")]
1580    /// Compute the logarithm to a given base.
1581    pub fn log(self, base: f64) -> Self {
1582        self.map_unary(FunctionExpr::Log { base })
1583    }
1584
1585    #[cfg(feature = "log")]
1586    /// Compute the natural logarithm of all elements plus one in the input array.
1587    pub fn log1p(self) -> Self {
1588        self.map_unary(FunctionExpr::Log1p)
1589    }
1590
1591    #[cfg(feature = "log")]
1592    /// Calculate the exponential of all elements in the input array.
1593    pub fn exp(self) -> Self {
1594        self.map_unary(FunctionExpr::Exp)
1595    }
1596
1597    #[cfg(feature = "log")]
1598    /// Compute the entropy as `-sum(pk * log(pk)`.
1599    /// where `pk` are discrete probabilities.
1600    pub fn entropy(self, base: f64, normalize: bool) -> Self {
1601        self.map_unary(FunctionExpr::Entropy { base, normalize })
1602    }
1603    /// Get the null count of the column/group.
1604    pub fn null_count(self) -> Expr {
1605        self.map_unary(FunctionExpr::NullCount)
1606    }
1607
1608    /// Set this `Series` as `sorted` so that downstream code can use
1609    /// fast paths for sorted arrays.
1610    /// # Warning
1611    /// This can lead to incorrect results if this `Series` is not sorted!!
1612    /// Use with care!
1613    pub fn set_sorted_flag(self, sorted: IsSorted) -> Expr {
1614        // This is `map`. If a column is sorted. Chunks of that column are also sorted.
1615        self.map_unary(FunctionExpr::SetSortedFlag(sorted))
1616    }
1617
1618    #[cfg(feature = "row_hash")]
1619    /// Compute the hash of every element.
1620    pub fn hash(self, k0: u64, k1: u64, k2: u64, k3: u64) -> Expr {
1621        self.map_unary(FunctionExpr::Hash(k0, k1, k2, k3))
1622    }
1623
1624    pub fn to_physical(self) -> Expr {
1625        self.map_unary(FunctionExpr::ToPhysical)
1626    }
1627
1628    pub fn gather_every(self, n: usize, offset: usize) -> Expr {
1629        self.map_unary(FunctionExpr::GatherEvery { n, offset })
1630    }
1631
1632    #[cfg(feature = "reinterpret")]
1633    pub fn reinterpret(self, signed: bool) -> Expr {
1634        self.map_unary(FunctionExpr::Reinterpret(signed))
1635    }
1636
1637    pub fn extend_constant(self, value: Expr, n: Expr) -> Expr {
1638        self.map_ternary(FunctionExpr::ExtendConstant, value, n)
1639    }
1640
1641    #[cfg(feature = "strings")]
1642    /// Get the [`string::StringNameSpace`]
1643    pub fn str(self) -> string::StringNameSpace {
1644        string::StringNameSpace(self)
1645    }
1646
1647    /// Get the [`binary::BinaryNameSpace`]
1648    pub fn binary(self) -> binary::BinaryNameSpace {
1649        binary::BinaryNameSpace(self)
1650    }
1651
1652    #[cfg(feature = "temporal")]
1653    /// Get the [`dt::DateLikeNameSpace`]
1654    pub fn dt(self) -> dt::DateLikeNameSpace {
1655        dt::DateLikeNameSpace(self)
1656    }
1657
1658    /// Get the [`list::ListNameSpace`]
1659    pub fn list(self) -> list::ListNameSpace {
1660        list::ListNameSpace(self)
1661    }
1662
1663    /// Get the [`name::ExprNameNameSpace`]
1664    pub fn name(self) -> name::ExprNameNameSpace {
1665        name::ExprNameNameSpace(self)
1666    }
1667
1668    /// Get the [`array::ArrayNameSpace`].
1669    #[cfg(feature = "dtype-array")]
1670    pub fn arr(self) -> array::ArrayNameSpace {
1671        array::ArrayNameSpace(self)
1672    }
1673
1674    /// Get the [`CategoricalNameSpace`].
1675    #[cfg(feature = "dtype-categorical")]
1676    pub fn cat(self) -> cat::CategoricalNameSpace {
1677        cat::CategoricalNameSpace(self)
1678    }
1679
1680    /// Get the [`struct_::StructNameSpace`].
1681    #[cfg(feature = "dtype-struct")]
1682    pub fn struct_(self) -> struct_::StructNameSpace {
1683        struct_::StructNameSpace(self)
1684    }
1685
1686    /// Get the [`meta::MetaNameSpace`]
1687    #[cfg(feature = "meta")]
1688    pub fn meta(self) -> meta::MetaNameSpace {
1689        meta::MetaNameSpace(self)
1690    }
1691}
1692
1693/// Apply a function/closure over multiple columns once the logical plan get executed.
1694///
1695/// This function is very similar to [`apply_multiple`], but differs in how it handles aggregations.
1696///
1697///  * [`map_multiple`] should be used for operations that are independent of groups, e.g. `multiply * 2`, or `raise to the power`
1698///  * [`apply_multiple`] should be used for operations that work on a group of data. e.g. `sum`, `count`, etc.
1699///
1700/// It is the responsibility of the caller that the schema is correct by giving
1701/// the correct output_type. If None given the output type of the input expr is used.
1702pub fn map_multiple<F, E>(function: F, expr: E, output_type: GetOutput) -> Expr
1703where
1704    F: Fn(&mut [Column]) -> PolarsResult<Option<Column>> + 'static + Send + Sync,
1705    E: AsRef<[Expr]>,
1706{
1707    let input = expr.as_ref().to_vec();
1708
1709    Expr::AnonymousFunction {
1710        input,
1711        function: new_column_udf(function),
1712        output_type,
1713        options: FunctionOptions::elementwise().with_fmt_str(""),
1714    }
1715}
1716
1717/// Apply a function/closure over the groups of multiple columns. This should only be used in a group_by aggregation.
1718///
1719/// It is the responsibility of the caller that the schema is correct by giving
1720/// the correct output_type. If None given the output type of the input expr is used.
1721///
1722/// This difference with [`map_multiple`] is that [`apply_multiple`] will create a separate [`Series`] per group.
1723///
1724/// * [`map_multiple`] should be used for operations that are independent of groups, e.g. `multiply * 2`, or `raise to the power`
1725/// * [`apply_multiple`] should be used for operations that work on a group of data. e.g. `sum`, `count`, etc.
1726pub fn apply_multiple<F, E>(
1727    function: F,
1728    expr: E,
1729    output_type: GetOutput,
1730    returns_scalar: bool,
1731) -> Expr
1732where
1733    F: Fn(&mut [Column]) -> PolarsResult<Option<Column>> + 'static + Send + Sync,
1734    E: AsRef<[Expr]>,
1735{
1736    let input = expr.as_ref().to_vec();
1737    Expr::AnonymousFunction {
1738        input,
1739        function: new_column_udf(function),
1740        output_type,
1741        options: FunctionOptions::groupwise()
1742            .with_fmt_str("")
1743            .with_flags(|mut f| {
1744                f.set(FunctionFlags::RETURNS_SCALAR, returns_scalar);
1745                f
1746            }),
1747    }
1748}
1749
1750/// Return the number of rows in the context.
1751pub fn len() -> Expr {
1752    Expr::Len
1753}
1754
1755/// First column in a DataFrame.
1756pub fn first() -> Expr {
1757    Expr::Nth(0)
1758}
1759
1760/// Last column in a DataFrame.
1761pub fn last() -> Expr {
1762    Expr::Nth(-1)
1763}
1764
1765/// Nth column in a DataFrame.
1766pub fn nth(n: i64) -> Expr {
1767    Expr::Nth(n)
1768}
polars_plan/dsl/mod.rs

polars_plan/dsl/
mod.rs