polars_plan/dsl/
mod.rs

1#![allow(ambiguous_glob_reexports)]
2//! Domain specific language for the Lazy API.
3#[cfg(feature = "dtype-categorical")]
4pub mod cat;
5
6#[cfg(feature = "dtype-categorical")]
7pub use cat::*;
8#[cfg(feature = "rolling_window_by")]
9pub(crate) use polars_time::prelude::*;
10
11mod arithmetic;
12mod arity;
13#[cfg(feature = "dtype-array")]
14mod array;
15pub mod binary;
16#[cfg(feature = "bitwise")]
17mod bitwise;
18mod builder_dsl;
19pub use builder_dsl::*;
20#[cfg(feature = "temporal")]
21pub mod dt;
22mod expr;
23mod format;
24mod from;
25pub mod function_expr;
26pub mod functions;
27mod list;
28mod match_to_schema;
29#[cfg(feature = "meta")]
30mod meta;
31mod name;
32mod options;
33#[cfg(feature = "python")]
34pub mod python_dsl;
35#[cfg(feature = "random")]
36mod random;
37mod scan_sources;
38mod selector;
39mod statistics;
40#[cfg(feature = "strings")]
41pub mod string;
42#[cfg(feature = "dtype-struct")]
43mod struct_;
44pub mod udf;
45
46use std::fmt::Debug;
47use std::sync::Arc;
48
49mod plan;
50pub use arity::*;
51#[cfg(feature = "dtype-array")]
52pub use array::*;
53pub use expr::*;
54pub use function_expr::schema::FieldsMapper;
55pub use function_expr::*;
56pub use functions::*;
57pub use list::*;
58pub use match_to_schema::*;
59#[cfg(feature = "meta")]
60pub use meta::*;
61pub use name::*;
62pub use options::*;
63pub use plan::*;
64use polars_compute::rolling::QuantileMethod;
65use polars_core::chunked_array::cast::CastOptions;
66use polars_core::error::feature_gated;
67use polars_core::prelude::*;
68use polars_core::series::IsSorted;
69#[cfg(feature = "diff")]
70use polars_core::series::ops::NullBehavior;
71#[cfg(any(feature = "search_sorted", feature = "is_between"))]
72use polars_core::utils::SuperTypeFlags;
73use polars_core::utils::{SuperTypeOptions, try_get_supertype};
74pub use selector::Selector;
75#[cfg(feature = "dtype-struct")]
76pub use struct_::*;
77pub use udf::UserDefinedFunction;
78mod file_scan;
79pub use file_scan::*;
80pub use scan_sources::{ScanSource, ScanSourceIter, ScanSourceRef, ScanSources};
81
82pub use crate::plans::lit;
83use crate::prelude::*;
84
85impl Expr {
86    /// Modify the Options passed to the `Function` node.
87    pub(crate) fn with_function_options<F>(self, func: F) -> Expr
88    where
89        F: Fn(FunctionOptions) -> FunctionOptions,
90    {
91        match self {
92            Self::AnonymousFunction {
93                input,
94                function,
95                output_type,
96                mut options,
97            } => {
98                options = func(options);
99                Self::AnonymousFunction {
100                    input,
101                    function,
102                    output_type,
103                    options,
104                }
105            },
106            Self::Function {
107                input,
108                function,
109                mut options,
110            } => {
111                options = func(options);
112                Self::Function {
113                    input,
114                    function,
115                    options,
116                }
117            },
118            _ => {
119                panic!("implementation error")
120            },
121        }
122    }
123
124    /// Overwrite the function name used for formatting.
125    /// (this is not intended to be used).
126    #[doc(hidden)]
127    pub fn with_fmt(self, name: &'static str) -> Expr {
128        self.with_function_options(|mut options| {
129            options.fmt_str = name;
130            options
131        })
132    }
133
134    /// Compare `Expr` with other `Expr` on equality.
135    pub fn eq<E: Into<Expr>>(self, other: E) -> Expr {
136        binary_expr(self, Operator::Eq, other.into())
137    }
138
139    /// Compare `Expr` with other `Expr` on equality where `None == None`.
140    pub fn eq_missing<E: Into<Expr>>(self, other: E) -> Expr {
141        binary_expr(self, Operator::EqValidity, other.into())
142    }
143
144    /// Compare `Expr` with other `Expr` on non-equality.
145    pub fn neq<E: Into<Expr>>(self, other: E) -> Expr {
146        binary_expr(self, Operator::NotEq, other.into())
147    }
148
149    /// Compare `Expr` with other `Expr` on non-equality where `None == None`.
150    pub fn neq_missing<E: Into<Expr>>(self, other: E) -> Expr {
151        binary_expr(self, Operator::NotEqValidity, other.into())
152    }
153
154    /// Check if `Expr` < `Expr`.
155    pub fn lt<E: Into<Expr>>(self, other: E) -> Expr {
156        binary_expr(self, Operator::Lt, other.into())
157    }
158
159    /// Check if `Expr` > `Expr`.
160    pub fn gt<E: Into<Expr>>(self, other: E) -> Expr {
161        binary_expr(self, Operator::Gt, other.into())
162    }
163
164    /// Check if `Expr` >= `Expr`.
165    pub fn gt_eq<E: Into<Expr>>(self, other: E) -> Expr {
166        binary_expr(self, Operator::GtEq, other.into())
167    }
168
169    /// Check if `Expr` <= `Expr`.
170    pub fn lt_eq<E: Into<Expr>>(self, other: E) -> Expr {
171        binary_expr(self, Operator::LtEq, other.into())
172    }
173
174    /// Negate `Expr`.
175    #[allow(clippy::should_implement_trait)]
176    pub fn not(self) -> Expr {
177        self.map_unary(BooleanFunction::Not)
178    }
179
180    /// Rename Column.
181    pub fn alias<S>(self, name: S) -> Expr
182    where
183        S: Into<PlSmallStr>,
184    {
185        Expr::Alias(Arc::new(self), name.into())
186    }
187
188    /// Run is_null operation on `Expr`.
189    #[allow(clippy::wrong_self_convention)]
190    pub fn is_null(self) -> Self {
191        self.map_unary(BooleanFunction::IsNull)
192    }
193
194    /// Run is_not_null operation on `Expr`.
195    #[allow(clippy::wrong_self_convention)]
196    pub fn is_not_null(self) -> Self {
197        self.map_unary(BooleanFunction::IsNotNull)
198    }
199
200    /// Drop null values.
201    pub fn drop_nulls(self) -> Self {
202        self.map_unary(FunctionExpr::DropNulls)
203    }
204
205    /// Drop NaN values.
206    pub fn drop_nans(self) -> Self {
207        self.map_unary(FunctionExpr::DropNans)
208    }
209
210    /// Get the number of unique values in the groups.
211    pub fn n_unique(self) -> Self {
212        AggExpr::NUnique(Arc::new(self)).into()
213    }
214
215    /// Get the first value in the group.
216    pub fn first(self) -> Self {
217        AggExpr::First(Arc::new(self)).into()
218    }
219
220    /// Get the last value in the group.
221    pub fn last(self) -> Self {
222        AggExpr::Last(Arc::new(self)).into()
223    }
224
225    /// GroupBy the group to a Series.
226    pub fn implode(self) -> Self {
227        AggExpr::Implode(Arc::new(self)).into()
228    }
229
230    /// Compute the quantile per group.
231    pub fn quantile(self, quantile: Expr, method: QuantileMethod) -> Self {
232        AggExpr::Quantile {
233            expr: Arc::new(self),
234            quantile: Arc::new(quantile),
235            method,
236        }
237        .into()
238    }
239
240    /// Get the group indexes of the group by operation.
241    pub fn agg_groups(self) -> Self {
242        AggExpr::AggGroups(Arc::new(self)).into()
243    }
244
245    /// Alias for `explode`.
246    pub fn flatten(self) -> Self {
247        self.explode()
248    }
249
250    /// Explode the String/List column.
251    pub fn explode(self) -> Self {
252        Expr::Explode {
253            input: Arc::new(self),
254            skip_empty: false,
255        }
256    }
257
258    /// Slice the Series.
259    /// `offset` may be negative.
260    pub fn slice<E: Into<Expr>, F: Into<Expr>>(self, offset: E, length: F) -> Self {
261        Expr::Slice {
262            input: Arc::new(self),
263            offset: Arc::new(offset.into()),
264            length: Arc::new(length.into()),
265        }
266    }
267
268    /// Append expressions. This is done by adding the chunks of `other` to this [`Series`].
269    pub fn append<E: Into<Expr>>(self, other: E, upcast: bool) -> Self {
270        let output_type = if upcast {
271            GetOutput::super_type()
272        } else {
273            GetOutput::same_type()
274        };
275
276        apply_binary(
277            self,
278            other.into(),
279            move |mut a, mut b| {
280                if upcast {
281                    let dtype = try_get_supertype(a.dtype(), b.dtype())?;
282                    a = a.cast(&dtype)?;
283                    b = b.cast(&dtype)?;
284                }
285                a.append(&b)?;
286                Ok(Some(a))
287            },
288            output_type,
289        )
290    }
291
292    /// Get the first `n` elements of the Expr result.
293    pub fn head(self, length: Option<usize>) -> Self {
294        self.slice(lit(0), lit(length.unwrap_or(10) as u64))
295    }
296
297    /// Get the last `n` elements of the Expr result.
298    pub fn tail(self, length: Option<usize>) -> Self {
299        let len = length.unwrap_or(10);
300        self.slice(lit(-(len as i64)), lit(len as u64))
301    }
302
303    /// Get unique values of this expression.
304    pub fn unique(self) -> Self {
305        self.map_unary(FunctionExpr::Unique(false))
306    }
307
308    /// Get unique values of this expression, while maintaining order.
309    /// This requires more work than [`Expr::unique`].
310    pub fn unique_stable(self) -> Self {
311        self.map_unary(FunctionExpr::Unique(true))
312    }
313
314    /// Get the first index of unique values of this expression.
315    pub fn arg_unique(self) -> Self {
316        self.map_unary(FunctionExpr::ArgUnique)
317    }
318
319    /// Get the index value that has the minimum value.
320    pub fn arg_min(self) -> Self {
321        let options = FunctionOptions::aggregation().with_fmt_str("arg_min");
322        self.function_with_options(
323            move |c: Column| {
324                Ok(Some(Column::new(
325                    c.name().clone(),
326                    &[c.as_materialized_series().arg_min().map(|idx| idx as u32)],
327                )))
328            },
329            GetOutput::from_type(IDX_DTYPE),
330            options,
331        )
332    }
333
334    /// Get the index value that has the maximum value.
335    pub fn arg_max(self) -> Self {
336        let options = FunctionOptions::aggregation().with_fmt_str("arg_max");
337        self.function_with_options(
338            move |c: Column| {
339                Ok(Some(Column::new(
340                    c.name().clone(),
341                    &[c.as_materialized_series()
342                        .arg_max()
343                        .map(|idx| idx as IdxSize)],
344                )))
345            },
346            GetOutput::from_type(IDX_DTYPE),
347            options,
348        )
349    }
350
351    /// Get the index values that would sort this expression.
352    pub fn arg_sort(self, sort_options: SortOptions) -> Self {
353        let options = FunctionOptions::groupwise().with_fmt_str("arg_sort");
354        self.function_with_options(
355            move |c: Column| {
356                Ok(Some(
357                    c.as_materialized_series()
358                        .arg_sort(sort_options)
359                        .into_column(),
360                ))
361            },
362            GetOutput::from_type(IDX_DTYPE),
363            options,
364        )
365    }
366
367    #[cfg(feature = "index_of")]
368    /// Find the index of a value.
369    pub fn index_of<E: Into<Expr>>(self, element: E) -> Expr {
370        self.map_binary(FunctionExpr::IndexOf, element.into())
371    }
372
373    #[cfg(feature = "search_sorted")]
374    /// Find indices where elements should be inserted to maintain order.
375    pub fn search_sorted<E: Into<Expr>>(self, element: E, side: SearchSortedSide) -> Expr {
376        self.map_binary(FunctionExpr::SearchSorted(side), element.into())
377    }
378
379    /// Cast expression to another data type.
380    /// Throws an error if conversion had overflows.
381    /// Returns an Error if cast is invalid on rows after predicates are pushed down.
382    pub fn strict_cast(self, dtype: DataType) -> Self {
383        Expr::Cast {
384            expr: Arc::new(self),
385            dtype,
386            options: CastOptions::Strict,
387        }
388    }
389
390    /// Cast expression to another data type.
391    pub fn cast(self, dtype: DataType) -> Self {
392        Expr::Cast {
393            expr: Arc::new(self),
394            dtype,
395            options: CastOptions::NonStrict,
396        }
397    }
398
399    /// Cast expression to another data type.
400    pub fn cast_with_options(self, dtype: DataType, cast_options: CastOptions) -> Self {
401        Expr::Cast {
402            expr: Arc::new(self),
403            dtype,
404            options: cast_options,
405        }
406    }
407
408    /// Take the values by idx.
409    pub fn gather<E: Into<Expr>>(self, idx: E) -> Self {
410        Expr::Gather {
411            expr: Arc::new(self),
412            idx: Arc::new(idx.into()),
413            returns_scalar: false,
414        }
415    }
416
417    /// Take the values by a single index.
418    pub fn get<E: Into<Expr>>(self, idx: E) -> Self {
419        Expr::Gather {
420            expr: Arc::new(self),
421            idx: Arc::new(idx.into()),
422            returns_scalar: true,
423        }
424    }
425
426    /// Sort with given options.
427    ///
428    /// # Example
429    ///
430    /// ```rust
431    /// # use polars_core::prelude::*;
432    /// # use polars_lazy::prelude::*;
433    /// # fn main() -> PolarsResult<()> {
434    /// let lf = df! {
435    ///    "a" => [Some(5), Some(4), Some(3), Some(2), None]
436    /// }?
437    /// .lazy();
438    ///
439    /// let sorted = lf
440    ///     .select(
441    ///         vec![col("a").sort(SortOptions::default())],
442    ///     )
443    ///     .collect()?;
444    ///
445    /// assert_eq!(
446    ///     sorted,
447    ///     df! {
448    ///         "a" => [None, Some(2), Some(3), Some(4), Some(5)]
449    ///     }?
450    /// );
451    /// # Ok(())
452    /// # }
453    /// ```
454    /// See [`SortOptions`] for more options.
455    pub fn sort(self, options: SortOptions) -> Self {
456        Expr::Sort {
457            expr: Arc::new(self),
458            options,
459        }
460    }
461
462    /// Returns the `k` largest elements.
463    ///
464    /// This has time complexity `O(n + k log(n))`.
465    #[cfg(feature = "top_k")]
466    pub fn top_k(self, k: Expr) -> Self {
467        self.map_binary(FunctionExpr::TopK { descending: false }, k)
468    }
469
470    /// Returns the `k` largest rows by given column.
471    ///
472    /// For single column, use [`Expr::top_k`].
473    #[cfg(feature = "top_k")]
474    pub fn top_k_by<K: Into<Expr>, E: AsRef<[IE]>, IE: Into<Expr> + Clone>(
475        self,
476        k: K,
477        by: E,
478        descending: Vec<bool>,
479    ) -> Self {
480        self.map_n_ary(
481            FunctionExpr::TopKBy { descending },
482            [k.into()]
483                .into_iter()
484                .chain(by.as_ref().iter().map(|e| -> Expr { e.clone().into() })),
485        )
486    }
487
488    /// Returns the `k` smallest elements.
489    ///
490    /// This has time complexity `O(n + k log(n))`.
491    #[cfg(feature = "top_k")]
492    pub fn bottom_k(self, k: Expr) -> Self {
493        self.map_binary(FunctionExpr::TopK { descending: true }, k)
494    }
495
496    /// Returns the `k` smallest rows by given column.
497    ///
498    /// For single column, use [`Expr::bottom_k`].
499    // #[cfg(feature = "top_k")]
500    #[cfg(feature = "top_k")]
501    pub fn bottom_k_by<K: Into<Expr>, E: AsRef<[IE]>, IE: Into<Expr> + Clone>(
502        self,
503        k: K,
504        by: E,
505        descending: Vec<bool>,
506    ) -> Self {
507        let descending = descending.into_iter().map(|x| !x).collect();
508        self.map_n_ary(
509            FunctionExpr::TopKBy { descending },
510            [k.into()]
511                .into_iter()
512                .chain(by.as_ref().iter().map(|e| -> Expr { e.clone().into() })),
513        )
514    }
515
516    /// Reverse column
517    pub fn reverse(self) -> Self {
518        self.map_unary(FunctionExpr::Reverse)
519    }
520
521    /// Apply a function/closure once the logical plan get executed.
522    ///
523    /// This function is very similar to [`Expr::apply`], but differs in how it handles aggregations.
524    ///
525    ///  * `map` should be used for operations that are independent of groups, e.g. `multiply * 2`, or `raise to the power`
526    ///  * `apply` should be used for operations that work on a group of data. e.g. `sum`, `count`, etc.
527    ///
528    /// It is the responsibility of the caller that the schema is correct by giving
529    /// the correct output_type. If None given the output type of the input expr is used.
530    pub fn map<F>(self, function: F, output_type: GetOutput) -> Self
531    where
532        F: Fn(Column) -> PolarsResult<Option<Column>> + 'static + Send + Sync,
533    {
534        let f = move |c: &mut [Column]| function(std::mem::take(&mut c[0]));
535
536        Expr::AnonymousFunction {
537            input: vec![self],
538            function: new_column_udf(f),
539            output_type,
540            options: FunctionOptions::elementwise()
541                .with_fmt_str("map")
542                .with_flags(|f| f | FunctionFlags::OPTIONAL_RE_ENTRANT),
543        }
544    }
545
546    /// Apply a function/closure once the logical plan get executed with many arguments.
547    ///
548    /// See the [`Expr::map`] function for the differences between [`map`](Expr::map) and [`apply`](Expr::apply).
549    pub fn map_many<F>(self, function: F, arguments: &[Expr], output_type: GetOutput) -> Self
550    where
551        F: Fn(&mut [Column]) -> PolarsResult<Option<Column>> + 'static + Send + Sync,
552    {
553        let mut input = vec![self];
554        input.extend_from_slice(arguments);
555
556        Expr::AnonymousFunction {
557            input,
558            function: new_column_udf(function),
559            output_type,
560            options: FunctionOptions::elementwise().with_fmt_str(""),
561        }
562    }
563
564    /// A function that cannot be expressed with `map` or `apply` and requires extra settings.
565    pub fn function_with_options<F>(
566        self,
567        function: F,
568        output_type: GetOutput,
569        options: FunctionOptions,
570    ) -> Self
571    where
572        F: Fn(Column) -> PolarsResult<Option<Column>> + 'static + Send + Sync,
573    {
574        let f = move |c: &mut [Column]| function(std::mem::take(&mut c[0]));
575
576        Expr::AnonymousFunction {
577            input: vec![self],
578            function: new_column_udf(f),
579            output_type,
580            options,
581        }
582    }
583
584    /// Apply a function/closure over the groups. This should only be used in a group_by aggregation.
585    ///
586    /// It is the responsibility of the caller that the schema is correct by giving
587    /// the correct output_type. If None given the output type of the input expr is used.
588    ///
589    /// This difference with [map](Self::map) is that `apply` will create a separate `Series` per group.
590    ///
591    /// * `map` should be used for operations that are independent of groups, e.g. `multiply * 2`, or `raise to the power`
592    /// * `apply` should be used for operations that work on a group of data. e.g. `sum`, `count`, etc.
593    pub fn apply<F>(self, function: F, output_type: GetOutput) -> Self
594    where
595        F: Fn(Column) -> PolarsResult<Option<Column>> + 'static + Send + Sync,
596    {
597        let f = move |c: &mut [Column]| function(std::mem::take(&mut c[0]));
598
599        Expr::AnonymousFunction {
600            input: vec![self],
601            function: new_column_udf(f),
602            output_type,
603            options: FunctionOptions::groupwise().with_fmt_str(""),
604        }
605    }
606
607    /// Apply a function/closure over the groups with many arguments. This should only be used in a group_by aggregation.
608    ///
609    /// See the [`Expr::apply`] function for the differences between [`map`](Expr::map) and [`apply`](Expr::apply).
610    pub fn apply_many<F>(self, function: F, arguments: &[Expr], output_type: GetOutput) -> Self
611    where
612        F: Fn(&mut [Column]) -> PolarsResult<Option<Column>> + 'static + Send + Sync,
613    {
614        let mut input = vec![self];
615        input.extend_from_slice(arguments);
616
617        Expr::AnonymousFunction {
618            input,
619            function: new_column_udf(function),
620            output_type,
621            options: FunctionOptions::groupwise().with_fmt_str(""),
622        }
623    }
624
625    /// Get mask of finite values if dtype is Float.
626    #[allow(clippy::wrong_self_convention)]
627    pub fn is_finite(self) -> Self {
628        self.map_unary(BooleanFunction::IsFinite)
629    }
630
631    /// Get mask of infinite values if dtype is Float.
632    #[allow(clippy::wrong_self_convention)]
633    pub fn is_infinite(self) -> Self {
634        self.map_unary(BooleanFunction::IsInfinite)
635    }
636
637    /// Get mask of NaN values if dtype is Float.
638    pub fn is_nan(self) -> Self {
639        self.map_unary(BooleanFunction::IsNan)
640    }
641
642    /// Get inverse mask of NaN values if dtype is Float.
643    pub fn is_not_nan(self) -> Self {
644        self.map_unary(BooleanFunction::IsNotNan)
645    }
646
647    /// Shift the values in the array by some period. See [the eager implementation](polars_core::series::SeriesTrait::shift).
648    pub fn shift(self, n: Expr) -> Self {
649        self.map_binary(FunctionExpr::Shift, n)
650    }
651
652    /// Shift the values in the array by some period and fill the resulting empty values.
653    pub fn shift_and_fill<E: Into<Expr>, IE: Into<Expr>>(self, n: E, fill_value: IE) -> Self {
654        self.map_ternary(FunctionExpr::ShiftAndFill, n.into(), fill_value.into())
655    }
656
657    /// Cumulatively count values from 0 to len.
658    #[cfg(feature = "cum_agg")]
659    pub fn cum_count(self, reverse: bool) -> Self {
660        self.map_unary(FunctionExpr::CumCount { reverse })
661    }
662
663    /// Get an array with the cumulative sum computed at every element.
664    #[cfg(feature = "cum_agg")]
665    pub fn cum_sum(self, reverse: bool) -> Self {
666        self.map_unary(FunctionExpr::CumSum { reverse })
667    }
668
669    /// Get an array with the cumulative product computed at every element.
670    #[cfg(feature = "cum_agg")]
671    pub fn cum_prod(self, reverse: bool) -> Self {
672        self.map_unary(FunctionExpr::CumProd { reverse })
673    }
674
675    /// Get an array with the cumulative min computed at every element.
676    #[cfg(feature = "cum_agg")]
677    pub fn cum_min(self, reverse: bool) -> Self {
678        self.map_unary(FunctionExpr::CumMin { reverse })
679    }
680
681    /// Get an array with the cumulative max computed at every element.
682    #[cfg(feature = "cum_agg")]
683    pub fn cum_max(self, reverse: bool) -> Self {
684        self.map_unary(FunctionExpr::CumMax { reverse })
685    }
686
687    /// Get the product aggregation of an expression.
688    pub fn product(self) -> Self {
689        let options = FunctionOptions::aggregation().with_fmt_str("product");
690        self.function_with_options(
691            move |c: Column| {
692                Some(
693                    c.product()
694                        .map(|sc| sc.into_series(c.name().clone()).into_column()),
695                )
696                .transpose()
697            },
698            GetOutput::map_dtype(|dt| {
699                use DataType as T;
700                Ok(match dt {
701                    T::Float32 => T::Float32,
702                    T::Float64 => T::Float64,
703                    T::UInt64 => T::UInt64,
704                    #[cfg(feature = "dtype-i128")]
705                    T::Int128 => T::Int128,
706                    _ => T::Int64,
707                })
708            }),
709            options,
710        )
711    }
712
713    /// Round underlying floating point array to given decimal numbers.
714    #[cfg(feature = "round_series")]
715    pub fn round(self, decimals: u32, mode: RoundMode) -> Self {
716        self.map_unary(FunctionExpr::Round { decimals, mode })
717    }
718
719    /// Round to a number of significant figures.
720    #[cfg(feature = "round_series")]
721    pub fn round_sig_figs(self, digits: i32) -> Self {
722        self.map_unary(FunctionExpr::RoundSF { digits })
723    }
724
725    /// Floor underlying floating point array to the lowest integers smaller or equal to the float value.
726    #[cfg(feature = "round_series")]
727    pub fn floor(self) -> Self {
728        self.map_unary(FunctionExpr::Floor)
729    }
730
731    /// Constant Pi
732    #[cfg(feature = "round_series")]
733    pub fn pi() -> Self {
734        lit(std::f64::consts::PI)
735    }
736
737    /// Ceil underlying floating point array to the highest integers smaller or equal to the float value.
738    #[cfg(feature = "round_series")]
739    pub fn ceil(self) -> Self {
740        self.map_unary(FunctionExpr::Ceil)
741    }
742
743    /// Clip underlying values to a set boundary.
744    #[cfg(feature = "round_series")]
745    pub fn clip(self, min: Expr, max: Expr) -> Self {
746        self.map_ternary(
747            FunctionExpr::Clip {
748                has_min: true,
749                has_max: true,
750            },
751            min,
752            max,
753        )
754    }
755
756    /// Clip underlying values to a set boundary.
757    #[cfg(feature = "round_series")]
758    pub fn clip_max(self, max: Expr) -> Self {
759        self.map_binary(
760            FunctionExpr::Clip {
761                has_min: false,
762                has_max: true,
763            },
764            max,
765        )
766    }
767
768    /// Clip underlying values to a set boundary.
769    #[cfg(feature = "round_series")]
770    pub fn clip_min(self, min: Expr) -> Self {
771        self.map_binary(
772            FunctionExpr::Clip {
773                has_min: true,
774                has_max: false,
775            },
776            min,
777        )
778    }
779
780    /// Convert all values to their absolute/positive value.
781    #[cfg(feature = "abs")]
782    pub fn abs(self) -> Self {
783        self.map_unary(FunctionExpr::Abs)
784    }
785
786    /// Apply window function over a subgroup.
787    /// This is similar to a group_by + aggregation + self join.
788    /// Or similar to [window functions in Postgres](https://www.postgresql.org/docs/9.1/tutorial-window.html).
789    ///
790    /// # Example
791    ///
792    /// ``` rust
793    /// #[macro_use] extern crate polars_core;
794    /// use polars_core::prelude::*;
795    /// use polars_lazy::prelude::*;
796    ///
797    /// fn example() -> PolarsResult<()> {
798    ///     let df = df! {
799    ///             "groups" => &[1, 1, 2, 2, 1, 2, 3, 3, 1],
800    ///             "values" => &[1, 2, 3, 4, 5, 6, 7, 8, 8]
801    ///         }?;
802    ///
803    ///     let out = df
804    ///      .lazy()
805    ///      .select(&[
806    ///          col("groups"),
807    ///          sum("values").over([col("groups")]),
808    ///      ])
809    ///      .collect()?;
810    ///     println!("{}", &out);
811    ///     Ok(())
812    /// }
813    ///
814    /// ```
815    ///
816    /// Outputs:
817    ///
818    /// ``` text
819    /// ╭────────┬────────╮
820    /// │ groups ┆ values │
821    /// │ ---    ┆ ---    │
822    /// │ i32    ┆ i32    │
823    /// ╞════════╪════════╡
824    /// │ 1      ┆ 16     │
825    /// │ 1      ┆ 16     │
826    /// │ 2      ┆ 13     │
827    /// │ 2      ┆ 13     │
828    /// │ …      ┆ …      │
829    /// │ 1      ┆ 16     │
830    /// │ 2      ┆ 13     │
831    /// │ 3      ┆ 15     │
832    /// │ 3      ┆ 15     │
833    /// │ 1      ┆ 16     │
834    /// ╰────────┴────────╯
835    /// ```
836    pub fn over<E: AsRef<[IE]>, IE: Into<Expr> + Clone>(self, partition_by: E) -> Self {
837        self.over_with_options(Some(partition_by), None, Default::default())
838            .expect("We explicitly passed `partition_by`")
839    }
840
841    pub fn over_with_options<E: AsRef<[IE]>, IE: Into<Expr> + Clone>(
842        self,
843        partition_by: Option<E>,
844        order_by: Option<(E, SortOptions)>,
845        options: WindowMapping,
846    ) -> PolarsResult<Self> {
847        polars_ensure!(partition_by.is_some() || order_by.is_some(), InvalidOperation: "At least one of `partition_by` and `order_by` must be specified in `over`");
848        let partition_by = if let Some(partition_by) = partition_by {
849            partition_by
850                .as_ref()
851                .iter()
852                .map(|e| e.clone().into())
853                .collect()
854        } else {
855            vec![lit(1)]
856        };
857
858        let order_by = order_by.map(|(e, options)| {
859            let e = e.as_ref();
860            let e = if e.len() == 1 {
861                Arc::new(e[0].clone().into())
862            } else {
863                feature_gated!["dtype-struct", {
864                    let e = e.iter().map(|e| e.clone().into()).collect::<Vec<_>>();
865                    Arc::new(as_struct(e))
866                }]
867            };
868            (e, options)
869        });
870
871        Ok(Expr::Window {
872            function: Arc::new(self),
873            partition_by,
874            order_by,
875            options: options.into(),
876        })
877    }
878
879    #[cfg(feature = "dynamic_group_by")]
880    pub fn rolling(self, options: RollingGroupOptions) -> Self {
881        // We add the index column as `partition expr` so that the optimizer will
882        // not ignore it.
883        let index_col = col(options.index_column.clone());
884        Expr::Window {
885            function: Arc::new(self),
886            partition_by: vec![index_col],
887            order_by: None,
888            options: WindowType::Rolling(options),
889        }
890    }
891
892    fn fill_null_impl(self, fill_value: Expr) -> Self {
893        self.map_binary(FunctionExpr::FillNull, fill_value)
894    }
895
896    /// Replace the null values by a value.
897    pub fn fill_null<E: Into<Expr>>(self, fill_value: E) -> Self {
898        self.fill_null_impl(fill_value.into())
899    }
900
901    pub fn fill_null_with_strategy(self, strategy: FillNullStrategy) -> Self {
902        self.map_unary(FunctionExpr::FillNullWithStrategy(strategy))
903    }
904
905    /// Replace the floating point `NaN` values by a value.
906    pub fn fill_nan<E: Into<Expr>>(self, fill_value: E) -> Self {
907        // we take the not branch so that self is truthy value of `when -> then -> otherwise`
908        // and that ensure we keep the name of `self`
909
910        when(self.clone().is_not_nan().or(self.clone().is_null()))
911            .then(self)
912            .otherwise(fill_value.into())
913    }
914    /// Count the values of the Series
915    /// or
916    /// Get counts of the group by operation.
917    pub fn count(self) -> Self {
918        AggExpr::Count(Arc::new(self), false).into()
919    }
920
921    pub fn len(self) -> Self {
922        AggExpr::Count(Arc::new(self), true).into()
923    }
924
925    /// Get a mask of duplicated values.
926    #[allow(clippy::wrong_self_convention)]
927    #[cfg(feature = "is_unique")]
928    pub fn is_duplicated(self) -> Self {
929        self.map_unary(BooleanFunction::IsDuplicated)
930    }
931
932    #[allow(clippy::wrong_self_convention)]
933    #[cfg(feature = "is_between")]
934    pub fn is_between<E: Into<Expr>>(self, lower: E, upper: E, closed: ClosedInterval) -> Self {
935        self.map_ternary(
936            BooleanFunction::IsBetween { closed },
937            lower.into(),
938            upper.into(),
939        )
940    }
941
942    /// Get a mask of unique values.
943    #[allow(clippy::wrong_self_convention)]
944    #[cfg(feature = "is_unique")]
945    pub fn is_unique(self) -> Self {
946        self.map_unary(BooleanFunction::IsUnique)
947    }
948
949    /// Get the approximate count of unique values.
950    #[cfg(feature = "approx_unique")]
951    pub fn approx_n_unique(self) -> Self {
952        self.map_unary(FunctionExpr::ApproxNUnique)
953    }
954
955    /// Bitwise "and" operation.
956    pub fn and<E: Into<Expr>>(self, expr: E) -> Self {
957        binary_expr(self, Operator::And, expr.into())
958    }
959
960    /// Bitwise "xor" operation.
961    pub fn xor<E: Into<Expr>>(self, expr: E) -> Self {
962        binary_expr(self, Operator::Xor, expr.into())
963    }
964
965    /// Bitwise "or" operation.
966    pub fn or<E: Into<Expr>>(self, expr: E) -> Self {
967        binary_expr(self, Operator::Or, expr.into())
968    }
969
970    /// Logical "or" operation.
971    pub fn logical_or<E: Into<Expr>>(self, expr: E) -> Self {
972        binary_expr(self, Operator::LogicalOr, expr.into())
973    }
974
975    /// Logical "and" operation.
976    pub fn logical_and<E: Into<Expr>>(self, expr: E) -> Self {
977        binary_expr(self, Operator::LogicalAnd, expr.into())
978    }
979
980    /// Filter a single column.
981    ///
982    /// Should be used in aggregation context. If you want to filter on a
983    /// DataFrame level, use `LazyFrame::filter`.
984    pub fn filter<E: Into<Expr>>(self, predicate: E) -> Self {
985        if has_expr(&self, |e| matches!(e, Expr::Wildcard)) {
986            panic!("filter '*' not allowed, use LazyFrame::filter")
987        };
988        Expr::Filter {
989            input: Arc::new(self),
990            by: Arc::new(predicate.into()),
991        }
992    }
993
994    /// Check if the values of the left expression are in the lists of the right expr.
995    #[allow(clippy::wrong_self_convention)]
996    #[cfg(feature = "is_in")]
997    pub fn is_in<E: Into<Expr>>(self, other: E, nulls_equal: bool) -> Self {
998        let other = other.into();
999        let function = BooleanFunction::IsIn { nulls_equal };
1000        let options = function.function_options();
1001        let function = function.into();
1002        Expr::Function {
1003            input: vec![self, other],
1004            function,
1005            options,
1006        }
1007    }
1008
1009    /// Sort this column by the ordering of another column evaluated from given expr.
1010    /// Can also be used in a group_by context to sort the groups.
1011    ///
1012    /// # Example
1013    ///
1014    /// ```rust
1015    /// # use polars_core::prelude::*;
1016    /// # use polars_lazy::prelude::*;
1017    /// # fn main() -> PolarsResult<()> {
1018    /// let lf = df! {
1019    ///     "a" => [1, 2, 3, 4, 5],
1020    ///     "b" => [5, 4, 3, 2, 1]
1021    /// }?.lazy();
1022    ///
1023    /// let sorted = lf
1024    ///     .select(
1025    ///         vec![col("a").sort_by(col("b"), SortOptions::default())],
1026    ///     )
1027    ///     .collect()?;
1028    ///
1029    /// assert_eq!(
1030    ///     sorted,
1031    ///     df! { "a" => [5, 4, 3, 2, 1] }?
1032    /// );
1033    /// # Ok(())
1034    /// # }
1035    pub fn sort_by<E: AsRef<[IE]>, IE: Into<Expr> + Clone>(
1036        self,
1037        by: E,
1038        sort_options: SortMultipleOptions,
1039    ) -> Expr {
1040        let by = by.as_ref().iter().map(|e| e.clone().into()).collect();
1041        Expr::SortBy {
1042            expr: Arc::new(self),
1043            by,
1044            sort_options,
1045        }
1046    }
1047
1048    #[cfg(feature = "repeat_by")]
1049    /// Repeat the column `n` times, where `n` is determined by the values in `by`.
1050    /// This yields an `Expr` of dtype `List`.
1051    pub fn repeat_by<E: Into<Expr>>(self, by: E) -> Expr {
1052        self.map_binary(FunctionExpr::RepeatBy, by.into())
1053    }
1054
1055    #[cfg(feature = "is_first_distinct")]
1056    #[allow(clippy::wrong_self_convention)]
1057    /// Get a mask of the first unique value.
1058    pub fn is_first_distinct(self) -> Expr {
1059        self.map_unary(BooleanFunction::IsFirstDistinct)
1060    }
1061
1062    #[cfg(feature = "is_last_distinct")]
1063    #[allow(clippy::wrong_self_convention)]
1064    /// Get a mask of the last unique value.
1065    pub fn is_last_distinct(self) -> Expr {
1066        self.map_unary(BooleanFunction::IsLastDistinct)
1067    }
1068
1069    fn dot_impl(self, other: Expr) -> Expr {
1070        (self * other).sum()
1071    }
1072
1073    /// Compute the dot/inner product between two expressions.
1074    pub fn dot<E: Into<Expr>>(self, other: E) -> Expr {
1075        self.dot_impl(other.into())
1076    }
1077
1078    #[cfg(feature = "mode")]
1079    /// Compute the mode(s) of this column. This is the most occurring value.
1080    pub fn mode(self) -> Expr {
1081        self.map_unary(FunctionExpr::Mode)
1082    }
1083
1084    /// Exclude a column from a wildcard/regex selection.
1085    ///
1086    /// You may also use regexes in the exclude as long as they start with `^` and end with `$`.
1087    pub fn exclude(self, columns: impl IntoVec<PlSmallStr>) -> Expr {
1088        let v = columns.into_vec().into_iter().map(Excluded::Name).collect();
1089        Expr::Exclude(Arc::new(self), v)
1090    }
1091
1092    pub fn exclude_dtype<D: AsRef<[DataType]>>(self, dtypes: D) -> Expr {
1093        let v = dtypes
1094            .as_ref()
1095            .iter()
1096            .map(|dt| Excluded::Dtype(dt.clone()))
1097            .collect();
1098        Expr::Exclude(Arc::new(self), v)
1099    }
1100
1101    #[cfg(feature = "interpolate")]
1102    /// Interpolate intermediate values.
1103    /// Nulls at the beginning and end of the series remain null.
1104    pub fn interpolate(self, method: InterpolationMethod) -> Expr {
1105        self.map_unary(FunctionExpr::Interpolate(method))
1106    }
1107
1108    #[cfg(feature = "rolling_window_by")]
1109    #[allow(clippy::type_complexity)]
1110    fn finish_rolling_by(
1111        self,
1112        by: Expr,
1113        options: RollingOptionsDynamicWindow,
1114        rolling_function_by: fn(RollingOptionsDynamicWindow) -> RollingFunctionBy,
1115    ) -> Expr {
1116        self.map_binary(
1117            FunctionExpr::RollingExprBy(rolling_function_by(options)),
1118            by,
1119        )
1120    }
1121
1122    #[cfg(feature = "interpolate_by")]
1123    /// Interpolate intermediate values.
1124    /// Nulls at the beginning and end of the series remain null.
1125    /// The `by` column provides the x-coordinates for interpolation and must not contain nulls.
1126    pub fn interpolate_by(self, by: Expr) -> Expr {
1127        self.map_binary(FunctionExpr::InterpolateBy, by)
1128    }
1129
1130    #[cfg(feature = "rolling_window")]
1131    #[allow(clippy::type_complexity)]
1132    fn finish_rolling(
1133        self,
1134        options: RollingOptionsFixedWindow,
1135        rolling_function: fn(RollingOptionsFixedWindow) -> RollingFunction,
1136    ) -> Expr {
1137        self.map_unary(FunctionExpr::RollingExpr(rolling_function(options)))
1138    }
1139
1140    /// Apply a rolling minimum based on another column.
1141    #[cfg(feature = "rolling_window_by")]
1142    pub fn rolling_min_by(self, by: Expr, options: RollingOptionsDynamicWindow) -> Expr {
1143        self.finish_rolling_by(by, options, RollingFunctionBy::MinBy)
1144    }
1145
1146    /// Apply a rolling maximum based on another column.
1147    #[cfg(feature = "rolling_window_by")]
1148    pub fn rolling_max_by(self, by: Expr, options: RollingOptionsDynamicWindow) -> Expr {
1149        self.finish_rolling_by(by, options, RollingFunctionBy::MaxBy)
1150    }
1151
1152    /// Apply a rolling mean based on another column.
1153    #[cfg(feature = "rolling_window_by")]
1154    pub fn rolling_mean_by(self, by: Expr, options: RollingOptionsDynamicWindow) -> Expr {
1155        self.finish_rolling_by(by, options, RollingFunctionBy::MeanBy)
1156    }
1157
1158    /// Apply a rolling sum based on another column.
1159    #[cfg(feature = "rolling_window_by")]
1160    pub fn rolling_sum_by(self, by: Expr, options: RollingOptionsDynamicWindow) -> Expr {
1161        self.finish_rolling_by(by, options, RollingFunctionBy::SumBy)
1162    }
1163
1164    /// Apply a rolling quantile based on another column.
1165    #[cfg(feature = "rolling_window_by")]
1166    pub fn rolling_quantile_by(
1167        self,
1168        by: Expr,
1169        method: QuantileMethod,
1170        quantile: f64,
1171        mut options: RollingOptionsDynamicWindow,
1172    ) -> Expr {
1173        use polars_compute::rolling::{RollingFnParams, RollingQuantileParams};
1174        options.fn_params = Some(RollingFnParams::Quantile(RollingQuantileParams {
1175            prob: quantile,
1176            method,
1177        }));
1178
1179        self.finish_rolling_by(by, options, RollingFunctionBy::QuantileBy)
1180    }
1181
1182    /// Apply a rolling variance based on another column.
1183    #[cfg(feature = "rolling_window_by")]
1184    pub fn rolling_var_by(self, by: Expr, options: RollingOptionsDynamicWindow) -> Expr {
1185        self.finish_rolling_by(by, options, RollingFunctionBy::VarBy)
1186    }
1187
1188    /// Apply a rolling std-dev based on another column.
1189    #[cfg(feature = "rolling_window_by")]
1190    pub fn rolling_std_by(self, by: Expr, options: RollingOptionsDynamicWindow) -> Expr {
1191        self.finish_rolling_by(by, options, RollingFunctionBy::StdBy)
1192    }
1193
1194    /// Apply a rolling median based on another column.
1195    #[cfg(feature = "rolling_window_by")]
1196    pub fn rolling_median_by(self, by: Expr, options: RollingOptionsDynamicWindow) -> Expr {
1197        self.rolling_quantile_by(by, QuantileMethod::Linear, 0.5, options)
1198    }
1199
1200    /// Apply a rolling minimum.
1201    ///
1202    /// See: [`RollingAgg::rolling_min`]
1203    #[cfg(feature = "rolling_window")]
1204    pub fn rolling_min(self, options: RollingOptionsFixedWindow) -> Expr {
1205        self.finish_rolling(options, RollingFunction::Min)
1206    }
1207
1208    /// Apply a rolling maximum.
1209    ///
1210    /// See: [`RollingAgg::rolling_max`]
1211    #[cfg(feature = "rolling_window")]
1212    pub fn rolling_max(self, options: RollingOptionsFixedWindow) -> Expr {
1213        self.finish_rolling(options, RollingFunction::Max)
1214    }
1215
1216    /// Apply a rolling mean.
1217    ///
1218    /// See: [`RollingAgg::rolling_mean`]
1219    #[cfg(feature = "rolling_window")]
1220    pub fn rolling_mean(self, options: RollingOptionsFixedWindow) -> Expr {
1221        self.finish_rolling(options, RollingFunction::Mean)
1222    }
1223
1224    /// Apply a rolling sum.
1225    ///
1226    /// See: [`RollingAgg::rolling_sum`]
1227    #[cfg(feature = "rolling_window")]
1228    pub fn rolling_sum(self, options: RollingOptionsFixedWindow) -> Expr {
1229        self.finish_rolling(options, RollingFunction::Sum)
1230    }
1231
1232    /// Apply a rolling median.
1233    ///
1234    /// See: [`RollingAgg::rolling_median`]
1235    #[cfg(feature = "rolling_window")]
1236    pub fn rolling_median(self, options: RollingOptionsFixedWindow) -> Expr {
1237        self.rolling_quantile(QuantileMethod::Linear, 0.5, options)
1238    }
1239
1240    /// Apply a rolling quantile.
1241    ///
1242    /// See: [`RollingAgg::rolling_quantile`]
1243    #[cfg(feature = "rolling_window")]
1244    pub fn rolling_quantile(
1245        self,
1246        method: QuantileMethod,
1247        quantile: f64,
1248        mut options: RollingOptionsFixedWindow,
1249    ) -> Expr {
1250        use polars_compute::rolling::{RollingFnParams, RollingQuantileParams};
1251
1252        options.fn_params = Some(RollingFnParams::Quantile(RollingQuantileParams {
1253            prob: quantile,
1254            method,
1255        }));
1256
1257        self.finish_rolling(options, RollingFunction::Quantile)
1258    }
1259
1260    /// Apply a rolling variance.
1261    #[cfg(feature = "rolling_window")]
1262    pub fn rolling_var(self, options: RollingOptionsFixedWindow) -> Expr {
1263        self.finish_rolling(options, RollingFunction::Var)
1264    }
1265
1266    /// Apply a rolling std-dev.
1267    #[cfg(feature = "rolling_window")]
1268    pub fn rolling_std(self, options: RollingOptionsFixedWindow) -> Expr {
1269        self.finish_rolling(options, RollingFunction::Std)
1270    }
1271
1272    /// Apply a rolling skew.
1273    #[cfg(feature = "rolling_window")]
1274    #[cfg(feature = "moment")]
1275    pub fn rolling_skew(self, options: RollingOptionsFixedWindow) -> Expr {
1276        self.finish_rolling(options, RollingFunction::Skew)
1277    }
1278
1279    /// Apply a rolling skew.
1280    #[cfg(feature = "rolling_window")]
1281    #[cfg(feature = "moment")]
1282    pub fn rolling_kurtosis(self, options: RollingOptionsFixedWindow) -> Expr {
1283        self.finish_rolling(options, RollingFunction::Kurtosis)
1284    }
1285
1286    #[cfg(feature = "rolling_window")]
1287    /// Apply a custom function over a rolling/ moving window of the array.
1288    /// This has quite some dynamic dispatch, so prefer rolling_min, max, mean, sum over this.
1289    pub fn rolling_map(
1290        self,
1291        f: Arc<dyn Fn(&Series) -> Series + Send + Sync>,
1292        output_type: GetOutput,
1293        options: RollingOptionsFixedWindow,
1294    ) -> Expr {
1295        self.apply(
1296            move |c: Column| {
1297                c.as_materialized_series()
1298                    .rolling_map(f.as_ref(), options.clone())
1299                    .map(Column::from)
1300                    .map(Some)
1301            },
1302            output_type,
1303        )
1304        .with_fmt("rolling_map")
1305    }
1306
1307    #[cfg(feature = "rolling_window")]
1308    /// Apply a custom function over a rolling/ moving window of the array.
1309    /// Prefer this over rolling_apply in case of floating point numbers as this is faster.
1310    /// This has quite some dynamic dispatch, so prefer rolling_min, max, mean, sum over this.
1311    pub fn rolling_map_float<F>(self, window_size: usize, f: F) -> Expr
1312    where
1313        F: 'static + FnMut(&mut Float64Chunked) -> Option<f64> + Send + Sync + Copy,
1314    {
1315        self.apply(
1316            move |c: Column| {
1317                let out = match c.dtype() {
1318                    DataType::Float64 => c
1319                        .f64()
1320                        .unwrap()
1321                        .rolling_map_float(window_size, f)
1322                        .map(|ca| ca.into_column()),
1323                    _ => c
1324                        .cast(&DataType::Float64)?
1325                        .f64()
1326                        .unwrap()
1327                        .rolling_map_float(window_size, f)
1328                        .map(|ca| ca.into_column()),
1329                }?;
1330                if let DataType::Float32 = c.dtype() {
1331                    out.cast(&DataType::Float32).map(Some)
1332                } else {
1333                    Ok(Some(out))
1334                }
1335            },
1336            GetOutput::map_field(|field| {
1337                Ok(match field.dtype() {
1338                    DataType::Float64 => field.clone(),
1339                    DataType::Float32 => Field::new(field.name().clone(), DataType::Float32),
1340                    _ => Field::new(field.name().clone(), DataType::Float64),
1341                })
1342            }),
1343        )
1344        .with_fmt("rolling_map_float")
1345    }
1346
1347    #[cfg(feature = "peaks")]
1348    pub fn peak_min(self) -> Expr {
1349        self.map_unary(FunctionExpr::PeakMin)
1350    }
1351
1352    #[cfg(feature = "peaks")]
1353    pub fn peak_max(self) -> Expr {
1354        self.map_unary(FunctionExpr::PeakMax)
1355    }
1356
1357    #[cfg(feature = "rank")]
1358    /// Assign ranks to data, dealing with ties appropriately.
1359    pub fn rank(self, options: RankOptions, seed: Option<u64>) -> Expr {
1360        self.map_unary(FunctionExpr::Rank { options, seed })
1361    }
1362
1363    #[cfg(feature = "replace")]
1364    /// Replace the given values with other values.
1365    pub fn replace<E: Into<Expr>>(self, old: E, new: E) -> Expr {
1366        let old = old.into();
1367        let new = new.into();
1368        self.map_n_ary(FunctionExpr::Replace, [old, new])
1369    }
1370
1371    #[cfg(feature = "replace")]
1372    /// Replace the given values with other values.
1373    pub fn replace_strict<E: Into<Expr>>(
1374        self,
1375        old: E,
1376        new: E,
1377        default: Option<E>,
1378        return_dtype: Option<DataType>,
1379    ) -> Expr {
1380        let old = old.into();
1381        let new = new.into();
1382        let mut args = vec![old, new];
1383        args.extend(default.map(Into::into));
1384        self.map_n_ary(FunctionExpr::ReplaceStrict { return_dtype }, args)
1385    }
1386
1387    #[cfg(feature = "cutqcut")]
1388    /// Bin continuous values into discrete categories.
1389    pub fn cut(
1390        self,
1391        breaks: Vec<f64>,
1392        labels: Option<impl IntoVec<PlSmallStr>>,
1393        left_closed: bool,
1394        include_breaks: bool,
1395    ) -> Expr {
1396        self.map_unary(FunctionExpr::Cut {
1397            breaks,
1398            labels: labels.map(|x| x.into_vec()),
1399            left_closed,
1400            include_breaks,
1401        })
1402    }
1403
1404    #[cfg(feature = "cutqcut")]
1405    /// Bin continuous values into discrete categories based on their quantiles.
1406    pub fn qcut(
1407        self,
1408        probs: Vec<f64>,
1409        labels: Option<impl IntoVec<PlSmallStr>>,
1410        left_closed: bool,
1411        allow_duplicates: bool,
1412        include_breaks: bool,
1413    ) -> Expr {
1414        self.map_unary(FunctionExpr::QCut {
1415            probs,
1416            labels: labels.map(|x| x.into_vec()),
1417            left_closed,
1418            allow_duplicates,
1419            include_breaks,
1420        })
1421    }
1422
1423    #[cfg(feature = "cutqcut")]
1424    /// Bin continuous values into discrete categories using uniform quantile probabilities.
1425    pub fn qcut_uniform(
1426        self,
1427        n_bins: usize,
1428        labels: Option<impl IntoVec<PlSmallStr>>,
1429        left_closed: bool,
1430        allow_duplicates: bool,
1431        include_breaks: bool,
1432    ) -> Expr {
1433        let probs = (1..n_bins).map(|b| b as f64 / n_bins as f64).collect();
1434        self.map_unary(FunctionExpr::QCut {
1435            probs,
1436            labels: labels.map(|x| x.into_vec()),
1437            left_closed,
1438            allow_duplicates,
1439            include_breaks,
1440        })
1441    }
1442
1443    #[cfg(feature = "rle")]
1444    /// Get the lengths of runs of identical values.
1445    pub fn rle(self) -> Expr {
1446        self.map_unary(FunctionExpr::RLE)
1447    }
1448
1449    #[cfg(feature = "rle")]
1450    /// Similar to `rle`, but maps values to run IDs.
1451    pub fn rle_id(self) -> Expr {
1452        self.map_unary(FunctionExpr::RLEID)
1453    }
1454
1455    #[cfg(feature = "diff")]
1456    /// Calculate the n-th discrete difference between values.
1457    pub fn diff(self, n: Expr, null_behavior: NullBehavior) -> Expr {
1458        self.map_binary(FunctionExpr::Diff(null_behavior), n)
1459    }
1460
1461    #[cfg(feature = "pct_change")]
1462    /// Computes percentage change between values.
1463    pub fn pct_change(self, n: Expr) -> Expr {
1464        self.map_binary(FunctionExpr::PctChange, n)
1465    }
1466
1467    #[cfg(feature = "moment")]
1468    /// Compute the sample skewness of a data set.
1469    ///
1470    /// For normally distributed data, the skewness should be about zero. For
1471    /// uni-modal continuous distributions, a skewness value greater than zero means
1472    /// that there is more weight in the right tail of the distribution. The
1473    /// function `skewtest` can be used to determine if the skewness value
1474    /// is close enough to zero, statistically speaking.
1475    ///
1476    /// see: [scipy](https://github.com/scipy/scipy/blob/47bb6febaa10658c72962b9615d5d5aa2513fa3a/scipy/stats/stats.py#L1024)
1477    pub fn skew(self, bias: bool) -> Expr {
1478        self.map_unary(FunctionExpr::Skew(bias))
1479    }
1480
1481    #[cfg(feature = "moment")]
1482    /// Compute the kurtosis (Fisher or Pearson).
1483    ///
1484    /// Kurtosis is the fourth central moment divided by the square of the
1485    /// variance. If Fisher's definition is used, then 3.0 is subtracted from
1486    /// the result to give 0.0 for a normal distribution.
1487    /// If bias is False then the kurtosis is calculated using k statistics to
1488    /// eliminate bias coming from biased moment estimators.
1489    pub fn kurtosis(self, fisher: bool, bias: bool) -> Expr {
1490        self.map_unary(FunctionExpr::Kurtosis(fisher, bias))
1491    }
1492
1493    /// Get maximal value that could be hold by this dtype.
1494    pub fn upper_bound(self) -> Expr {
1495        self.map_unary(FunctionExpr::UpperBound)
1496    }
1497
1498    /// Get minimal value that could be hold by this dtype.
1499    pub fn lower_bound(self) -> Expr {
1500        self.map_unary(FunctionExpr::LowerBound)
1501    }
1502
1503    #[cfg(feature = "dtype-array")]
1504    pub fn reshape(self, dimensions: &[i64]) -> Self {
1505        let dimensions = dimensions
1506            .iter()
1507            .map(|&v| ReshapeDimension::new(v))
1508            .collect();
1509        self.map_unary(FunctionExpr::Reshape(dimensions))
1510    }
1511
1512    #[cfg(feature = "ewma")]
1513    /// Calculate the exponentially-weighted moving average.
1514    pub fn ewm_mean(self, options: EWMOptions) -> Self {
1515        self.map_unary(FunctionExpr::EwmMean { options })
1516    }
1517
1518    #[cfg(feature = "ewma_by")]
1519    /// Calculate the exponentially-weighted moving average by a time column.
1520    pub fn ewm_mean_by(self, times: Expr, half_life: Duration) -> Self {
1521        self.map_binary(FunctionExpr::EwmMeanBy { half_life }, times)
1522    }
1523
1524    #[cfg(feature = "ewma")]
1525    /// Calculate the exponentially-weighted moving standard deviation.
1526    pub fn ewm_std(self, options: EWMOptions) -> Self {
1527        self.map_unary(FunctionExpr::EwmStd { options })
1528    }
1529
1530    #[cfg(feature = "ewma")]
1531    /// Calculate the exponentially-weighted moving variance.
1532    pub fn ewm_var(self, options: EWMOptions) -> Self {
1533        self.map_unary(FunctionExpr::EwmVar { options })
1534    }
1535
1536    /// Returns whether any of the values in the column are `true`.
1537    ///
1538    /// If `ignore_nulls` is `False`, [Kleene logic] is used to deal with nulls:
1539    /// if the column contains any null values and no `true` values, the output
1540    /// is null.
1541    ///
1542    /// [Kleene logic]: https://en.wikipedia.org/wiki/Three-valued_logic
1543    pub fn any(self, ignore_nulls: bool) -> Self {
1544        self.map_unary(BooleanFunction::Any { ignore_nulls })
1545    }
1546
1547    /// Returns whether all values in the column are `true`.
1548    ///
1549    /// If `ignore_nulls` is `False`, [Kleene logic] is used to deal with nulls:
1550    /// if the column contains any null values and no `false` values, the output
1551    /// is null.
1552    ///
1553    /// [Kleene logic]: https://en.wikipedia.org/wiki/Three-valued_logic
1554    pub fn all(self, ignore_nulls: bool) -> Self {
1555        self.map_unary(BooleanFunction::All { ignore_nulls })
1556    }
1557
1558    /// Shrink numeric columns to the minimal required datatype
1559    /// needed to fit the extrema of this [`Series`].
1560    /// This can be used to reduce memory pressure.
1561    pub fn shrink_dtype(self) -> Self {
1562        self.map_unary(FunctionExpr::ShrinkType)
1563    }
1564
1565    #[cfg(feature = "dtype-struct")]
1566    /// Count all unique values and create a struct mapping value to count.
1567    /// (Note that it is better to turn parallel off in the aggregation context).
1568    /// The name of the struct field with the counts is given by the parameter `name`.
1569    pub fn value_counts(self, sort: bool, parallel: bool, name: &str, normalize: bool) -> Self {
1570        self.map_unary(FunctionExpr::ValueCounts {
1571            sort,
1572            parallel,
1573            name: name.into(),
1574            normalize,
1575        })
1576    }
1577
1578    #[cfg(feature = "unique_counts")]
1579    /// Returns a count of the unique values in the order of appearance.
1580    /// This method differs from [`Expr::value_counts`] in that it does not return the
1581    /// values, only the counts and might be faster.
1582    pub fn unique_counts(self) -> Self {
1583        self.map_unary(FunctionExpr::UniqueCounts)
1584    }
1585
1586    #[cfg(feature = "log")]
1587    /// Compute the logarithm to a given base.
1588    pub fn log(self, base: f64) -> Self {
1589        self.map_unary(FunctionExpr::Log { base })
1590    }
1591
1592    #[cfg(feature = "log")]
1593    /// Compute the natural logarithm of all elements plus one in the input array.
1594    pub fn log1p(self) -> Self {
1595        self.map_unary(FunctionExpr::Log1p)
1596    }
1597
1598    #[cfg(feature = "log")]
1599    /// Calculate the exponential of all elements in the input array.
1600    pub fn exp(self) -> Self {
1601        self.map_unary(FunctionExpr::Exp)
1602    }
1603
1604    #[cfg(feature = "log")]
1605    /// Compute the entropy as `-sum(pk * log(pk)`.
1606    /// where `pk` are discrete probabilities.
1607    pub fn entropy(self, base: f64, normalize: bool) -> Self {
1608        self.map_unary(FunctionExpr::Entropy { base, normalize })
1609    }
1610    /// Get the null count of the column/group.
1611    pub fn null_count(self) -> Expr {
1612        self.map_unary(FunctionExpr::NullCount)
1613    }
1614
1615    /// Set this `Series` as `sorted` so that downstream code can use
1616    /// fast paths for sorted arrays.
1617    /// # Warning
1618    /// This can lead to incorrect results if this `Series` is not sorted!!
1619    /// Use with care!
1620    pub fn set_sorted_flag(self, sorted: IsSorted) -> Expr {
1621        // This is `map`. If a column is sorted. Chunks of that column are also sorted.
1622        self.map_unary(FunctionExpr::SetSortedFlag(sorted))
1623    }
1624
1625    #[cfg(feature = "row_hash")]
1626    /// Compute the hash of every element.
1627    pub fn hash(self, k0: u64, k1: u64, k2: u64, k3: u64) -> Expr {
1628        self.map_unary(FunctionExpr::Hash(k0, k1, k2, k3))
1629    }
1630
1631    pub fn to_physical(self) -> Expr {
1632        self.map_unary(FunctionExpr::ToPhysical)
1633    }
1634
1635    pub fn gather_every(self, n: usize, offset: usize) -> Expr {
1636        self.map_unary(FunctionExpr::GatherEvery { n, offset })
1637    }
1638
1639    #[cfg(feature = "reinterpret")]
1640    pub fn reinterpret(self, signed: bool) -> Expr {
1641        self.map_unary(FunctionExpr::Reinterpret(signed))
1642    }
1643
1644    pub fn extend_constant(self, value: Expr, n: Expr) -> Expr {
1645        self.map_ternary(FunctionExpr::ExtendConstant, value, n)
1646    }
1647
1648    #[cfg(feature = "strings")]
1649    /// Get the [`string::StringNameSpace`]
1650    pub fn str(self) -> string::StringNameSpace {
1651        string::StringNameSpace(self)
1652    }
1653
1654    /// Get the [`binary::BinaryNameSpace`]
1655    pub fn binary(self) -> binary::BinaryNameSpace {
1656        binary::BinaryNameSpace(self)
1657    }
1658
1659    #[cfg(feature = "temporal")]
1660    /// Get the [`dt::DateLikeNameSpace`]
1661    pub fn dt(self) -> dt::DateLikeNameSpace {
1662        dt::DateLikeNameSpace(self)
1663    }
1664
1665    /// Get the [`list::ListNameSpace`]
1666    pub fn list(self) -> list::ListNameSpace {
1667        list::ListNameSpace(self)
1668    }
1669
1670    /// Get the [`name::ExprNameNameSpace`]
1671    pub fn name(self) -> name::ExprNameNameSpace {
1672        name::ExprNameNameSpace(self)
1673    }
1674
1675    /// Get the [`array::ArrayNameSpace`].
1676    #[cfg(feature = "dtype-array")]
1677    pub fn arr(self) -> array::ArrayNameSpace {
1678        array::ArrayNameSpace(self)
1679    }
1680
1681    /// Get the [`CategoricalNameSpace`].
1682    #[cfg(feature = "dtype-categorical")]
1683    pub fn cat(self) -> cat::CategoricalNameSpace {
1684        cat::CategoricalNameSpace(self)
1685    }
1686
1687    /// Get the [`struct_::StructNameSpace`].
1688    #[cfg(feature = "dtype-struct")]
1689    pub fn struct_(self) -> struct_::StructNameSpace {
1690        struct_::StructNameSpace(self)
1691    }
1692
1693    /// Get the [`meta::MetaNameSpace`]
1694    #[cfg(feature = "meta")]
1695    pub fn meta(self) -> meta::MetaNameSpace {
1696        meta::MetaNameSpace(self)
1697    }
1698}
1699
1700/// Apply a function/closure over multiple columns once the logical plan get executed.
1701///
1702/// This function is very similar to [`apply_multiple`], but differs in how it handles aggregations.
1703///
1704///  * [`map_multiple`] should be used for operations that are independent of groups, e.g. `multiply * 2`, or `raise to the power`
1705///  * [`apply_multiple`] should be used for operations that work on a group of data. e.g. `sum`, `count`, etc.
1706///
1707/// It is the responsibility of the caller that the schema is correct by giving
1708/// the correct output_type. If None given the output type of the input expr is used.
1709pub fn map_multiple<F, E>(function: F, expr: E, output_type: GetOutput) -> Expr
1710where
1711    F: Fn(&mut [Column]) -> PolarsResult<Option<Column>> + 'static + Send + Sync,
1712    E: AsRef<[Expr]>,
1713{
1714    let input = expr.as_ref().to_vec();
1715
1716    Expr::AnonymousFunction {
1717        input,
1718        function: new_column_udf(function),
1719        output_type,
1720        options: FunctionOptions::elementwise().with_fmt_str(""),
1721    }
1722}
1723
1724/// Apply a function/closure over the groups of multiple columns. This should only be used in a group_by aggregation.
1725///
1726/// It is the responsibility of the caller that the schema is correct by giving
1727/// the correct output_type. If None given the output type of the input expr is used.
1728///
1729/// This difference with [`map_multiple`] is that [`apply_multiple`] will create a separate [`Series`] per group.
1730///
1731/// * [`map_multiple`] should be used for operations that are independent of groups, e.g. `multiply * 2`, or `raise to the power`
1732/// * [`apply_multiple`] should be used for operations that work on a group of data. e.g. `sum`, `count`, etc.
1733pub fn apply_multiple<F, E>(
1734    function: F,
1735    expr: E,
1736    output_type: GetOutput,
1737    returns_scalar: bool,
1738) -> Expr
1739where
1740    F: Fn(&mut [Column]) -> PolarsResult<Option<Column>> + 'static + Send + Sync,
1741    E: AsRef<[Expr]>,
1742{
1743    let input = expr.as_ref().to_vec();
1744    Expr::AnonymousFunction {
1745        input,
1746        function: new_column_udf(function),
1747        output_type,
1748        options: FunctionOptions::groupwise()
1749            .with_fmt_str("")
1750            .with_flags(|mut f| {
1751                f.set(FunctionFlags::RETURNS_SCALAR, returns_scalar);
1752                f
1753            }),
1754    }
1755}
1756
1757/// Return the number of rows in the context.
1758pub fn len() -> Expr {
1759    Expr::Len
1760}
1761
1762/// First column in a DataFrame.
1763pub fn first() -> Expr {
1764    Expr::Nth(0)
1765}
1766
1767/// Last column in a DataFrame.
1768pub fn last() -> Expr {
1769    Expr::Nth(-1)
1770}
1771
1772/// Nth column in a DataFrame.
1773pub fn nth(n: i64) -> Expr {
1774    Expr::Nth(n)
1775}
polars_plan/dsl/mod.rs

polars_plan/dsl/
mod.rs