polars_plan/dsl/
list.rs

1use polars_core::prelude::*;
2#[cfg(feature = "diff")]
3use polars_core::series::ops::NullBehavior;
4
5use crate::prelude::function_expr::ListFunction;
6use crate::prelude::*;
7
/// Specialized expressions for [`Series`] of [`DataType::List`].
///
/// This is a thin newtype around an [`Expr`] (the public field `0`). Every method of the
/// namespace consumes the wrapper and returns a new [`Expr`].
pub struct ListNameSpace(pub Expr);
10
11impl ListNameSpace {
12    #[cfg(feature = "list_any_all")]
13    pub fn any(self) -> Expr {
14        self.0.map_unary(FunctionExpr::ListExpr(ListFunction::Any))
15    }
16
17    #[cfg(feature = "list_any_all")]
18    pub fn all(self) -> Expr {
19        self.0.map_unary(FunctionExpr::ListExpr(ListFunction::All))
20    }
21
22    #[cfg(feature = "list_drop_nulls")]
23    pub fn drop_nulls(self) -> Expr {
24        self.0
25            .map_unary(FunctionExpr::ListExpr(ListFunction::DropNulls))
26    }
27
28    #[cfg(feature = "list_sample")]
29    pub fn sample_n(
30        self,
31        n: Expr,
32        with_replacement: bool,
33        shuffle: bool,
34        seed: Option<u64>,
35    ) -> Expr {
36        self.0.map_binary(
37            FunctionExpr::ListExpr(ListFunction::Sample {
38                is_fraction: false,
39                with_replacement,
40                shuffle,
41                seed,
42            }),
43            n,
44        )
45    }
46
47    #[cfg(feature = "list_sample")]
48    pub fn sample_fraction(
49        self,
50        fraction: Expr,
51        with_replacement: bool,
52        shuffle: bool,
53        seed: Option<u64>,
54    ) -> Expr {
55        self.0.map_binary(
56            FunctionExpr::ListExpr(ListFunction::Sample {
57                is_fraction: true,
58                with_replacement,
59                shuffle,
60                seed,
61            }),
62            fraction,
63        )
64    }
65
66    /// Return the number of elements in each list.
67    ///
68    /// Null values are treated like regular elements in this context.
69    pub fn len(self) -> Expr {
70        self.0
71            .map_unary(FunctionExpr::ListExpr(ListFunction::Length))
72    }
73
74    /// Compute the maximum of the items in every sublist.
75    pub fn max(self) -> Expr {
76        self.0.map_unary(FunctionExpr::ListExpr(ListFunction::Max))
77    }
78
79    /// Compute the minimum of the items in every sublist.
80    pub fn min(self) -> Expr {
81        self.0.map_unary(FunctionExpr::ListExpr(ListFunction::Min))
82    }
83
84    /// Compute the sum the items in every sublist.
85    pub fn sum(self) -> Expr {
86        self.0.map_unary(FunctionExpr::ListExpr(ListFunction::Sum))
87    }
88
89    /// Compute the mean of every sublist and return a `Series` of dtype `Float64`
90    pub fn mean(self) -> Expr {
91        self.0.map_unary(FunctionExpr::ListExpr(ListFunction::Mean))
92    }
93
94    pub fn median(self) -> Expr {
95        self.0
96            .map_unary(FunctionExpr::ListExpr(ListFunction::Median))
97    }
98
99    pub fn std(self, ddof: u8) -> Expr {
100        self.0
101            .map_unary(FunctionExpr::ListExpr(ListFunction::Std(ddof)))
102    }
103
104    pub fn var(self, ddof: u8) -> Expr {
105        self.0
106            .map_unary(FunctionExpr::ListExpr(ListFunction::Var(ddof)))
107    }
108
109    /// Sort every sublist.
110    pub fn sort(self, options: SortOptions) -> Expr {
111        self.0
112            .map_unary(FunctionExpr::ListExpr(ListFunction::Sort(options)))
113    }
114
115    /// Reverse every sublist
116    pub fn reverse(self) -> Expr {
117        self.0
118            .map_unary(FunctionExpr::ListExpr(ListFunction::Reverse))
119    }
120
121    /// Keep only the unique values in every sublist.
122    pub fn unique(self) -> Expr {
123        self.0
124            .map_unary(FunctionExpr::ListExpr(ListFunction::Unique(false)))
125    }
126
127    /// Keep only the unique values in every sublist.
128    pub fn unique_stable(self) -> Expr {
129        self.0
130            .map_unary(FunctionExpr::ListExpr(ListFunction::Unique(true)))
131    }
132
133    pub fn n_unique(self) -> Expr {
134        self.0
135            .map_unary(FunctionExpr::ListExpr(ListFunction::NUnique))
136    }
137
138    /// Get items in every sublist by index.
139    pub fn get(self, index: Expr, null_on_oob: bool) -> Expr {
140        self.0.map_binary(
141            FunctionExpr::ListExpr(ListFunction::Get(null_on_oob)),
142            index,
143        )
144    }
145
146    /// Get items in every sublist by multiple indexes.
147    ///
148    /// # Arguments
149    /// - `null_on_oob`: Return a null when an index is out of bounds.
150    ///   This behavior is more expensive than defaulting to returning an `Error`.
151    #[cfg(feature = "list_gather")]
152    pub fn gather(self, index: Expr, null_on_oob: bool) -> Expr {
153        self.0.map_binary(
154            FunctionExpr::ListExpr(ListFunction::Gather(null_on_oob)),
155            index,
156        )
157    }
158
159    #[cfg(feature = "list_gather")]
160    pub fn gather_every(self, n: Expr, offset: Expr) -> Expr {
161        self.0
162            .map_ternary(FunctionExpr::ListExpr(ListFunction::GatherEvery), n, offset)
163    }
164
165    /// Get first item of every sublist.
166    pub fn first(self) -> Expr {
167        self.get(lit(0i64), true)
168    }
169
170    /// Get last item of every sublist.
171    pub fn last(self) -> Expr {
172        self.get(lit(-1i64), true)
173    }
174
175    /// Join all string items in a sublist and place a separator between them.
176    /// # Error
177    /// This errors if inner type of list `!= DataType::String`.
178    pub fn join(self, separator: Expr, ignore_nulls: bool) -> Expr {
179        self.0.map_binary(
180            FunctionExpr::ListExpr(ListFunction::Join(ignore_nulls)),
181            separator,
182        )
183    }
184
185    /// Return the index of the minimal value of every sublist
186    pub fn arg_min(self) -> Expr {
187        self.0
188            .map_unary(FunctionExpr::ListExpr(ListFunction::ArgMin))
189    }
190
191    /// Return the index of the maximum value of every sublist
192    pub fn arg_max(self) -> Expr {
193        self.0
194            .map_unary(FunctionExpr::ListExpr(ListFunction::ArgMax))
195    }
196
197    /// Diff every sublist.
198    #[cfg(feature = "diff")]
199    pub fn diff(self, n: i64, null_behavior: NullBehavior) -> Expr {
200        self.0.map_unary(FunctionExpr::ListExpr(ListFunction::Diff {
201            n,
202            null_behavior,
203        }))
204    }
205
206    /// Shift every sublist.
207    pub fn shift(self, periods: Expr) -> Expr {
208        self.0
209            .map_binary(FunctionExpr::ListExpr(ListFunction::Shift), periods)
210    }
211
212    /// Slice every sublist.
213    pub fn slice(self, offset: Expr, length: Expr) -> Expr {
214        self.0
215            .map_ternary(FunctionExpr::ListExpr(ListFunction::Slice), offset, length)
216    }
217
218    /// Get the head of every sublist
219    pub fn head(self, n: Expr) -> Expr {
220        self.slice(lit(0), n)
221    }
222
223    /// Get the tail of every sublist
224    pub fn tail(self, n: Expr) -> Expr {
225        self.slice(lit(0i64) - n.clone().cast(DataType::Int64), n)
226    }
227
228    #[cfg(feature = "dtype-array")]
229    /// Convert a List column into an Array column with the same inner data type.
230    pub fn to_array(self, width: usize) -> Expr {
231        self.0
232            .map_unary(FunctionExpr::ListExpr(ListFunction::ToArray(width)))
233    }
234
235    #[cfg(feature = "list_to_struct")]
236    #[allow(clippy::wrong_self_convention)]
237    /// Convert this `List` to a `Series` of type `Struct`. The width will be determined according to
238    /// `ListToStructWidthStrategy` and the names of the fields determined by the given `name_generator`.
239    ///
240    /// # Schema
241    ///
242    /// A polars `LazyFrame` needs to know the schema at all time. The caller therefore must provide
243    /// an `upper_bound` of struct fields that will be set.
244    /// If this is incorrectly downstream operation may fail. For instance an `all().sum()` expression
245    /// will look in the current schema to determine which columns to select.
246    pub fn to_struct(self, args: ListToStructArgs) -> Expr {
247        self.0
248            .map_unary(FunctionExpr::ListExpr(ListFunction::ToStruct(args)))
249    }
250
251    #[cfg(feature = "is_in")]
252    /// Check if the list array contain an element
253    pub fn contains<E: Into<Expr>>(self, other: E) -> Expr {
254        self.0
255            .map_binary(FunctionExpr::ListExpr(ListFunction::Contains), other.into())
256    }
257
258    #[cfg(feature = "list_count")]
259    /// Count how often the value produced by ``element`` occurs.
260    pub fn count_matches<E: Into<Expr>>(self, element: E) -> Expr {
261        self.0.map_binary(
262            FunctionExpr::ListExpr(ListFunction::CountMatches),
263            element.into(),
264        )
265    }
266
267    #[cfg(feature = "list_sets")]
268    fn set_operation(self, other: Expr, set_operation: SetOperation) -> Expr {
269        self.0.map_binary(
270            FunctionExpr::ListExpr(ListFunction::SetOperation(set_operation)),
271            other,
272        )
273    }
274
275    /// Return the SET UNION between both list arrays.
276    #[cfg(feature = "list_sets")]
277    pub fn union<E: Into<Expr>>(self, other: E) -> Expr {
278        self.set_operation(other.into(), SetOperation::Union)
279    }
280
281    /// Return the SET DIFFERENCE between both list arrays.
282    #[cfg(feature = "list_sets")]
283    pub fn set_difference<E: Into<Expr>>(self, other: E) -> Expr {
284        self.set_operation(other.into(), SetOperation::Difference)
285    }
286
287    /// Return the SET INTERSECTION between both list arrays.
288    #[cfg(feature = "list_sets")]
289    pub fn set_intersection<E: Into<Expr>>(self, other: E) -> Expr {
290        self.set_operation(other.into(), SetOperation::Intersection)
291    }
292
293    /// Return the SET SYMMETRIC DIFFERENCE between both list arrays.
294    #[cfg(feature = "list_sets")]
295    pub fn set_symmetric_difference<E: Into<Expr>>(self, other: E) -> Expr {
296        self.set_operation(other.into(), SetOperation::SymmetricDifference)
297    }
298}