1use crate::column::Column;
2use crate::dataframe::DataFrame;
3use polars::prelude::*;
4
5#[derive(Debug, Clone)]
10pub struct SortOrder {
11 pub(crate) expr: Expr,
12 pub(crate) descending: bool,
13 pub(crate) nulls_last: bool,
14}
15
16impl SortOrder {
17 pub fn expr(&self) -> &Expr {
18 &self.expr
19 }
20}
21
22pub fn asc(column: &Column) -> SortOrder {
24 SortOrder {
25 expr: column.expr().clone(),
26 descending: false,
27 nulls_last: false,
28 }
29}
30
31pub fn asc_nulls_first(column: &Column) -> SortOrder {
33 SortOrder {
34 expr: column.expr().clone(),
35 descending: false,
36 nulls_last: false,
37 }
38}
39
40pub fn asc_nulls_last(column: &Column) -> SortOrder {
42 SortOrder {
43 expr: column.expr().clone(),
44 descending: false,
45 nulls_last: true,
46 }
47}
48
49pub fn desc(column: &Column) -> SortOrder {
51 SortOrder {
52 expr: column.expr().clone(),
53 descending: true,
54 nulls_last: true,
55 }
56}
57
58pub fn desc_nulls_first(column: &Column) -> SortOrder {
60 SortOrder {
61 expr: column.expr().clone(),
62 descending: true,
63 nulls_last: false,
64 }
65}
66
67pub fn desc_nulls_last(column: &Column) -> SortOrder {
69 SortOrder {
70 expr: column.expr().clone(),
71 descending: true,
72 nulls_last: true,
73 }
74}
75
76pub fn parse_type_name(name: &str) -> Result<DataType, String> {
80 let s = name.trim().to_lowercase();
81 Ok(match s.as_str() {
82 "int" | "integer" => DataType::Int32,
83 "long" | "bigint" => DataType::Int64,
84 "float" => DataType::Float32,
85 "double" => DataType::Float64,
86 "string" | "str" => DataType::String,
87 "boolean" | "bool" => DataType::Boolean,
88 "date" => DataType::Date,
89 "timestamp" => DataType::Datetime(TimeUnit::Microseconds, None),
90 _ => return Err(format!("unknown type name: {name}")),
91 })
92}
93
94pub fn col(name: &str) -> Column {
96 Column::new(name.to_string())
97}
98
99pub fn grouping(column: &Column) -> Column {
101 let _ = column;
102 Column::from_expr(lit(0i32), Some("grouping".to_string()))
103}
104
105pub fn grouping_id(_columns: &[Column]) -> Column {
107 Column::from_expr(lit(0i64), Some("grouping_id".to_string()))
108}
109
110pub fn lit_i32(value: i32) -> Column {
112 let expr: Expr = lit(value);
113 Column::from_expr(expr, None)
114}
115
116pub fn lit_i64(value: i64) -> Column {
117 let expr: Expr = lit(value);
118 Column::from_expr(expr, None)
119}
120
121pub fn lit_f64(value: f64) -> Column {
122 let expr: Expr = lit(value);
123 Column::from_expr(expr, None)
124}
125
126pub fn lit_bool(value: bool) -> Column {
127 let expr: Expr = lit(value);
128 Column::from_expr(expr, None)
129}
130
131pub fn lit_str(value: &str) -> Column {
132 let expr: Expr = lit(value);
133 Column::from_expr(expr, None)
134}
135
136pub fn count(col: &Column) -> Column {
138 Column::from_expr(col.expr().clone().count(), Some("count".to_string()))
139}
140
141pub fn sum(col: &Column) -> Column {
143 Column::from_expr(col.expr().clone().sum(), Some("sum".to_string()))
144}
145
146pub fn avg(col: &Column) -> Column {
148 Column::from_expr(col.expr().clone().mean(), Some("avg".to_string()))
149}
150
151pub fn mean(col: &Column) -> Column {
153 avg(col)
154}
155
156pub fn max(col: &Column) -> Column {
158 Column::from_expr(col.expr().clone().max(), Some("max".to_string()))
159}
160
161pub fn min(col: &Column) -> Column {
163 Column::from_expr(col.expr().clone().min(), Some("min".to_string()))
164}
165
166pub fn stddev(col: &Column) -> Column {
168 Column::from_expr(col.expr().clone().std(1), Some("stddev".to_string()))
169}
170
171pub fn variance(col: &Column) -> Column {
173 Column::from_expr(col.expr().clone().var(1), Some("variance".to_string()))
174}
175
176pub fn stddev_pop(col: &Column) -> Column {
178 Column::from_expr(col.expr().clone().std(0), Some("stddev_pop".to_string()))
179}
180
181pub fn stddev_samp(col: &Column) -> Column {
183 stddev(col)
184}
185
186pub fn std(col: &Column) -> Column {
188 stddev(col)
189}
190
191pub fn var_pop(col: &Column) -> Column {
193 Column::from_expr(col.expr().clone().var(0), Some("var_pop".to_string()))
194}
195
196pub fn var_samp(col: &Column) -> Column {
198 variance(col)
199}
200
201pub fn median(col: &Column) -> Column {
203 use polars::prelude::QuantileMethod;
204 Column::from_expr(
205 col.expr()
206 .clone()
207 .quantile(lit(0.5), QuantileMethod::Linear),
208 Some("median".to_string()),
209 )
210}
211
212pub fn approx_percentile(col: &Column, percentage: f64) -> Column {
214 use polars::prelude::QuantileMethod;
215 Column::from_expr(
216 col.expr()
217 .clone()
218 .quantile(lit(percentage), QuantileMethod::Linear),
219 Some(format!("approx_percentile({percentage})")),
220 )
221}
222
223pub fn percentile_approx(col: &Column, percentage: f64) -> Column {
225 approx_percentile(col, percentage)
226}
227
228pub fn mode(col: &Column) -> Column {
230 col.clone().mode()
231}
232
233pub fn count_distinct(col: &Column) -> Column {
235 use polars::prelude::DataType;
236 Column::from_expr(
237 col.expr().clone().n_unique().cast(DataType::Int64),
238 Some("count_distinct".to_string()),
239 )
240}
241
242pub fn kurtosis(col: &Column) -> Column {
244 Column::from_expr(
245 col.expr()
246 .clone()
247 .cast(DataType::Float64)
248 .kurtosis(true, true),
249 Some("kurtosis".to_string()),
250 )
251}
252
253pub fn skewness(col: &Column) -> Column {
255 Column::from_expr(
256 col.expr().clone().cast(DataType::Float64).skew(true),
257 Some("skewness".to_string()),
258 )
259}
260
261pub fn covar_pop_expr(col1: &str, col2: &str) -> Expr {
263 use polars::prelude::{col as pl_col, len};
264 let c1 = pl_col(col1).cast(DataType::Float64);
265 let c2 = pl_col(col2).cast(DataType::Float64);
266 let n = len().cast(DataType::Float64);
267 let sum_ab = (c1.clone() * c2.clone()).sum();
268 let sum_a = pl_col(col1).sum().cast(DataType::Float64);
269 let sum_b = pl_col(col2).sum().cast(DataType::Float64);
270 (sum_ab - sum_a * sum_b / n.clone()) / n
271}
272
273pub fn covar_samp_expr(col1: &str, col2: &str) -> Expr {
275 use polars::prelude::{col as pl_col, len, lit, when};
276 let c1 = pl_col(col1).cast(DataType::Float64);
277 let c2 = pl_col(col2).cast(DataType::Float64);
278 let n = len().cast(DataType::Float64);
279 let sum_ab = (c1.clone() * c2.clone()).sum();
280 let sum_a = pl_col(col1).sum().cast(DataType::Float64);
281 let sum_b = pl_col(col2).sum().cast(DataType::Float64);
282 when(len().gt(lit(1)))
283 .then((sum_ab - sum_a * sum_b / n.clone()) / (len() - lit(1)).cast(DataType::Float64))
284 .otherwise(lit(f64::NAN))
285}
286
287pub fn corr_expr(col1: &str, col2: &str) -> Expr {
289 use polars::prelude::{col as pl_col, len, lit, when};
290 let c1 = pl_col(col1).cast(DataType::Float64);
291 let c2 = pl_col(col2).cast(DataType::Float64);
292 let n = len().cast(DataType::Float64);
293 let n1 = (len() - lit(1)).cast(DataType::Float64);
294 let sum_ab = (c1.clone() * c2.clone()).sum();
295 let sum_a = pl_col(col1).sum().cast(DataType::Float64);
296 let sum_b = pl_col(col2).sum().cast(DataType::Float64);
297 let sum_a2 = (c1.clone() * c1).sum();
298 let sum_b2 = (c2.clone() * c2).sum();
299 let cov_samp = (sum_ab - sum_a.clone() * sum_b.clone() / n.clone()) / n1.clone();
300 let var_a = (sum_a2 - sum_a.clone() * sum_a / n.clone()) / n1.clone();
301 let var_b = (sum_b2 - sum_b.clone() * sum_b / n.clone()) / n1.clone();
302 let std_a = var_a.sqrt();
303 let std_b = var_b.sqrt();
304 when(len().gt(lit(1)))
305 .then(cov_samp / (std_a * std_b))
306 .otherwise(lit(f64::NAN))
307}
308
309fn regr_cond_and_sums(y_col: &str, x_col: &str) -> (Expr, Expr, Expr, Expr, Expr, Expr) {
312 use polars::prelude::col as pl_col;
313 let y = pl_col(y_col).cast(DataType::Float64);
314 let x = pl_col(x_col).cast(DataType::Float64);
315 let cond = y.clone().is_not_null().and(x.clone().is_not_null());
316 let n = y
317 .clone()
318 .filter(cond.clone())
319 .count()
320 .cast(DataType::Float64);
321 let sum_x = x.clone().filter(cond.clone()).sum();
322 let sum_y = y.clone().filter(cond.clone()).sum();
323 let sum_xx = (x.clone() * x.clone()).filter(cond.clone()).sum();
324 let sum_yy = (y.clone() * y.clone()).filter(cond.clone()).sum();
325 let sum_xy = (x * y).filter(cond).sum();
326 (n, sum_x, sum_y, sum_xx, sum_yy, sum_xy)
327}
328
329pub fn regr_count_expr(y_col: &str, x_col: &str) -> Expr {
331 let (n, ..) = regr_cond_and_sums(y_col, x_col);
332 n
333}
334
335pub fn regr_avgx_expr(y_col: &str, x_col: &str) -> Expr {
337 use polars::prelude::{lit, when};
338 let (n, sum_x, ..) = regr_cond_and_sums(y_col, x_col);
339 when(n.clone().gt(lit(0.0)))
340 .then(sum_x / n)
341 .otherwise(lit(f64::NAN))
342}
343
344pub fn regr_avgy_expr(y_col: &str, x_col: &str) -> Expr {
346 use polars::prelude::{lit, when};
347 let (n, _, sum_y, ..) = regr_cond_and_sums(y_col, x_col);
348 when(n.clone().gt(lit(0.0)))
349 .then(sum_y / n)
350 .otherwise(lit(f64::NAN))
351}
352
353pub fn regr_sxx_expr(y_col: &str, x_col: &str) -> Expr {
355 use polars::prelude::{lit, when};
356 let (n, sum_x, _, sum_xx, ..) = regr_cond_and_sums(y_col, x_col);
357 when(n.clone().gt(lit(0.0)))
358 .then(sum_xx - sum_x.clone() * sum_x / n)
359 .otherwise(lit(f64::NAN))
360}
361
362pub fn regr_syy_expr(y_col: &str, x_col: &str) -> Expr {
364 use polars::prelude::{lit, when};
365 let (n, _, sum_y, _, sum_yy, _) = regr_cond_and_sums(y_col, x_col);
366 when(n.clone().gt(lit(0.0)))
367 .then(sum_yy - sum_y.clone() * sum_y / n)
368 .otherwise(lit(f64::NAN))
369}
370
371pub fn regr_sxy_expr(y_col: &str, x_col: &str) -> Expr {
373 use polars::prelude::{lit, when};
374 let (n, sum_x, sum_y, _, _, sum_xy) = regr_cond_and_sums(y_col, x_col);
375 when(n.clone().gt(lit(0.0)))
376 .then(sum_xy - sum_x * sum_y / n)
377 .otherwise(lit(f64::NAN))
378}
379
380pub fn regr_slope_expr(y_col: &str, x_col: &str) -> Expr {
382 use polars::prelude::{lit, when};
383 let (n, sum_x, sum_y, sum_xx, _sum_yy, sum_xy) = regr_cond_and_sums(y_col, x_col);
384 let regr_sxx = sum_xx.clone() - sum_x.clone() * sum_x.clone() / n.clone();
385 let regr_sxy = sum_xy - sum_x * sum_y / n.clone();
386 when(n.gt(lit(1.0)).and(regr_sxx.clone().gt(lit(0.0))))
387 .then(regr_sxy / regr_sxx)
388 .otherwise(lit(f64::NAN))
389}
390
391pub fn regr_intercept_expr(y_col: &str, x_col: &str) -> Expr {
393 use polars::prelude::{lit, when};
394 let (n, sum_x, sum_y, sum_xx, _, sum_xy) = regr_cond_and_sums(y_col, x_col);
395 let regr_sxx = sum_xx - sum_x.clone() * sum_x.clone() / n.clone();
396 let regr_sxy = sum_xy.clone() - sum_x.clone() * sum_y.clone() / n.clone();
397 let slope = regr_sxy.clone() / regr_sxx.clone();
398 let avg_y = sum_y / n.clone();
399 let avg_x = sum_x / n.clone();
400 when(n.gt(lit(1.0)).and(regr_sxx.clone().gt(lit(0.0))))
401 .then(avg_y - slope * avg_x)
402 .otherwise(lit(f64::NAN))
403}
404
405pub fn regr_r2_expr(y_col: &str, x_col: &str) -> Expr {
407 use polars::prelude::{lit, when};
408 let (n, sum_x, sum_y, sum_xx, sum_yy, sum_xy) = regr_cond_and_sums(y_col, x_col);
409 let regr_sxx = sum_xx - sum_x.clone() * sum_x.clone() / n.clone();
410 let regr_syy = sum_yy - sum_y.clone() * sum_y.clone() / n.clone();
411 let regr_sxy = sum_xy - sum_x * sum_y / n;
412 when(
413 regr_sxx
414 .clone()
415 .gt(lit(0.0))
416 .and(regr_syy.clone().gt(lit(0.0))),
417 )
418 .then(regr_sxy.clone() * regr_sxy / (regr_sxx * regr_syy))
419 .otherwise(lit(f64::NAN))
420}
421
422pub fn when(condition: &Column) -> WhenBuilder {
434 WhenBuilder::new(condition.expr().clone())
435}
436
437pub fn when_then_otherwise_null(condition: &Column, value: &Column) -> Column {
439 use polars::prelude::*;
440 let null_expr = Expr::Literal(LiteralValue::Null);
441 let expr = polars::prelude::when(condition.expr().clone())
442 .then(value.expr().clone())
443 .otherwise(null_expr);
444 crate::column::Column::from_expr(expr, None)
445}
446
447pub struct WhenBuilder {
449 condition: Expr,
450}
451
452impl WhenBuilder {
453 fn new(condition: Expr) -> Self {
454 WhenBuilder { condition }
455 }
456
457 pub fn then(self, value: &Column) -> ThenBuilder {
459 use polars::prelude::*;
460 let when_then = when(self.condition).then(value.expr().clone());
461 ThenBuilder::new(when_then)
462 }
463
464 pub fn otherwise(self, _value: &Column) -> Column {
469 panic!("when().otherwise() requires .then() to be called first. Use when(cond).then(val1).otherwise(val2)");
472 }
473}
474
475pub struct ThenBuilder {
477 when_then: polars::prelude::Then, }
479
480impl ThenBuilder {
481 fn new(when_then: polars::prelude::Then) -> Self {
482 ThenBuilder { when_then }
483 }
484
485 pub fn when(self, _condition: &Column) -> ThenBuilder {
489 self
492 }
493
494 pub fn otherwise(self, value: &Column) -> Column {
496 let expr = self.when_then.otherwise(value.expr().clone());
497 crate::column::Column::from_expr(expr, None)
498 }
499}
500
501pub fn upper(column: &Column) -> Column {
503 column.clone().upper()
504}
505
506pub fn lower(column: &Column) -> Column {
508 column.clone().lower()
509}
510
511pub fn substring(column: &Column, start: i64, length: Option<i64>) -> Column {
513 column.clone().substr(start, length)
514}
515
516pub fn length(column: &Column) -> Column {
518 column.clone().length()
519}
520
521pub fn trim(column: &Column) -> Column {
523 column.clone().trim()
524}
525
526pub fn ltrim(column: &Column) -> Column {
528 column.clone().ltrim()
529}
530
531pub fn rtrim(column: &Column) -> Column {
533 column.clone().rtrim()
534}
535
536pub fn btrim(column: &Column, trim_str: Option<&str>) -> Column {
538 column.clone().btrim(trim_str)
539}
540
541pub fn locate(substr: &str, column: &Column, pos: i64) -> Column {
543 column.clone().locate(substr, pos)
544}
545
546pub fn conv(column: &Column, from_base: i32, to_base: i32) -> Column {
548 column.clone().conv(from_base, to_base)
549}
550
551pub fn hex(column: &Column) -> Column {
553 column.clone().hex()
554}
555
556pub fn unhex(column: &Column) -> Column {
558 column.clone().unhex()
559}
560
561pub fn encode(column: &Column, charset: &str) -> Column {
563 column.clone().encode(charset)
564}
565
566pub fn decode(column: &Column, charset: &str) -> Column {
568 column.clone().decode(charset)
569}
570
571pub fn to_binary(column: &Column, fmt: &str) -> Column {
573 column.clone().to_binary(fmt)
574}
575
576pub fn try_to_binary(column: &Column, fmt: &str) -> Column {
578 column.clone().try_to_binary(fmt)
579}
580
581pub fn aes_encrypt(column: &Column, key: &str) -> Column {
583 column.clone().aes_encrypt(key)
584}
585
586pub fn aes_decrypt(column: &Column, key: &str) -> Column {
588 column.clone().aes_decrypt(key)
589}
590
591pub fn try_aes_decrypt(column: &Column, key: &str) -> Column {
593 column.clone().try_aes_decrypt(key)
594}
595
596pub fn bin(column: &Column) -> Column {
598 column.clone().bin()
599}
600
601pub fn getbit(column: &Column, pos: i64) -> Column {
603 column.clone().getbit(pos)
604}
605
606pub fn bit_and(left: &Column, right: &Column) -> Column {
608 left.clone().bit_and(right)
609}
610
611pub fn bit_or(left: &Column, right: &Column) -> Column {
613 left.clone().bit_or(right)
614}
615
616pub fn bit_xor(left: &Column, right: &Column) -> Column {
618 left.clone().bit_xor(right)
619}
620
621pub fn bit_count(column: &Column) -> Column {
623 column.clone().bit_count()
624}
625
626pub fn bitwise_not(column: &Column) -> Column {
628 column.clone().bitwise_not()
629}
630
631pub fn bitmap_bit_position(column: &Column) -> Column {
635 use polars::prelude::DataType;
636 let expr = column.expr().clone().cast(DataType::Int32);
637 Column::from_expr(expr, None)
638}
639
640pub fn bitmap_bucket_number(column: &Column) -> Column {
642 use polars::prelude::DataType;
643 let expr = column.expr().clone().cast(DataType::Int64) / lit(32768i64);
644 Column::from_expr(expr, None)
645}
646
647pub fn bitmap_count(column: &Column) -> Column {
649 use polars::prelude::{DataType, GetOutput};
650 let expr = column.expr().clone().map(
651 crate::udfs::apply_bitmap_count,
652 GetOutput::from_type(DataType::Int64),
653 );
654 Column::from_expr(expr, None)
655}
656
657pub fn bitmap_construct_agg(column: &Column) -> polars::prelude::Expr {
660 use polars::prelude::{DataType, GetOutput};
661 column.expr().clone().implode().map(
662 crate::udfs::apply_bitmap_construct_agg,
663 GetOutput::from_type(DataType::Binary),
664 )
665}
666
667pub fn bitmap_or_agg(column: &Column) -> polars::prelude::Expr {
669 use polars::prelude::{DataType, GetOutput};
670 column.expr().clone().implode().map(
671 crate::udfs::apply_bitmap_or_agg,
672 GetOutput::from_type(DataType::Binary),
673 )
674}
675
676pub fn bit_get(column: &Column, pos: i64) -> Column {
678 getbit(column, pos)
679}
680
681pub fn assert_true(column: &Column, err_msg: Option<&str>) -> Column {
684 column.clone().assert_true(err_msg)
685}
686
687pub fn raise_error(message: &str) -> Column {
689 let msg = message.to_string();
690 let expr = lit(0i64).map(
691 move |_col| -> PolarsResult<Option<polars::prelude::Column>> {
692 Err(PolarsError::ComputeError(msg.clone().into()))
693 },
694 GetOutput::from_type(DataType::Int64),
695 );
696 Column::from_expr(expr, Some("raise_error".to_string()))
697}
698
699pub fn broadcast(df: &DataFrame) -> DataFrame {
701 df.clone()
702}
703
704pub fn spark_partition_id() -> Column {
706 Column::from_expr(lit(0i32), Some("spark_partition_id".to_string()))
707}
708
709pub fn input_file_name() -> Column {
711 Column::from_expr(lit(""), Some("input_file_name".to_string()))
712}
713
714pub fn monotonically_increasing_id() -> Column {
717 Column::from_expr(lit(0i64), Some("monotonically_increasing_id".to_string()))
718}
719
720pub fn current_catalog() -> Column {
722 Column::from_expr(lit("spark_catalog"), Some("current_catalog".to_string()))
723}
724
725pub fn current_database() -> Column {
727 Column::from_expr(lit("default"), Some("current_database".to_string()))
728}
729
730pub fn current_schema() -> Column {
732 Column::from_expr(lit("default"), Some("current_schema".to_string()))
733}
734
735pub fn current_user() -> Column {
737 Column::from_expr(lit("unknown"), Some("current_user".to_string()))
738}
739
740pub fn user() -> Column {
742 Column::from_expr(lit("unknown"), Some("user".to_string()))
743}
744
745pub fn rand(seed: Option<u64>) -> Column {
748 Column::from_rand(seed)
749}
750
751pub fn randn(seed: Option<u64>) -> Column {
754 Column::from_randn(seed)
755}
756
757pub fn arrays_overlap(left: &Column, right: &Column) -> Column {
759 left.clone().arrays_overlap(right)
760}
761
762pub fn arrays_zip(left: &Column, right: &Column) -> Column {
764 left.clone().arrays_zip(right)
765}
766
767pub fn explode_outer(column: &Column) -> Column {
769 column.clone().explode_outer()
770}
771
772pub fn posexplode_outer(column: &Column) -> (Column, Column) {
774 column.clone().posexplode_outer()
775}
776
777pub fn array_agg(column: &Column) -> Column {
779 column.clone().array_agg()
780}
781
782pub fn transform_keys(column: &Column, key_expr: Expr) -> Column {
784 column.clone().transform_keys(key_expr)
785}
786
787pub fn transform_values(column: &Column, value_expr: Expr) -> Column {
789 column.clone().transform_values(value_expr)
790}
791
792pub fn str_to_map(
794 column: &Column,
795 pair_delim: Option<&str>,
796 key_value_delim: Option<&str>,
797) -> Column {
798 let pd = pair_delim.unwrap_or(",");
799 let kvd = key_value_delim.unwrap_or(":");
800 column.clone().str_to_map(pd, kvd)
801}
802
803pub fn regexp_extract(column: &Column, pattern: &str, group_index: usize) -> Column {
805 column.clone().regexp_extract(pattern, group_index)
806}
807
808pub fn regexp_replace(column: &Column, pattern: &str, replacement: &str) -> Column {
810 column.clone().regexp_replace(pattern, replacement)
811}
812
813pub fn split(column: &Column, delimiter: &str) -> Column {
815 column.clone().split(delimiter)
816}
817
818pub fn initcap(column: &Column) -> Column {
820 column.clone().initcap()
821}
822
823pub fn regexp_extract_all(column: &Column, pattern: &str) -> Column {
825 column.clone().regexp_extract_all(pattern)
826}
827
828pub fn regexp_like(column: &Column, pattern: &str) -> Column {
830 column.clone().regexp_like(pattern)
831}
832
833pub fn regexp_count(column: &Column, pattern: &str) -> Column {
835 column.clone().regexp_count(pattern)
836}
837
838pub fn regexp_substr(column: &Column, pattern: &str) -> Column {
840 column.clone().regexp_substr(pattern)
841}
842
843pub fn split_part(column: &Column, delimiter: &str, part_num: i64) -> Column {
845 column.clone().split_part(delimiter, part_num)
846}
847
848pub fn regexp_instr(column: &Column, pattern: &str, group_idx: Option<usize>) -> Column {
850 column.clone().regexp_instr(pattern, group_idx)
851}
852
853pub fn find_in_set(str_column: &Column, set_column: &Column) -> Column {
855 str_column.clone().find_in_set(set_column)
856}
857
858pub fn format_string(format: &str, columns: &[&Column]) -> Column {
860 use polars::prelude::*;
861 if columns.is_empty() {
862 panic!("format_string needs at least one column");
863 }
864 let format_owned = format.to_string();
865 let args: Vec<Expr> = columns.iter().skip(1).map(|c| c.expr().clone()).collect();
866 let expr = columns[0].expr().clone().map_many(
867 move |cols| crate::udfs::apply_format_string(cols, &format_owned),
868 &args,
869 GetOutput::from_type(DataType::String),
870 );
871 crate::column::Column::from_expr(expr, None)
872}
873
874pub fn printf(format: &str, columns: &[&Column]) -> Column {
876 format_string(format, columns)
877}
878
879pub fn repeat(column: &Column, n: i32) -> Column {
881 column.clone().repeat(n)
882}
883
884pub fn reverse(column: &Column) -> Column {
886 column.clone().reverse()
887}
888
889pub fn instr(column: &Column, substr: &str) -> Column {
891 column.clone().instr(substr)
892}
893
894pub fn position(substr: &str, column: &Column) -> Column {
896 column.clone().instr(substr)
897}
898
899pub fn ascii(column: &Column) -> Column {
901 column.clone().ascii()
902}
903
904pub fn format_number(column: &Column, decimals: u32) -> Column {
906 column.clone().format_number(decimals)
907}
908
909pub fn overlay(column: &Column, replace: &str, pos: i64, length: i64) -> Column {
911 column.clone().overlay(replace, pos, length)
912}
913
914pub fn char(column: &Column) -> Column {
916 column.clone().char()
917}
918
919pub fn chr(column: &Column) -> Column {
921 column.clone().chr()
922}
923
924pub fn base64(column: &Column) -> Column {
926 column.clone().base64()
927}
928
929pub fn unbase64(column: &Column) -> Column {
931 column.clone().unbase64()
932}
933
934pub fn sha1(column: &Column) -> Column {
936 column.clone().sha1()
937}
938
939pub fn sha2(column: &Column, bit_length: i32) -> Column {
941 column.clone().sha2(bit_length)
942}
943
944pub fn md5(column: &Column) -> Column {
946 column.clone().md5()
947}
948
949pub fn lpad(column: &Column, length: i32, pad: &str) -> Column {
951 column.clone().lpad(length, pad)
952}
953
954pub fn rpad(column: &Column, length: i32, pad: &str) -> Column {
956 column.clone().rpad(length, pad)
957}
958
959pub fn translate(column: &Column, from_str: &str, to_str: &str) -> Column {
961 column.clone().translate(from_str, to_str)
962}
963
964pub fn mask(
966 column: &Column,
967 upper_char: Option<char>,
968 lower_char: Option<char>,
969 digit_char: Option<char>,
970 other_char: Option<char>,
971) -> Column {
972 column
973 .clone()
974 .mask(upper_char, lower_char, digit_char, other_char)
975}
976
977pub fn substring_index(column: &Column, delimiter: &str, count: i64) -> Column {
979 column.clone().substring_index(delimiter, count)
980}
981
982pub fn left(column: &Column, n: i64) -> Column {
984 column.clone().left(n)
985}
986
987pub fn right(column: &Column, n: i64) -> Column {
989 column.clone().right(n)
990}
991
992pub fn replace(column: &Column, search: &str, replacement: &str) -> Column {
994 column.clone().replace(search, replacement)
995}
996
997pub fn startswith(column: &Column, prefix: &str) -> Column {
999 column.clone().startswith(prefix)
1000}
1001
1002pub fn endswith(column: &Column, suffix: &str) -> Column {
1004 column.clone().endswith(suffix)
1005}
1006
1007pub fn contains(column: &Column, substring: &str) -> Column {
1009 column.clone().contains(substring)
1010}
1011
1012pub fn like(column: &Column, pattern: &str, escape_char: Option<char>) -> Column {
1015 column.clone().like(pattern, escape_char)
1016}
1017
1018pub fn ilike(column: &Column, pattern: &str, escape_char: Option<char>) -> Column {
1021 column.clone().ilike(pattern, escape_char)
1022}
1023
1024pub fn rlike(column: &Column, pattern: &str) -> Column {
1026 column.clone().regexp_like(pattern)
1027}
1028
1029pub fn regexp(column: &Column, pattern: &str) -> Column {
1031 rlike(column, pattern)
1032}
1033
1034pub fn soundex(column: &Column) -> Column {
1036 column.clone().soundex()
1037}
1038
1039pub fn levenshtein(column: &Column, other: &Column) -> Column {
1041 column.clone().levenshtein(other)
1042}
1043
1044pub fn crc32(column: &Column) -> Column {
1046 column.clone().crc32()
1047}
1048
1049pub fn xxhash64(column: &Column) -> Column {
1051 column.clone().xxhash64()
1052}
1053
1054pub fn abs(column: &Column) -> Column {
1056 column.clone().abs()
1057}
1058
1059pub fn ceil(column: &Column) -> Column {
1061 column.clone().ceil()
1062}
1063
1064pub fn floor(column: &Column) -> Column {
1066 column.clone().floor()
1067}
1068
1069pub fn round(column: &Column, decimals: u32) -> Column {
1071 column.clone().round(decimals)
1072}
1073
1074pub fn bround(column: &Column, scale: i32) -> Column {
1076 column.clone().bround(scale)
1077}
1078
1079pub fn negate(column: &Column) -> Column {
1081 column.clone().negate()
1082}
1083
1084pub fn negative(column: &Column) -> Column {
1086 negate(column)
1087}
1088
1089pub fn positive(column: &Column) -> Column {
1091 column.clone()
1092}
1093
1094pub fn cot(column: &Column) -> Column {
1096 column.clone().cot()
1097}
1098
1099pub fn csc(column: &Column) -> Column {
1101 column.clone().csc()
1102}
1103
1104pub fn sec(column: &Column) -> Column {
1106 column.clone().sec()
1107}
1108
1109pub fn e() -> Column {
1111 Column::from_expr(lit(std::f64::consts::E), Some("e".to_string()))
1112}
1113
1114pub fn pi() -> Column {
1116 Column::from_expr(lit(std::f64::consts::PI), Some("pi".to_string()))
1117}
1118
1119pub fn sqrt(column: &Column) -> Column {
1121 column.clone().sqrt()
1122}
1123
1124pub fn pow(column: &Column, exp: i64) -> Column {
1126 column.clone().pow(exp)
1127}
1128
1129pub fn exp(column: &Column) -> Column {
1131 column.clone().exp()
1132}
1133
1134pub fn log(column: &Column) -> Column {
1136 column.clone().log()
1137}
1138
1139pub fn log_with_base(column: &Column, base: f64) -> Column {
1141 crate::column::Column::from_expr(column.expr().clone().log(base), None)
1142}
1143
1144pub fn sin(column: &Column) -> Column {
1146 column.clone().sin()
1147}
1148
1149pub fn cos(column: &Column) -> Column {
1151 column.clone().cos()
1152}
1153
1154pub fn tan(column: &Column) -> Column {
1156 column.clone().tan()
1157}
1158
1159pub fn asin(column: &Column) -> Column {
1161 column.clone().asin()
1162}
1163
1164pub fn acos(column: &Column) -> Column {
1166 column.clone().acos()
1167}
1168
1169pub fn atan(column: &Column) -> Column {
1171 column.clone().atan()
1172}
1173
1174pub fn atan2(y: &Column, x: &Column) -> Column {
1176 y.clone().atan2(x)
1177}
1178
1179pub fn degrees(column: &Column) -> Column {
1181 column.clone().degrees()
1182}
1183
1184pub fn radians(column: &Column) -> Column {
1186 column.clone().radians()
1187}
1188
1189pub fn signum(column: &Column) -> Column {
1191 column.clone().signum()
1192}
1193
1194pub fn sign(column: &Column) -> Column {
1196 signum(column)
1197}
1198
1199pub fn cast(column: &Column, type_name: &str) -> Result<Column, String> {
1201 let dtype = parse_type_name(type_name)?;
1202 Ok(Column::from_expr(
1203 column.expr().clone().strict_cast(dtype),
1204 None,
1205 ))
1206}
1207
1208pub fn try_cast(column: &Column, type_name: &str) -> Result<Column, String> {
1210 let dtype = parse_type_name(type_name)?;
1211 Ok(Column::from_expr(column.expr().clone().cast(dtype), None))
1212}
1213
1214pub fn to_char(column: &Column, format: Option<&str>) -> Result<Column, String> {
1218 match format {
1219 Some(fmt) => Ok(column
1220 .clone()
1221 .date_format(&crate::udfs::pyspark_format_to_chrono(fmt))),
1222 None => cast(column, "string"),
1223 }
1224}
1225
1226pub fn to_varchar(column: &Column, format: Option<&str>) -> Result<Column, String> {
1228 to_char(column, format)
1229}
1230
1231pub fn to_number(column: &Column, _format: Option<&str>) -> Result<Column, String> {
1234 cast(column, "double")
1235}
1236
1237pub fn try_to_number(column: &Column, _format: Option<&str>) -> Result<Column, String> {
1240 try_cast(column, "double")
1241}
1242
1243pub fn to_timestamp(column: &Column, format: Option<&str>) -> Result<Column, String> {
1245 use polars::prelude::{DataType, GetOutput, TimeUnit};
1246 match format {
1247 None => crate::cast(column, "timestamp"),
1248 Some(fmt) => {
1249 let fmt_owned = fmt.to_string();
1250 let expr = column.expr().clone().map(
1251 move |s| crate::udfs::apply_to_timestamp_format(s, Some(&fmt_owned), true),
1252 GetOutput::from_type(DataType::Datetime(TimeUnit::Microseconds, None)),
1253 );
1254 Ok(crate::column::Column::from_expr(expr, None))
1255 }
1256 }
1257}
1258
1259pub fn try_to_timestamp(column: &Column, format: Option<&str>) -> Result<Column, String> {
1262 use polars::prelude::*;
1263 match format {
1264 None => try_cast(column, "timestamp"),
1265 Some(fmt) => {
1266 let fmt_owned = fmt.to_string();
1267 let expr = column.expr().clone().map(
1268 move |s| crate::udfs::apply_to_timestamp_format(s, Some(&fmt_owned), false),
1269 GetOutput::from_type(DataType::Datetime(TimeUnit::Microseconds, None)),
1270 );
1271 Ok(crate::column::Column::from_expr(expr, None))
1272 }
1273 }
1274}
1275
1276pub fn to_timestamp_ltz(column: &Column, format: Option<&str>) -> Result<Column, String> {
1278 use polars::prelude::{DataType, GetOutput, TimeUnit};
1279 match format {
1280 None => crate::cast(column, "timestamp"),
1281 Some(fmt) => {
1282 let fmt_owned = fmt.to_string();
1283 let expr = column.expr().clone().map(
1284 move |s| crate::udfs::apply_to_timestamp_ltz_format(s, Some(&fmt_owned), true),
1285 GetOutput::from_type(DataType::Datetime(TimeUnit::Microseconds, None)),
1286 );
1287 Ok(crate::column::Column::from_expr(expr, None))
1288 }
1289 }
1290}
1291
1292pub fn to_timestamp_ntz(column: &Column, format: Option<&str>) -> Result<Column, String> {
1294 use polars::prelude::{DataType, GetOutput, TimeUnit};
1295 match format {
1296 None => crate::cast(column, "timestamp"),
1297 Some(fmt) => {
1298 let fmt_owned = fmt.to_string();
1299 let expr = column.expr().clone().map(
1300 move |s| crate::udfs::apply_to_timestamp_ntz_format(s, Some(&fmt_owned), true),
1301 GetOutput::from_type(DataType::Datetime(TimeUnit::Microseconds, None)),
1302 );
1303 Ok(crate::column::Column::from_expr(expr, None))
1304 }
1305 }
1306}
1307
1308pub fn try_divide(left: &Column, right: &Column) -> Column {
1310 use polars::prelude::*;
1311 let zero_cond = right.expr().clone().cast(DataType::Float64).eq(lit(0.0f64));
1312 let null_expr = Expr::Literal(LiteralValue::Null);
1313 let div_expr =
1314 left.expr().clone().cast(DataType::Float64) / right.expr().clone().cast(DataType::Float64);
1315 let expr = polars::prelude::when(zero_cond)
1316 .then(null_expr)
1317 .otherwise(div_expr);
1318 crate::column::Column::from_expr(expr, None)
1319}
1320
1321pub fn try_add(left: &Column, right: &Column) -> Column {
1323 let args = [right.expr().clone()];
1324 let expr =
1325 left.expr()
1326 .clone()
1327 .map_many(crate::udfs::apply_try_add, &args, GetOutput::same_type());
1328 Column::from_expr(expr, None)
1329}
1330
1331pub fn try_subtract(left: &Column, right: &Column) -> Column {
1333 let args = [right.expr().clone()];
1334 let expr = left.expr().clone().map_many(
1335 crate::udfs::apply_try_subtract,
1336 &args,
1337 GetOutput::same_type(),
1338 );
1339 Column::from_expr(expr, None)
1340}
1341
1342pub fn try_multiply(left: &Column, right: &Column) -> Column {
1344 let args = [right.expr().clone()];
1345 let expr = left.expr().clone().map_many(
1346 crate::udfs::apply_try_multiply,
1347 &args,
1348 GetOutput::same_type(),
1349 );
1350 Column::from_expr(expr, None)
1351}
1352
1353pub fn try_element_at(column: &Column, index: i64) -> Column {
1355 column.clone().element_at(index)
1356}
1357
1358pub fn width_bucket(value: &Column, min_val: f64, max_val: f64, num_bucket: i64) -> Column {
1360 if num_bucket <= 0 {
1361 panic!(
1362 "width_bucket: num_bucket must be positive, got {}",
1363 num_bucket
1364 );
1365 }
1366 use polars::prelude::*;
1367 let v = value.expr().clone().cast(DataType::Float64);
1368 let min_expr = lit(min_val);
1369 let max_expr = lit(max_val);
1370 let nb = num_bucket as f64;
1371 let width = (max_val - min_val) / nb;
1372 let bucket_expr = (v.clone() - min_expr.clone()) / lit(width);
1373 let floor_bucket = bucket_expr.floor().cast(DataType::Int64) + lit(1i64);
1374 let bucket_clamped = floor_bucket.clip(lit(1i64), lit(num_bucket));
1375 let expr = polars::prelude::when(v.clone().lt(min_expr))
1376 .then(lit(0i64))
1377 .when(v.gt_eq(max_expr))
1378 .then(lit(num_bucket + 1))
1379 .otherwise(bucket_clamped);
1380 crate::column::Column::from_expr(expr, None)
1381}
1382
1383pub fn elt(index: &Column, columns: &[&Column]) -> Column {
1385 use polars::prelude::*;
1386 if columns.is_empty() {
1387 panic!("elt requires at least one column");
1388 }
1389 let idx_expr = index.expr().clone();
1390 let null_expr = Expr::Literal(LiteralValue::Null);
1391 let mut expr = null_expr;
1392 for (i, c) in columns.iter().enumerate().rev() {
1393 let n = (i + 1) as i64;
1394 expr = polars::prelude::when(idx_expr.clone().eq(lit(n)))
1395 .then(c.expr().clone())
1396 .otherwise(expr);
1397 }
1398 crate::column::Column::from_expr(expr, None)
1399}
1400
1401pub fn bit_length(column: &Column) -> Column {
1403 column.clone().bit_length()
1404}
1405
1406pub fn octet_length(column: &Column) -> Column {
1408 column.clone().octet_length()
1409}
1410
1411pub fn char_length(column: &Column) -> Column {
1413 column.clone().char_length()
1414}
1415
1416pub fn character_length(column: &Column) -> Column {
1418 column.clone().character_length()
1419}
1420
1421pub fn typeof_(column: &Column) -> Column {
1423 column.clone().typeof_()
1424}
1425
1426pub fn isnan(column: &Column) -> Column {
1428 column.clone().is_nan()
1429}
1430
1431pub fn greatest(columns: &[&Column]) -> Result<Column, String> {
1433 if columns.is_empty() {
1434 return Err("greatest requires at least one column".to_string());
1435 }
1436 if columns.len() == 1 {
1437 return Ok((*columns[0]).clone());
1438 }
1439 let mut expr = columns[0].expr().clone();
1440 for c in columns.iter().skip(1) {
1441 let args = [c.expr().clone()];
1442 expr = expr.map_many(crate::udfs::apply_greatest2, &args, GetOutput::same_type());
1443 }
1444 Ok(Column::from_expr(expr, None))
1445}
1446
1447pub fn least(columns: &[&Column]) -> Result<Column, String> {
1449 if columns.is_empty() {
1450 return Err("least requires at least one column".to_string());
1451 }
1452 if columns.len() == 1 {
1453 return Ok((*columns[0]).clone());
1454 }
1455 let mut expr = columns[0].expr().clone();
1456 for c in columns.iter().skip(1) {
1457 let args = [c.expr().clone()];
1458 expr = expr.map_many(crate::udfs::apply_least2, &args, GetOutput::same_type());
1459 }
1460 Ok(Column::from_expr(expr, None))
1461}
1462
1463pub fn year(column: &Column) -> Column {
1465 column.clone().year()
1466}
1467
1468pub fn month(column: &Column) -> Column {
1470 column.clone().month()
1471}
1472
1473pub fn day(column: &Column) -> Column {
1475 column.clone().day()
1476}
1477
1478pub fn to_date(column: &Column) -> Column {
1480 column.clone().to_date()
1481}
1482
1483pub fn date_format(column: &Column, format: &str) -> Column {
1485 column
1486 .clone()
1487 .date_format(&crate::udfs::pyspark_format_to_chrono(format))
1488}
1489
1490pub fn current_date() -> Column {
1492 use polars::prelude::*;
1493 let today = chrono::Utc::now().date_naive();
1494 let days = (today - crate::date_utils::epoch_naive_date()).num_days() as i32;
1495 crate::column::Column::from_expr(Expr::Literal(LiteralValue::Date(days)), None)
1496}
1497
1498pub fn current_timestamp() -> Column {
1500 use polars::prelude::*;
1501 let ts = chrono::Utc::now().timestamp_micros();
1502 crate::column::Column::from_expr(
1503 Expr::Literal(LiteralValue::DateTime(ts, TimeUnit::Microseconds, None)),
1504 None,
1505 )
1506}
1507
/// Alias of [`current_date`].
pub fn curdate() -> Column {
    current_date()
}
1512
/// Alias of [`current_timestamp`].
pub fn now() -> Column {
    current_timestamp()
}
1517
/// Alias of [`current_timestamp`]; the value comes from the UTC clock, not a local zone.
pub fn localtimestamp() -> Column {
    current_timestamp()
}
1522
/// Alias of [`datediff`], with the same `(end, start)` argument order.
pub fn date_diff(end: &Column, start: &Column) -> Column {
    datediff(end, start)
}
1527
/// Alias of [`date_add`].
pub fn dateadd(column: &Column, n: i32) -> Column {
    date_add(column, n)
}
1532
1533pub fn extract(column: &Column, field: &str) -> Column {
1535 column.clone().extract(field)
1536}
1537
/// Alias of [`extract`].
pub fn date_part(column: &Column, field: &str) -> Column {
    extract(column, field)
}
1542
/// Alias of [`extract`].
pub fn datepart(column: &Column, field: &str) -> Column {
    extract(column, field)
}
1547
1548pub fn unix_micros(column: &Column) -> Column {
1550 column.clone().unix_micros()
1551}
1552
1553pub fn unix_millis(column: &Column) -> Column {
1555 column.clone().unix_millis()
1556}
1557
1558pub fn unix_seconds(column: &Column) -> Column {
1560 column.clone().unix_seconds()
1561}
1562
1563pub fn dayname(column: &Column) -> Column {
1565 column.clone().dayname()
1566}
1567
1568pub fn weekday(column: &Column) -> Column {
1570 column.clone().weekday()
1571}
1572
1573pub fn hour(column: &Column) -> Column {
1575 column.clone().hour()
1576}
1577
1578pub fn minute(column: &Column) -> Column {
1580 column.clone().minute()
1581}
1582
1583pub fn second(column: &Column) -> Column {
1585 column.clone().second()
1586}
1587
1588pub fn date_add(column: &Column, n: i32) -> Column {
1590 column.clone().date_add(n)
1591}
1592
1593pub fn date_sub(column: &Column, n: i32) -> Column {
1595 column.clone().date_sub(n)
1596}
1597
1598pub fn datediff(end: &Column, start: &Column) -> Column {
1600 start.clone().datediff(end)
1601}
1602
1603pub fn last_day(column: &Column) -> Column {
1605 column.clone().last_day()
1606}
1607
1608pub fn trunc(column: &Column, format: &str) -> Column {
1610 column.clone().trunc(format)
1611}
1612
/// Same as [`trunc`] but with the arguments swapped: format first, column second.
pub fn date_trunc(format: &str, column: &Column) -> Column {
    trunc(column, format)
}
1617
1618pub fn quarter(column: &Column) -> Column {
1620 column.clone().quarter()
1621}
1622
1623pub fn weekofyear(column: &Column) -> Column {
1625 column.clone().weekofyear()
1626}
1627
1628pub fn dayofweek(column: &Column) -> Column {
1630 column.clone().dayofweek()
1631}
1632
1633pub fn dayofyear(column: &Column) -> Column {
1635 column.clone().dayofyear()
1636}
1637
1638pub fn add_months(column: &Column, n: i32) -> Column {
1640 column.clone().add_months(n)
1641}
1642
1643pub fn months_between(end: &Column, start: &Column, round_off: bool) -> Column {
1646 end.clone().months_between(start, round_off)
1647}
1648
1649pub fn next_day(column: &Column, day_of_week: &str) -> Column {
1651 column.clone().next_day(day_of_week)
1652}
1653
1654pub fn unix_timestamp_now() -> Column {
1656 use polars::prelude::*;
1657 let secs = chrono::Utc::now().timestamp();
1658 crate::column::Column::from_expr(lit(secs), None)
1659}
1660
1661pub fn unix_timestamp(column: &Column, format: Option<&str>) -> Column {
1663 column.clone().unix_timestamp(format)
1664}
1665
/// Alias of [`unix_timestamp`].
pub fn to_unix_timestamp(column: &Column, format: Option<&str>) -> Column {
    unix_timestamp(column, format)
}
1670
1671pub fn from_unixtime(column: &Column, format: Option<&str>) -> Column {
1673 column.clone().from_unixtime(format)
1674}
1675
1676pub fn make_date(year: &Column, month: &Column, day: &Column) -> Column {
1678 use polars::prelude::*;
1679 let args = [month.expr().clone(), day.expr().clone()];
1680 let expr = year.expr().clone().map_many(
1681 crate::udfs::apply_make_date,
1682 &args,
1683 GetOutput::from_type(DataType::Date),
1684 );
1685 crate::column::Column::from_expr(expr, None)
1686}
1687
1688pub fn make_timestamp(
1691 year: &Column,
1692 month: &Column,
1693 day: &Column,
1694 hour: &Column,
1695 minute: &Column,
1696 sec: &Column,
1697 timezone: Option<&str>,
1698) -> Column {
1699 use polars::prelude::*;
1700 let tz_owned = timezone.map(|s| s.to_string());
1701 let args = [
1702 month.expr().clone(),
1703 day.expr().clone(),
1704 hour.expr().clone(),
1705 minute.expr().clone(),
1706 sec.expr().clone(),
1707 ];
1708 let expr = year.expr().clone().map_many(
1709 move |cols| crate::udfs::apply_make_timestamp(cols, tz_owned.as_deref()),
1710 &args,
1711 GetOutput::from_type(DataType::Datetime(TimeUnit::Microseconds, None)),
1712 );
1713 crate::column::Column::from_expr(expr, None)
1714}
1715
1716pub fn timestampadd(unit: &str, amount: &Column, ts: &Column) -> Column {
1718 ts.clone().timestampadd(unit, amount)
1719}
1720
1721pub fn timestampdiff(unit: &str, start: &Column, end: &Column) -> Column {
1723 start.clone().timestampdiff(unit, end)
1724}
1725
/// Interval of `n` days: calls [`make_interval`] with only the days argument set.
pub fn days(n: i64) -> Column {
    make_interval(0, 0, 0, n, 0, 0, 0)
}
1730
/// Interval of `n` hours: calls [`make_interval`] with only the hours argument set.
pub fn hours(n: i64) -> Column {
    make_interval(0, 0, 0, 0, n, 0, 0)
}
1735
/// Interval of `n` minutes: calls [`make_interval`] with only the minutes argument set.
pub fn minutes(n: i64) -> Column {
    make_interval(0, 0, 0, 0, 0, n, 0)
}
1740
/// Interval of `n` months: calls [`make_interval`] with only the months argument set.
///
/// `make_interval` counts each month as 30 days.
pub fn months(n: i64) -> Column {
    make_interval(0, n, 0, 0, 0, 0, 0)
}
1745
/// Interval of `n` years: calls [`make_interval`] with only the years argument set.
///
/// `make_interval` counts each year as 365 days.
pub fn years(n: i64) -> Column {
    make_interval(n, 0, 0, 0, 0, 0, 0)
}
1750
1751pub fn from_utc_timestamp(column: &Column, tz: &str) -> Column {
1753 column.clone().from_utc_timestamp(tz)
1754}
1755
1756pub fn to_utc_timestamp(column: &Column, tz: &str) -> Column {
1758 column.clone().to_utc_timestamp(tz)
1759}
1760
1761pub fn convert_timezone(source_tz: &str, target_tz: &str, column: &Column) -> Column {
1763 let source_tz = source_tz.to_string();
1764 let target_tz = target_tz.to_string();
1765 let expr = column.expr().clone().map(
1766 move |s| crate::udfs::apply_convert_timezone(s, &source_tz, &target_tz),
1767 GetOutput::same_type(),
1768 );
1769 crate::column::Column::from_expr(expr, None)
1770}
1771
1772pub fn current_timezone() -> Column {
1774 use polars::prelude::*;
1775 crate::column::Column::from_expr(lit("UTC"), None)
1776}
1777
1778pub fn make_interval(
1780 years: i64,
1781 months: i64,
1782 weeks: i64,
1783 days: i64,
1784 hours: i64,
1785 mins: i64,
1786 secs: i64,
1787) -> Column {
1788 use polars::prelude::*;
1789 let total_days = years * 365 + months * 30 + weeks * 7 + days;
1791 let args = DurationArgs::new()
1792 .with_days(lit(total_days))
1793 .with_hours(lit(hours))
1794 .with_minutes(lit(mins))
1795 .with_seconds(lit(secs));
1796 let dur = duration(args);
1797 crate::column::Column::from_expr(dur, None)
1798}
1799
1800pub fn make_dt_interval(days: i64, hours: i64, minutes: i64, seconds: i64) -> Column {
1802 use polars::prelude::*;
1803 let args = DurationArgs::new()
1804 .with_days(lit(days))
1805 .with_hours(lit(hours))
1806 .with_minutes(lit(minutes))
1807 .with_seconds(lit(seconds));
1808 let dur = duration(args);
1809 crate::column::Column::from_expr(dur, None)
1810}
1811
1812pub fn make_ym_interval(years: i32, months: i32) -> Column {
1814 use polars::prelude::*;
1815 let total_months = years * 12 + months;
1816 crate::column::Column::from_expr(lit(total_months), None)
1817}
1818
/// Calls [`make_timestamp`] with the same six components and no time zone.
pub fn make_timestamp_ntz(
    year: &Column,
    month: &Column,
    day: &Column,
    hour: &Column,
    minute: &Column,
    sec: &Column,
) -> Column {
    make_timestamp(year, month, day, hour, minute, sec, None)
}
1830
1831pub fn timestamp_seconds(column: &Column) -> Column {
1833 column.clone().timestamp_seconds()
1834}
1835
1836pub fn timestamp_millis(column: &Column) -> Column {
1838 column.clone().timestamp_millis()
1839}
1840
1841pub fn timestamp_micros(column: &Column) -> Column {
1843 column.clone().timestamp_micros()
1844}
1845
1846pub fn unix_date(column: &Column) -> Column {
1848 column.clone().unix_date()
1849}
1850
1851pub fn date_from_unix_date(column: &Column) -> Column {
1853 column.clone().date_from_unix_date()
1854}
1855
1856pub fn pmod(dividend: &Column, divisor: &Column) -> Column {
1858 dividend.clone().pmod(divisor)
1859}
1860
1861pub fn factorial(column: &Column) -> Column {
1863 column.clone().factorial()
1864}
1865
1866pub fn concat(columns: &[&Column]) -> Column {
1868 use polars::prelude::*;
1869 if columns.is_empty() {
1870 panic!("concat requires at least one column");
1871 }
1872 let exprs: Vec<Expr> = columns.iter().map(|c| c.expr().clone()).collect();
1873 crate::column::Column::from_expr(concat_str(&exprs, "", false), None)
1874}
1875
1876pub fn concat_ws(separator: &str, columns: &[&Column]) -> Column {
1878 use polars::prelude::*;
1879 if columns.is_empty() {
1880 panic!("concat_ws requires at least one column");
1881 }
1882 let exprs: Vec<Expr> = columns.iter().map(|c| c.expr().clone()).collect();
1883 crate::column::Column::from_expr(concat_str(&exprs, separator, false), None)
1884}
1885
1886pub fn row_number(column: &Column) -> Column {
1896 column.clone().row_number(false)
1897}
1898
1899pub fn rank(column: &Column, descending: bool) -> Column {
1901 column.clone().rank(descending)
1902}
1903
1904pub fn dense_rank(column: &Column, descending: bool) -> Column {
1906 column.clone().dense_rank(descending)
1907}
1908
1909pub fn lag(column: &Column, n: i64) -> Column {
1911 column.clone().lag(n)
1912}
1913
1914pub fn lead(column: &Column, n: i64) -> Column {
1916 column.clone().lead(n)
1917}
1918
1919pub fn first_value(column: &Column) -> Column {
1921 column.clone().first_value()
1922}
1923
1924pub fn last_value(column: &Column) -> Column {
1926 column.clone().last_value()
1927}
1928
1929pub fn percent_rank(column: &Column, partition_by: &[&str], descending: bool) -> Column {
1931 column.clone().percent_rank(partition_by, descending)
1932}
1933
1934pub fn cume_dist(column: &Column, partition_by: &[&str], descending: bool) -> Column {
1936 column.clone().cume_dist(partition_by, descending)
1937}
1938
1939pub fn ntile(column: &Column, n: u32, partition_by: &[&str], descending: bool) -> Column {
1941 column.clone().ntile(n, partition_by, descending)
1942}
1943
1944pub fn nth_value(column: &Column, n: i64, partition_by: &[&str], descending: bool) -> Column {
1946 column.clone().nth_value(n, partition_by, descending)
1947}
1948
1949pub fn coalesce(columns: &[&Column]) -> Column {
1959 use polars::prelude::*;
1960 if columns.is_empty() {
1961 panic!("coalesce requires at least one column");
1962 }
1963 let exprs: Vec<Expr> = columns.iter().map(|c| c.expr().clone()).collect();
1964 let expr = coalesce(&exprs);
1965 crate::column::Column::from_expr(expr, None)
1966}
1967
1968pub fn nvl(column: &Column, value: &Column) -> Column {
1970 coalesce(&[column, value])
1971}
1972
/// Alias of [`nvl`].
pub fn ifnull(column: &Column, value: &Column) -> Column {
    nvl(column, value)
}
1977
1978pub fn nullif(column: &Column, value: &Column) -> Column {
1980 use polars::prelude::*;
1981 let cond = column.expr().clone().eq(value.expr().clone());
1982 let null_lit = Expr::Literal(LiteralValue::Null);
1983 let expr = when(cond).then(null_lit).otherwise(column.expr().clone());
1984 crate::column::Column::from_expr(expr, None)
1985}
1986
1987pub fn nanvl(column: &Column, value: &Column) -> Column {
1989 use polars::prelude::*;
1990 let cond = column.expr().clone().is_nan();
1991 let expr = when(cond)
1992 .then(value.expr().clone())
1993 .otherwise(column.expr().clone());
1994 crate::column::Column::from_expr(expr, None)
1995}
1996
1997pub fn nvl2(col1: &Column, col2: &Column, col3: &Column) -> Column {
1999 use polars::prelude::*;
2000 let cond = col1.expr().clone().is_not_null();
2001 let expr = when(cond)
2002 .then(col2.expr().clone())
2003 .otherwise(col3.expr().clone());
2004 crate::column::Column::from_expr(expr, None)
2005}
2006
/// Alias of `substring`, with the same arguments.
pub fn substr(column: &Column, start: i64, length: Option<i64>) -> Column {
    substring(column, start, length)
}
2011
/// Alias of `pow`, with the same arguments.
pub fn power(column: &Column, exp: i64) -> Column {
    pow(column, exp)
}
2016
/// Alias of `log` (single-argument form).
pub fn ln(column: &Column) -> Column {
    log(column)
}
2021
/// Alias of `ceil`.
pub fn ceiling(column: &Column) -> Column {
    ceil(column)
}
2026
/// Alias of `lower`.
pub fn lcase(column: &Column) -> Column {
    lower(column)
}
2031
/// Alias of `upper`.
pub fn ucase(column: &Column) -> Column {
    upper(column)
}
2036
/// Alias of [`day`].
pub fn dayofmonth(column: &Column) -> Column {
    day(column)
}
2041
/// Alias of `degrees`.
pub fn to_degrees(column: &Column) -> Column {
    degrees(column)
}
2046
/// Alias of `radians`.
pub fn to_radians(column: &Column) -> Column {
    radians(column)
}
2051
2052pub fn cosh(column: &Column) -> Column {
2054 column.clone().cosh()
2055}
2056pub fn sinh(column: &Column) -> Column {
2058 column.clone().sinh()
2059}
2060pub fn tanh(column: &Column) -> Column {
2062 column.clone().tanh()
2063}
2064pub fn acosh(column: &Column) -> Column {
2066 column.clone().acosh()
2067}
2068pub fn asinh(column: &Column) -> Column {
2070 column.clone().asinh()
2071}
2072pub fn atanh(column: &Column) -> Column {
2074 column.clone().atanh()
2075}
2076pub fn cbrt(column: &Column) -> Column {
2078 column.clone().cbrt()
2079}
2080pub fn expm1(column: &Column) -> Column {
2082 column.clone().expm1()
2083}
2084pub fn log1p(column: &Column) -> Column {
2086 column.clone().log1p()
2087}
2088pub fn log10(column: &Column) -> Column {
2090 column.clone().log10()
2091}
2092pub fn log2(column: &Column) -> Column {
2094 column.clone().log2()
2095}
2096pub fn rint(column: &Column) -> Column {
2098 column.clone().rint()
2099}
2100pub fn hypot(x: &Column, y: &Column) -> Column {
2102 let xx = x.expr().clone() * x.expr().clone();
2103 let yy = y.expr().clone() * y.expr().clone();
2104 crate::column::Column::from_expr((xx + yy).sqrt(), None)
2105}
2106
2107pub fn isnull(column: &Column) -> Column {
2109 column.clone().is_null()
2110}
2111
2112pub fn isnotnull(column: &Column) -> Column {
2114 column.clone().is_not_null()
2115}
2116
2117pub fn array(columns: &[&Column]) -> Result<crate::column::Column, PolarsError> {
2119 use polars::prelude::*;
2120 if columns.is_empty() {
2121 panic!("array requires at least one column");
2122 }
2123 let exprs: Vec<Expr> = columns.iter().map(|c| c.expr().clone()).collect();
2124 let expr = concat_list(exprs)
2125 .map_err(|e| PolarsError::ComputeError(format!("array concat_list: {e}").into()))?;
2126 Ok(crate::column::Column::from_expr(expr, None))
2127}
2128
2129pub fn array_size(column: &Column) -> Column {
2131 column.clone().array_size()
2132}
2133
2134pub fn size(column: &Column) -> Column {
2136 column.clone().array_size()
2137}
2138
2139pub fn cardinality(column: &Column) -> Column {
2141 column.clone().cardinality()
2142}
2143
2144pub fn array_contains(column: &Column, value: &Column) -> Column {
2146 column.clone().array_contains(value.expr().clone())
2147}
2148
2149pub fn array_join(column: &Column, separator: &str) -> Column {
2151 column.clone().array_join(separator)
2152}
2153
2154pub fn array_max(column: &Column) -> Column {
2156 column.clone().array_max()
2157}
2158
2159pub fn array_min(column: &Column) -> Column {
2161 column.clone().array_min()
2162}
2163
2164pub fn element_at(column: &Column, index: i64) -> Column {
2166 column.clone().element_at(index)
2167}
2168
2169pub fn array_sort(column: &Column) -> Column {
2171 column.clone().array_sort()
2172}
2173
2174pub fn array_distinct(column: &Column) -> Column {
2176 column.clone().array_distinct()
2177}
2178
2179pub fn array_slice(column: &Column, start: i64, length: Option<i64>) -> Column {
2181 column.clone().array_slice(start, length)
2182}
2183
2184pub fn sequence(start: &Column, stop: &Column, step: Option<&Column>) -> Column {
2187 use polars::prelude::{as_struct, lit, DataType, GetOutput};
2188 let step_expr = step
2189 .map(|c| c.expr().clone().alias("2"))
2190 .unwrap_or_else(|| lit(1i64).alias("2"));
2191 let struct_expr = as_struct(vec![
2192 start.expr().clone().alias("0"),
2193 stop.expr().clone().alias("1"),
2194 step_expr,
2195 ]);
2196 let out_dtype = DataType::List(Box::new(DataType::Int64));
2197 let expr = struct_expr.map(crate::udfs::apply_sequence, GetOutput::from_type(out_dtype));
2198 crate::column::Column::from_expr(expr, None)
2199}
2200
2201pub fn shuffle(column: &Column) -> Column {
2203 use polars::prelude::GetOutput;
2204 let expr = column
2205 .expr()
2206 .clone()
2207 .map(crate::udfs::apply_shuffle, GetOutput::same_type());
2208 crate::column::Column::from_expr(expr, None)
2209}
2210
2211pub fn inline(column: &Column) -> Column {
2214 column.clone().explode()
2215}
2216
2217pub fn inline_outer(column: &Column) -> Column {
2219 column.clone().explode_outer()
2220}
2221
2222pub fn explode(column: &Column) -> Column {
2224 column.clone().explode()
2225}
2226
2227pub fn array_position(column: &Column, value: &Column) -> Column {
2230 column.clone().array_position(value.expr().clone())
2231}
2232
2233pub fn array_compact(column: &Column) -> Column {
2235 column.clone().array_compact()
2236}
2237
2238pub fn array_remove(column: &Column, value: &Column) -> Column {
2241 column.clone().array_remove(value.expr().clone())
2242}
2243
2244pub fn array_repeat(column: &Column, n: i64) -> Column {
2246 column.clone().array_repeat(n)
2247}
2248
2249pub fn array_flatten(column: &Column) -> Column {
2251 column.clone().array_flatten()
2252}
2253
2254pub fn array_exists(column: &Column, predicate: Expr) -> Column {
2256 column.clone().array_exists(predicate)
2257}
2258
2259pub fn array_forall(column: &Column, predicate: Expr) -> Column {
2261 column.clone().array_forall(predicate)
2262}
2263
2264pub fn array_filter(column: &Column, predicate: Expr) -> Column {
2266 column.clone().array_filter(predicate)
2267}
2268
2269pub fn array_transform(column: &Column, f: Expr) -> Column {
2271 column.clone().array_transform(f)
2272}
2273
2274pub fn array_sum(column: &Column) -> Column {
2276 column.clone().array_sum()
2277}
2278
2279pub fn aggregate(column: &Column, zero: &Column) -> Column {
2281 column.clone().array_aggregate(zero)
2282}
2283
2284pub fn array_mean(column: &Column) -> Column {
2286 column.clone().array_mean()
2287}
2288
2289pub fn posexplode(column: &Column) -> (Column, Column) {
2292 column.clone().posexplode()
2293}
2294
2295pub fn create_map(key_values: &[&Column]) -> Result<Column, PolarsError> {
2298 use polars::prelude::{as_struct, concat_list};
2299 if key_values.is_empty() {
2300 panic!("create_map requires at least one key-value pair");
2301 }
2302 let mut struct_exprs: Vec<Expr> = Vec::new();
2303 for i in (0..key_values.len()).step_by(2) {
2304 if i + 1 < key_values.len() {
2305 let k = key_values[i].expr().clone().alias("key");
2306 let v = key_values[i + 1].expr().clone().alias("value");
2307 struct_exprs.push(as_struct(vec![k, v]));
2308 }
2309 }
2310 let expr = concat_list(struct_exprs)
2311 .map_err(|e| PolarsError::ComputeError(format!("create_map concat_list: {e}").into()))?;
2312 Ok(crate::column::Column::from_expr(expr, None))
2313}
2314
2315pub fn map_keys(column: &Column) -> Column {
2317 column.clone().map_keys()
2318}
2319
2320pub fn map_values(column: &Column) -> Column {
2322 column.clone().map_values()
2323}
2324
2325pub fn map_entries(column: &Column) -> Column {
2327 column.clone().map_entries()
2328}
2329
2330pub fn map_from_arrays(keys: &Column, values: &Column) -> Column {
2332 keys.clone().map_from_arrays(values)
2333}
2334
2335pub fn map_concat(a: &Column, b: &Column) -> Column {
2337 a.clone().map_concat(b)
2338}
2339
2340pub fn map_from_entries(column: &Column) -> Column {
2342 column.clone().map_from_entries()
2343}
2344
2345pub fn map_contains_key(map_col: &Column, key: &Column) -> Column {
2347 map_col.clone().map_contains_key(key)
2348}
2349
2350pub fn get(map_col: &Column, key: &Column) -> Column {
2352 map_col.clone().get(key)
2353}
2354
2355pub fn map_filter(map_col: &Column, predicate: Expr) -> Column {
2357 map_col.clone().map_filter(predicate)
2358}
2359
2360pub fn map_zip_with(map1: &Column, map2: &Column, merge: Expr) -> Column {
2362 map1.clone().map_zip_with(map2, merge)
2363}
2364
2365pub fn zip_with_coalesce(left: &Column, right: &Column) -> Column {
2367 use polars::prelude::col;
2368 let left_field = col("").struct_().field_by_name("left");
2369 let right_field = col("").struct_().field_by_name("right");
2370 let merge = crate::column::Column::from_expr(
2371 coalesce(&[
2372 &crate::column::Column::from_expr(left_field, None),
2373 &crate::column::Column::from_expr(right_field, None),
2374 ])
2375 .into_expr(),
2376 None,
2377 );
2378 left.clone().zip_with(right, merge.into_expr())
2379}
2380
2381pub fn map_zip_with_coalesce(map1: &Column, map2: &Column) -> Column {
2383 use polars::prelude::col;
2384 let v1 = col("").struct_().field_by_name("value1");
2385 let v2 = col("").struct_().field_by_name("value2");
2386 let merge = coalesce(&[
2387 &crate::column::Column::from_expr(v1, None),
2388 &crate::column::Column::from_expr(v2, None),
2389 ])
2390 .into_expr();
2391 map1.clone().map_zip_with(map2, merge)
2392}
2393
2394pub fn map_filter_value_gt(map_col: &Column, threshold: f64) -> Column {
2396 use polars::prelude::{col, lit};
2397 let pred = col("").struct_().field_by_name("value").gt(lit(threshold));
2398 map_col.clone().map_filter(pred)
2399}
2400
2401pub fn struct_(columns: &[&Column]) -> Column {
2403 use polars::prelude::as_struct;
2404 if columns.is_empty() {
2405 panic!("struct requires at least one column");
2406 }
2407 let exprs: Vec<Expr> = columns.iter().map(|c| c.expr().clone()).collect();
2408 crate::column::Column::from_expr(as_struct(exprs), None)
2409}
2410
2411pub fn named_struct(pairs: &[(&str, &Column)]) -> Column {
2413 use polars::prelude::as_struct;
2414 if pairs.is_empty() {
2415 panic!("named_struct requires at least one (name, column) pair");
2416 }
2417 let exprs: Vec<Expr> = pairs
2418 .iter()
2419 .map(|(name, col)| col.expr().clone().alias(*name))
2420 .collect();
2421 crate::column::Column::from_expr(as_struct(exprs), None)
2422}
2423
2424pub fn array_append(array: &Column, elem: &Column) -> Column {
2426 array.clone().array_append(elem)
2427}
2428
2429pub fn array_prepend(array: &Column, elem: &Column) -> Column {
2431 array.clone().array_prepend(elem)
2432}
2433
2434pub fn array_insert(array: &Column, pos: &Column, elem: &Column) -> Column {
2436 array.clone().array_insert(pos, elem)
2437}
2438
2439pub fn array_except(a: &Column, b: &Column) -> Column {
2441 a.clone().array_except(b)
2442}
2443
2444pub fn array_intersect(a: &Column, b: &Column) -> Column {
2446 a.clone().array_intersect(b)
2447}
2448
2449pub fn array_union(a: &Column, b: &Column) -> Column {
2451 a.clone().array_union(b)
2452}
2453
2454pub fn zip_with(left: &Column, right: &Column, merge: Expr) -> Column {
2456 left.clone().zip_with(right, merge)
2457}
2458
2459pub fn get_json_object(column: &Column, path: &str) -> Column {
2461 column.clone().get_json_object(path)
2462}
2463
2464pub fn json_object_keys(column: &Column) -> Column {
2466 column.clone().json_object_keys()
2467}
2468
2469pub fn json_tuple(column: &Column, keys: &[&str]) -> Column {
2471 column.clone().json_tuple(keys)
2472}
2473
2474pub fn from_csv(column: &Column) -> Column {
2476 column.clone().from_csv()
2477}
2478
2479pub fn to_csv(column: &Column) -> Column {
2481 column.clone().to_csv()
2482}
2483
2484pub fn schema_of_csv(_column: &Column) -> Column {
2486 Column::from_expr(
2487 lit("STRUCT<_c0: STRING, _c1: STRING>".to_string()),
2488 Some("schema_of_csv".to_string()),
2489 )
2490}
2491
2492pub fn schema_of_json(_column: &Column) -> Column {
2494 Column::from_expr(
2495 lit("STRUCT<>".to_string()),
2496 Some("schema_of_json".to_string()),
2497 )
2498}
2499
2500pub fn from_json(column: &Column, schema: Option<polars::datatypes::DataType>) -> Column {
2502 column.clone().from_json(schema)
2503}
2504
2505pub fn to_json(column: &Column) -> Column {
2507 column.clone().to_json()
2508}
2509
2510pub fn isin(column: &Column, other: &Column) -> Column {
2512 column.clone().isin(other)
2513}
2514
2515pub fn isin_i64(column: &Column, values: &[i64]) -> Column {
2517 let s = Series::from_iter(values.iter().cloned());
2518 Column::from_expr(column.expr().clone().is_in(lit(s)), None)
2519}
2520
2521pub fn isin_str(column: &Column, values: &[&str]) -> Column {
2523 let s: Series = Series::from_iter(values.iter().copied());
2524 Column::from_expr(column.expr().clone().is_in(lit(s)), None)
2525}
2526
2527pub fn url_decode(column: &Column) -> Column {
2529 column.clone().url_decode()
2530}
2531
2532pub fn url_encode(column: &Column) -> Column {
2534 column.clone().url_encode()
2535}
2536
2537pub fn shift_left(column: &Column, n: i32) -> Column {
2539 column.clone().shift_left(n)
2540}
2541
2542pub fn shift_right(column: &Column, n: i32) -> Column {
2544 column.clone().shift_right(n)
2545}
2546
2547pub fn shift_right_unsigned(column: &Column, n: i32) -> Column {
2549 column.clone().shift_right_unsigned(n)
2550}
2551
2552pub fn version() -> Column {
2554 Column::from_expr(
2555 lit(concat!("robin-sparkless-", env!("CARGO_PKG_VERSION"))),
2556 None,
2557 )
2558}
2559
2560pub fn equal_null(left: &Column, right: &Column) -> Column {
2562 left.clone().eq_null_safe(right)
2563}
2564
2565pub fn json_array_length(column: &Column, path: &str) -> Column {
2567 column.clone().json_array_length(path)
2568}
2569
2570pub fn parse_url(column: &Column, part: &str, key: Option<&str>) -> Column {
2573 column.clone().parse_url(part, key)
2574}
2575
2576pub fn hash(columns: &[&Column]) -> Column {
2578 use polars::prelude::*;
2579 if columns.is_empty() {
2580 return crate::column::Column::from_expr(lit(0i64), None);
2581 }
2582 if columns.len() == 1 {
2583 return columns[0].clone().hash();
2584 }
2585 let exprs: Vec<Expr> = columns.iter().map(|c| c.expr().clone()).collect();
2586 let struct_expr = polars::prelude::as_struct(exprs);
2587 let name = columns[0].name().to_string();
2588 let expr = struct_expr.map(
2589 crate::udfs::apply_hash_struct,
2590 GetOutput::from_type(DataType::Int64),
2591 );
2592 crate::column::Column::from_expr(expr, Some(name))
2593}
2594
/// Forwards to [`struct_`]: the columns are packed into one struct column.
pub fn stack(columns: &[&Column]) -> Column {
    struct_(columns)
}
2599
#[cfg(test)]
mod tests {
    use super::*;
    use polars::prelude::{df, IntoLazy};

    /// `col` keeps the requested name on the column it builds.
    #[test]
    fn test_col_creates_column() {
        assert_eq!(col("test").name(), "test");
    }

    /// An `i32` literal column is named `<expr>`.
    #[test]
    fn test_lit_i32() {
        assert_eq!(lit_i32(42).name(), "<expr>");
    }

    /// An `i64` literal column is named `<expr>`.
    #[test]
    fn test_lit_i64() {
        assert_eq!(lit_i64(123_456_789_012_345i64).name(), "<expr>");
    }

    /// An `f64` literal column is named `<expr>`.
    #[test]
    fn test_lit_f64() {
        assert_eq!(lit_f64(std::f64::consts::PI).name(), "<expr>");
    }

    /// A boolean literal column is named `<expr>`.
    #[test]
    fn test_lit_bool() {
        assert_eq!(lit_bool(true).name(), "<expr>");
    }

    /// A string literal column is named `<expr>`.
    #[test]
    fn test_lit_str() {
        assert_eq!(lit_str("hello").name(), "<expr>");
    }

    /// `count` yields a column named `count`.
    #[test]
    fn test_count_aggregation() {
        let input = col("value");
        assert_eq!(count(&input).name(), "count");
    }

    /// `sum` yields a column named `sum`.
    #[test]
    fn test_sum_aggregation() {
        let input = col("value");
        assert_eq!(sum(&input).name(), "sum");
    }

    /// `avg` yields a column named `avg`.
    #[test]
    fn test_avg_aggregation() {
        let input = col("value");
        assert_eq!(avg(&input).name(), "avg");
    }

    /// `max` yields a column named `max`.
    #[test]
    fn test_max_aggregation() {
        let input = col("value");
        assert_eq!(max(&input).name(), "max");
    }

    /// `min` yields a column named `min`.
    #[test]
    fn test_min_aggregation() {
        let input = col("value");
        assert_eq!(min(&input).name(), "min");
    }

    /// `when(..).then(..).otherwise(..)` picks the `then` value on rows where
    /// the condition holds and the `otherwise` value on the remaining rows.
    #[test]
    fn test_when_then_otherwise() {
        let people = df!(
            "age" => &[15, 25, 35]
        )
        .unwrap();

        // Condition: age > 18.
        let age = col("age");
        let is_adult = age.gt(polars::prelude::lit(18));

        let adult = lit_str("adult");
        let minor = lit_str("minor");
        let status = when(&is_adult).then(&adult).otherwise(&minor);

        let labelled = people
            .lazy()
            .with_column(status.into_expr().alias("status"))
            .collect()
            .unwrap();

        let status_column = labelled.column("status").unwrap();
        let labels = status_column
            .str()
            .unwrap()
            .into_iter()
            .collect::<Vec<Option<&str>>>();

        // Row by row: 15 -> minor, 25 -> adult, 35 -> adult.
        let expected = [Some("minor"), Some("adult"), Some("adult")];
        for (row, want) in expected.iter().enumerate() {
            assert_eq!(labels[row], *want);
        }
    }

    /// `coalesce` takes, per row, the first non-null value in argument order.
    #[test]
    fn test_coalesce_returns_first_non_null() {
        let frame = df!(
            "a" => &[Some(1), None, None],
            "b" => &[None, Some(2), None],
            "c" => &[None, None, Some(3)]
        )
        .unwrap();

        let (first, second, third) = (col("a"), col("b"), col("c"));
        let candidates = [&first, &second, &third];
        let merged_column = coalesce(&candidates);

        let merged_frame = frame
            .lazy()
            .with_column(merged_column.into_expr().alias("coalesced"))
            .collect()
            .unwrap();

        let coalesced = merged_frame.column("coalesced").unwrap();
        let merged = coalesced
            .i32()
            .unwrap()
            .into_iter()
            .collect::<Vec<Option<i32>>>();

        // Each row has exactly one non-null input, taken from a, b, c in turn.
        let expected = [Some(1), Some(2), Some(3)];
        for (row, want) in expected.iter().enumerate() {
            assert_eq!(merged[row], *want);
        }
    }

    /// A trailing literal supplies the value when every column is null.
    #[test]
    fn test_coalesce_with_literal_fallback() {
        let frame = df!(
            "a" => &[Some(1), None],
            "b" => &[None::<i32>, None::<i32>]
        )
        .unwrap();

        let (first, second) = (col("a"), col("b"));
        let default_value = lit_i32(0);
        let candidates = [&first, &second, &default_value];
        let merged_column = coalesce(&candidates);

        let merged_frame = frame
            .lazy()
            .with_column(merged_column.into_expr().alias("coalesced"))
            .collect()
            .unwrap();

        let coalesced = merged_frame.column("coalesced").unwrap();
        let merged = coalesced
            .i32()
            .unwrap()
            .into_iter()
            .collect::<Vec<Option<i32>>>();

        // Row 0 comes from column `a`; row 1 falls through to the literal 0.
        let expected = [Some(1), Some(0)];
        for (row, want) in expected.iter().enumerate() {
            assert_eq!(merged[row], *want);
        }
    }

    /// Calling `coalesce` with no columns panics with a descriptive message.
    #[test]
    #[should_panic(expected = "coalesce requires at least one column")]
    fn test_coalesce_empty_panics() {
        let no_columns: &[&Column] = &[];
        let _ = coalesce(no_columns);
    }
}