1use polars::prelude::{
2 col, lit, DataType, Expr, GetOutput, ListNameSpaceExtension, RankMethod, RankOptions, TimeUnit,
3};
4
/// Translates a SQL `LIKE` pattern into an anchored regular expression string.
///
/// * `%` becomes `.*` and `_` becomes `.`.
/// * Regex metacharacters in the pattern are backslash-escaped so they match literally.
/// * When `escape_char` is `Some(e)`, the character following `e` is taken literally
///   (so `e%` matches a literal percent sign). An escape character at the very end of
///   the pattern has nothing to escape and is matched as a literal character.
///
/// The result is always wrapped in `^...$` so the whole value must match.
fn like_pattern_to_regex(pattern: &str, escape_char: Option<char>) -> String {
    /// Appends `c` so that it matches itself, escaping it only if it is a regex metacharacter.
    fn push_literal(out: &mut String, c: char) {
        const REGEX_META: &str = "\\.*+?[](){}^$|";
        if REGEX_META.contains(c) {
            out.push('\\');
        }
        out.push(c);
    }

    let mut out = String::with_capacity(pattern.len() * 2 + 2);
    let mut chars = pattern.chars();
    // `while let` rather than `for`: the escape branch consumes an extra character.
    while let Some(c) = chars.next() {
        if escape_char == Some(c) {
            // A dangling escape falls back to the escape character itself. It must not be
            // blindly prefixed with a backslash: `\x`, `\d`, ... are regex escapes, not literals.
            push_literal(&mut out, chars.next().unwrap_or(c));
        } else {
            match c {
                '%' => out.push_str(".*"),
                '_' => out.push('.'),
                _ => push_literal(&mut out, c),
            }
        }
    }
    format!("^{out}$")
}
37
/// Tag recorded on a column built by a random-number constructor.
///
/// Each variant carries the optional seed that was supplied at construction time.
#[derive(Clone, Copy, Debug)]
pub enum DeferredRandom {
    /// Set by `Column::from_rand`.
    Rand(Option<u64>),
    /// Set by `Column::from_randn`.
    Randn(Option<u64>),
}
44
45#[derive(Debug, Clone)]
49pub struct Column {
50 name: String,
51 expr: Expr, pub(crate) deferred: Option<DeferredRandom>,
54 pub(crate) udf_call: Option<(String, Vec<Column>)>,
56}
57
58impl Column {
59 pub fn new(name: String) -> Self {
61 Column {
62 name: name.clone(),
63 expr: col(&name),
64 deferred: None,
65 udf_call: None,
66 }
67 }
68
69 pub fn from_expr(expr: Expr, name: Option<String>) -> Self {
71 let display_name = name.unwrap_or_else(|| "<expr>".to_string());
72 Column {
73 name: display_name,
74 expr,
75 deferred: None,
76 udf_call: None,
77 }
78 }
79
80 pub fn from_udf_call(name: String, args: Vec<Column>) -> Self {
82 Column {
83 name: format!("{name}()"),
84 expr: lit(0i32), deferred: None,
86 udf_call: Some((name, args)),
87 }
88 }
89
90 pub fn from_rand(seed: Option<u64>) -> Self {
92 let expr = lit(1i64).cum_sum(false).map(
93 move |c| crate::udfs::apply_rand_with_seed(c, seed),
94 GetOutput::from_type(DataType::Float64),
95 );
96 Column {
97 name: "rand".to_string(),
98 expr,
99 deferred: Some(DeferredRandom::Rand(seed)),
100 udf_call: None,
101 }
102 }
103
104 pub fn from_randn(seed: Option<u64>) -> Self {
106 let expr = lit(1i64).cum_sum(false).map(
107 move |c| crate::udfs::apply_randn_with_seed(c, seed),
108 GetOutput::from_type(DataType::Float64),
109 );
110 Column {
111 name: "randn".to_string(),
112 expr,
113 deferred: Some(DeferredRandom::Randn(seed)),
114 udf_call: None,
115 }
116 }
117
118 pub fn expr(&self) -> &Expr {
120 &self.expr
121 }
122
123 pub fn into_expr(self) -> Expr {
125 self.expr
126 }
127
128 pub fn name(&self) -> &str {
130 &self.name
131 }
132
133 pub fn alias(&self, name: &str) -> Column {
135 Column {
136 name: name.to_string(),
137 expr: self.expr.clone().alias(name),
138 deferred: self.deferred,
139 udf_call: self.udf_call.clone(),
140 }
141 }
142
143 pub fn asc(&self) -> crate::functions::SortOrder {
145 crate::functions::asc(self)
146 }
147
148 pub fn asc_nulls_first(&self) -> crate::functions::SortOrder {
150 crate::functions::asc_nulls_first(self)
151 }
152
153 pub fn asc_nulls_last(&self) -> crate::functions::SortOrder {
155 crate::functions::asc_nulls_last(self)
156 }
157
158 pub fn desc(&self) -> crate::functions::SortOrder {
160 crate::functions::desc(self)
161 }
162
163 pub fn desc_nulls_first(&self) -> crate::functions::SortOrder {
165 crate::functions::desc_nulls_first(self)
166 }
167
168 pub fn desc_nulls_last(&self) -> crate::functions::SortOrder {
170 crate::functions::desc_nulls_last(self)
171 }
172
173 pub fn is_null(&self) -> Column {
175 Column {
176 name: format!("({} IS NULL)", self.name),
177 expr: self.expr.clone().is_null(),
178 deferred: None,
179 udf_call: None,
180 }
181 }
182
183 pub fn is_not_null(&self) -> Column {
185 Column {
186 name: format!("({} IS NOT NULL)", self.name),
187 expr: self.expr.clone().is_not_null(),
188 deferred: None,
189 udf_call: None,
190 }
191 }
192
193 pub fn isnull(&self) -> Column {
195 self.is_null()
196 }
197
198 pub fn isnotnull(&self) -> Column {
200 self.is_not_null()
201 }
202
203 fn null_boolean_expr() -> Expr {
205 use polars::prelude::*;
206 lit(NULL).cast(DataType::Boolean)
208 }
209
210 pub fn like(&self, pattern: &str, escape_char: Option<char>) -> Column {
213 let regex = like_pattern_to_regex(pattern, escape_char);
214 self.regexp_like(®ex)
215 }
216
217 pub fn ilike(&self, pattern: &str, escape_char: Option<char>) -> Column {
220 use polars::prelude::*;
221 let regex = format!("(?i){}", like_pattern_to_regex(pattern, escape_char));
222 Self::from_expr(self.expr().clone().str().contains(lit(regex), false), None)
223 }
224
225 pub fn eq_pyspark(&self, other: &Column) -> Column {
231 let left_null = self.expr().clone().is_null();
233 let right_null = other.expr().clone().is_null();
234 let either_null = left_null.clone().or(right_null.clone());
235
236 let eq_result = self.expr().clone().eq(other.expr().clone());
238
239 let null_boolean = Self::null_boolean_expr();
241 let null_aware_expr = crate::functions::when(&Self::from_expr(either_null, None))
242 .then(&Self::from_expr(null_boolean, None))
243 .otherwise(&Self::from_expr(eq_result, None));
244
245 Self::from_expr(null_aware_expr.into_expr(), None)
246 }
247
248 pub fn ne_pyspark(&self, other: &Column) -> Column {
251 let left_null = self.expr().clone().is_null();
253 let right_null = other.expr().clone().is_null();
254 let either_null = left_null.clone().or(right_null.clone());
255
256 let ne_result = self.expr().clone().neq(other.expr().clone());
258
259 let null_boolean = Self::null_boolean_expr();
261 let null_aware_expr = crate::functions::when(&Self::from_expr(either_null, None))
262 .then(&Self::from_expr(null_boolean, None))
263 .otherwise(&Self::from_expr(ne_result, None));
264
265 Self::from_expr(null_aware_expr.into_expr(), None)
266 }
267
268 pub fn eq_null_safe(&self, other: &Column) -> Column {
271 use crate::functions::{lit_bool, when};
272
273 let left_null = self.expr().clone().is_null();
274 let right_null = other.expr().clone().is_null();
275 let both_null = left_null.clone().and(right_null.clone());
276 let either_null = left_null.clone().or(right_null.clone());
277
278 let eq_result = self.expr().clone().eq(other.expr().clone());
280
281 when(&Self::from_expr(both_null, None))
285 .then(&lit_bool(true))
286 .otherwise(
287 &when(&Self::from_expr(either_null, None))
288 .then(&lit_bool(false))
289 .otherwise(&Self::from_expr(eq_result, None)),
290 )
291 }
292
293 pub fn gt_pyspark(&self, other: &Column) -> Column {
296 let left_null = self.expr().clone().is_null();
298 let right_null = other.expr().clone().is_null();
299 let either_null = left_null.clone().or(right_null.clone());
300
301 let gt_result = self.expr().clone().gt(other.expr().clone());
303
304 let null_boolean = Self::null_boolean_expr();
306 let null_aware_expr = crate::functions::when(&Self::from_expr(either_null, None))
307 .then(&Self::from_expr(null_boolean, None))
308 .otherwise(&Self::from_expr(gt_result, None));
309
310 Self::from_expr(null_aware_expr.into_expr(), None)
311 }
312
313 pub fn ge_pyspark(&self, other: &Column) -> Column {
316 let left_null = self.expr().clone().is_null();
318 let right_null = other.expr().clone().is_null();
319 let either_null = left_null.clone().or(right_null.clone());
320
321 let ge_result = self.expr().clone().gt_eq(other.expr().clone());
323
324 let null_boolean = Self::null_boolean_expr();
326 let null_aware_expr = crate::functions::when(&Self::from_expr(either_null, None))
327 .then(&Self::from_expr(null_boolean, None))
328 .otherwise(&Self::from_expr(ge_result, None));
329
330 Self::from_expr(null_aware_expr.into_expr(), None)
331 }
332
333 pub fn lt_pyspark(&self, other: &Column) -> Column {
336 let left_null = self.expr().clone().is_null();
338 let right_null = other.expr().clone().is_null();
339 let either_null = left_null.clone().or(right_null.clone());
340
341 let lt_result = self.expr().clone().lt(other.expr().clone());
343
344 let null_boolean = Self::null_boolean_expr();
346 let null_aware_expr = crate::functions::when(&Self::from_expr(either_null, None))
347 .then(&Self::from_expr(null_boolean, None))
348 .otherwise(&Self::from_expr(lt_result, None));
349
350 Self::from_expr(null_aware_expr.into_expr(), None)
351 }
352
353 pub fn le_pyspark(&self, other: &Column) -> Column {
356 let left_null = self.expr().clone().is_null();
358 let right_null = other.expr().clone().is_null();
359 let either_null = left_null.clone().or(right_null.clone());
360
361 let le_result = self.expr().clone().lt_eq(other.expr().clone());
363
364 let null_boolean = Self::null_boolean_expr();
366 let null_aware_expr = crate::functions::when(&Self::from_expr(either_null, None))
367 .then(&Self::from_expr(null_boolean, None))
368 .otherwise(&Self::from_expr(le_result, None));
369
370 Self::from_expr(null_aware_expr.into_expr(), None)
371 }
372
373 pub fn gt(&self, other: Expr) -> Column {
379 Self::from_expr(self.expr().clone().gt(other), None)
380 }
381
382 pub fn gt_eq(&self, other: Expr) -> Column {
384 Self::from_expr(self.expr().clone().gt_eq(other), None)
385 }
386
387 pub fn lt(&self, other: Expr) -> Column {
389 Self::from_expr(self.expr().clone().lt(other), None)
390 }
391
392 pub fn lt_eq(&self, other: Expr) -> Column {
394 Self::from_expr(self.expr().clone().lt_eq(other), None)
395 }
396
397 pub fn eq(&self, other: Expr) -> Column {
399 Self::from_expr(self.expr().clone().eq(other), None)
400 }
401
402 pub fn neq(&self, other: Expr) -> Column {
404 Self::from_expr(self.expr().clone().neq(other), None)
405 }
406
407 pub fn upper(&self) -> Column {
418 Self::from_expr(self.expr().clone().str().to_uppercase(), None)
419 }
420
421 pub fn lower(&self) -> Column {
423 Self::from_expr(self.expr().clone().str().to_lowercase(), None)
424 }
425
426 pub fn lcase(&self) -> Column {
428 self.lower()
429 }
430
431 pub fn ucase(&self) -> Column {
433 self.upper()
434 }
435
436 pub fn substr(&self, start: i64, length: Option<i64>) -> Column {
438 use polars::prelude::*;
439 let offset = (start - 1).max(0);
440 let offset_expr = lit(offset);
441 let length_expr = length.map(lit).unwrap_or_else(|| lit(i64::MAX)); Self::from_expr(
443 self.expr().clone().str().slice(offset_expr, length_expr),
444 None,
445 )
446 }
447
448 pub fn length(&self) -> Column {
450 Self::from_expr(self.expr().clone().str().len_chars(), None)
451 }
452
453 pub fn bit_length(&self) -> Column {
455 use polars::prelude::*;
456 let len_bytes = self.expr().clone().str().len_bytes().cast(DataType::Int32);
457 Self::from_expr(len_bytes * lit(8i32), None)
458 }
459
460 pub fn octet_length(&self) -> Column {
462 use polars::prelude::*;
463 Self::from_expr(
464 self.expr().clone().str().len_bytes().cast(DataType::Int32),
465 None,
466 )
467 }
468
469 pub fn char_length(&self) -> Column {
471 self.length()
472 }
473
474 pub fn character_length(&self) -> Column {
476 self.length()
477 }
478
479 pub fn encode(&self, charset: &str) -> Column {
481 let charset = charset.to_string();
482 let expr = self.expr().clone().map(
483 move |s| crate::udfs::apply_encode(s, &charset),
484 GetOutput::from_type(DataType::String),
485 );
486 Self::from_expr(expr, None)
487 }
488
489 pub fn decode(&self, charset: &str) -> Column {
491 let charset = charset.to_string();
492 let expr = self.expr().clone().map(
493 move |s| crate::udfs::apply_decode(s, &charset),
494 GetOutput::from_type(DataType::String),
495 );
496 Self::from_expr(expr, None)
497 }
498
499 pub fn to_binary(&self, fmt: &str) -> Column {
501 let fmt = fmt.to_string();
502 let expr = self.expr().clone().map(
503 move |s| crate::udfs::apply_to_binary(s, &fmt),
504 GetOutput::from_type(DataType::String),
505 );
506 Self::from_expr(expr, None)
507 }
508
509 pub fn try_to_binary(&self, fmt: &str) -> Column {
511 let fmt = fmt.to_string();
512 let expr = self.expr().clone().map(
513 move |s| crate::udfs::apply_try_to_binary(s, &fmt),
514 GetOutput::from_type(DataType::String),
515 );
516 Self::from_expr(expr, None)
517 }
518
519 pub fn aes_encrypt(&self, key: &str) -> Column {
521 let key = key.to_string();
522 let expr = self.expr().clone().map(
523 move |s| crate::udfs::apply_aes_encrypt(s, &key),
524 GetOutput::from_type(DataType::String),
525 );
526 Self::from_expr(expr, None)
527 }
528
529 pub fn aes_decrypt(&self, key: &str) -> Column {
531 let key = key.to_string();
532 let expr = self.expr().clone().map(
533 move |s| crate::udfs::apply_aes_decrypt(s, &key),
534 GetOutput::from_type(DataType::String),
535 );
536 Self::from_expr(expr, None)
537 }
538
539 pub fn try_aes_decrypt(&self, key: &str) -> Column {
541 let key = key.to_string();
542 let expr = self.expr().clone().map(
543 move |s| crate::udfs::apply_try_aes_decrypt(s, &key),
544 GetOutput::from_type(DataType::String),
545 );
546 Self::from_expr(expr, None)
547 }
548
549 pub fn typeof_(&self) -> Column {
551 Self::from_expr(
552 self.expr().clone().map(
553 crate::udfs::apply_typeof,
554 GetOutput::from_type(DataType::String),
555 ),
556 None,
557 )
558 }
559
560 pub fn trim(&self) -> Column {
562 use polars::prelude::*;
563 Self::from_expr(self.expr().clone().str().strip_chars(lit(" \t\n\r")), None)
564 }
565
566 pub fn ltrim(&self) -> Column {
568 use polars::prelude::*;
569 Self::from_expr(
570 self.expr().clone().str().strip_chars_start(lit(" \t\n\r")),
571 None,
572 )
573 }
574
575 pub fn rtrim(&self) -> Column {
577 use polars::prelude::*;
578 Self::from_expr(
579 self.expr().clone().str().strip_chars_end(lit(" \t\n\r")),
580 None,
581 )
582 }
583
584 pub fn btrim(&self, trim_str: Option<&str>) -> Column {
586 use polars::prelude::*;
587 let chars = trim_str.unwrap_or(" \t\n\r");
588 Self::from_expr(self.expr().clone().str().strip_chars(lit(chars)), None)
589 }
590
591 pub fn locate(&self, substr: &str, pos: i64) -> Column {
593 use polars::prelude::*;
594 if substr.is_empty() {
595 return Self::from_expr(lit(1i64), None);
596 }
597 let start = (pos - 1).max(0);
598 let slice_expr = self.expr().clone().str().slice(lit(start), lit(i64::MAX));
599 let found = slice_expr.str().find_literal(lit(substr.to_string()));
600 Self::from_expr(
601 (found.cast(DataType::Int64) + lit(start + 1)).fill_null(lit(0i64)),
602 None,
603 )
604 }
605
606 pub fn conv(&self, from_base: i32, to_base: i32) -> Column {
608 let expr = self.expr().clone().map(
609 move |s| crate::udfs::apply_conv(s, from_base, to_base),
610 GetOutput::from_type(DataType::String),
611 );
612 Self::from_expr(expr, None)
613 }
614
615 pub fn hex(&self) -> Column {
617 let expr = self.expr().clone().map(
618 crate::udfs::apply_hex,
619 GetOutput::from_type(DataType::String),
620 );
621 Self::from_expr(expr, None)
622 }
623
624 pub fn unhex(&self) -> Column {
626 let expr = self.expr().clone().map(
627 crate::udfs::apply_unhex,
628 GetOutput::from_type(DataType::String),
629 );
630 Self::from_expr(expr, None)
631 }
632
633 pub fn bin(&self) -> Column {
635 let expr = self.expr().clone().map(
636 crate::udfs::apply_bin,
637 GetOutput::from_type(DataType::String),
638 );
639 Self::from_expr(expr, None)
640 }
641
642 pub fn getbit(&self, pos: i64) -> Column {
644 let expr = self.expr().clone().map(
645 move |s| crate::udfs::apply_getbit(s, pos),
646 GetOutput::from_type(DataType::Int64),
647 );
648 Self::from_expr(expr, None)
649 }
650
651 pub fn bit_and(&self, other: &Column) -> Column {
653 let args = [other.expr().clone()];
654 let expr = self.expr().clone().cast(DataType::Int64).map_many(
655 crate::udfs::apply_bit_and,
656 &args,
657 GetOutput::from_type(DataType::Int64),
658 );
659 Self::from_expr(expr, None)
660 }
661
662 pub fn bit_or(&self, other: &Column) -> Column {
664 let args = [other.expr().clone()];
665 let expr = self.expr().clone().cast(DataType::Int64).map_many(
666 crate::udfs::apply_bit_or,
667 &args,
668 GetOutput::from_type(DataType::Int64),
669 );
670 Self::from_expr(expr, None)
671 }
672
673 pub fn bit_xor(&self, other: &Column) -> Column {
675 let args = [other.expr().clone()];
676 let expr = self.expr().clone().cast(DataType::Int64).map_many(
677 crate::udfs::apply_bit_xor,
678 &args,
679 GetOutput::from_type(DataType::Int64),
680 );
681 Self::from_expr(expr, None)
682 }
683
684 pub fn bit_count(&self) -> Column {
686 let expr = self.expr().clone().map(
687 crate::udfs::apply_bit_count,
688 GetOutput::from_type(DataType::Int64),
689 );
690 Self::from_expr(expr, None)
691 }
692
693 pub fn assert_true(&self, err_msg: Option<&str>) -> Column {
696 let msg = err_msg.map(String::from);
697 let expr = self.expr().clone().map(
698 move |c| crate::udfs::apply_assert_true(c, msg.as_deref()),
699 GetOutput::same_type(),
700 );
701 Self::from_expr(expr, None)
702 }
703
704 pub fn bitwise_not(&self) -> Column {
706 let expr = (lit(-1i64) - self.expr().clone().cast(DataType::Int64)).cast(DataType::Int64);
708 Self::from_expr(expr, None)
709 }
710
711 pub fn str_to_map(&self, pair_delim: &str, key_value_delim: &str) -> Column {
713 let pair_delim = pair_delim.to_string();
714 let key_value_delim = key_value_delim.to_string();
715 let expr = self.expr().clone().map(
716 move |s| crate::udfs::apply_str_to_map(s, &pair_delim, &key_value_delim),
717 GetOutput::same_type(),
718 );
719 Self::from_expr(expr, None)
720 }
721
722 pub fn regexp_extract(&self, pattern: &str, group_index: usize) -> Column {
724 use polars::prelude::*;
725 let pat = pattern.to_string();
726 Self::from_expr(
727 self.expr().clone().str().extract(lit(pat), group_index),
728 None,
729 )
730 }
731
732 pub fn regexp_replace(&self, pattern: &str, replacement: &str) -> Column {
734 use polars::prelude::*;
735 let pat = pattern.to_string();
736 let rep = replacement.to_string();
737 Self::from_expr(
738 self.expr().clone().str().replace(lit(pat), lit(rep), false),
739 None,
740 )
741 }
742
743 pub fn left(&self, n: i64) -> Column {
745 use polars::prelude::*;
746 let len = n.max(0) as u32;
747 Self::from_expr(
748 self.expr().clone().str().slice(lit(0i64), lit(len as i64)),
749 None,
750 )
751 }
752
753 pub fn right(&self, n: i64) -> Column {
755 use polars::prelude::*;
756 let n_val = n.max(0);
757 let n_expr = lit(n_val);
758 let len_chars = self.expr().clone().str().len_chars().cast(DataType::Int64);
759 let start = when((len_chars.clone() - n_expr.clone()).lt_eq(lit(0i64)))
760 .then(lit(0i64))
761 .otherwise(len_chars - n_expr.clone());
762 Self::from_expr(self.expr().clone().str().slice(start, n_expr), None)
763 }
764
765 pub fn replace(&self, search: &str, replacement: &str) -> Column {
767 use polars::prelude::*;
768 Self::from_expr(
769 self.expr().clone().str().replace_all(
770 lit(search.to_string()),
771 lit(replacement.to_string()),
772 true,
773 ),
774 None,
775 )
776 }
777
778 pub fn startswith(&self, prefix: &str) -> Column {
780 use polars::prelude::*;
781 Self::from_expr(
782 self.expr()
783 .clone()
784 .str()
785 .starts_with(lit(prefix.to_string())),
786 None,
787 )
788 }
789
790 pub fn endswith(&self, suffix: &str) -> Column {
792 use polars::prelude::*;
793 Self::from_expr(
794 self.expr().clone().str().ends_with(lit(suffix.to_string())),
795 None,
796 )
797 }
798
799 pub fn contains(&self, substring: &str) -> Column {
801 use polars::prelude::*;
802 Self::from_expr(
803 self.expr()
804 .clone()
805 .str()
806 .contains(lit(substring.to_string()), true),
807 None,
808 )
809 }
810
811 pub fn split(&self, delimiter: &str) -> Column {
814 use polars::prelude::*;
815 Self::from_expr(
816 self.expr().clone().str().split(lit(delimiter.to_string())),
817 None,
818 )
819 }
820
821 pub fn initcap(&self) -> Column {
824 Self::from_expr(self.expr().clone().str().to_lowercase(), None)
825 }
826
827 pub fn regexp_extract_all(&self, pattern: &str) -> Column {
829 use polars::prelude::*;
830 Self::from_expr(
831 self.expr()
832 .clone()
833 .str()
834 .extract_all(lit(pattern.to_string())),
835 None,
836 )
837 }
838
839 pub fn regexp_like(&self, pattern: &str) -> Column {
841 use polars::prelude::*;
842 Self::from_expr(
843 self.expr()
844 .clone()
845 .str()
846 .contains(lit(pattern.to_string()), false),
847 None,
848 )
849 }
850
851 pub fn regexp_count(&self, pattern: &str) -> Column {
853 use polars::prelude::*;
854 Self::from_expr(
855 self.expr()
856 .clone()
857 .str()
858 .count_matches(lit(pattern.to_string()), false)
859 .cast(DataType::Int64),
860 None,
861 )
862 }
863
864 pub fn regexp_substr(&self, pattern: &str) -> Column {
866 self.regexp_extract(pattern, 0)
867 }
868
869 pub fn regexp_instr(&self, pattern: &str, group_idx: Option<usize>) -> Column {
871 let idx = group_idx.unwrap_or(0);
872 let pattern = pattern.to_string();
873 let expr = self.expr().clone().map(
874 move |s| crate::udfs::apply_regexp_instr(s, pattern.clone(), idx),
875 GetOutput::from_type(DataType::Int64),
876 );
877 Self::from_expr(expr, None)
878 }
879
880 pub fn find_in_set(&self, set_column: &Column) -> Column {
882 let args = [set_column.expr().clone()];
883 let expr = self.expr().clone().map_many(
884 crate::udfs::apply_find_in_set,
885 &args,
886 GetOutput::from_type(DataType::Int64),
887 );
888 Self::from_expr(expr, None)
889 }
890
891 pub fn repeat(&self, n: i32) -> Column {
893 use polars::prelude::*;
894 Self::from_expr(
896 self.expr()
897 .clone()
898 .repeat_by(lit(n as u32))
899 .list()
900 .join(lit(""), false),
901 None,
902 )
903 }
904
905 pub fn reverse(&self) -> Column {
907 Self::from_expr(self.expr().clone().str().reverse(), None)
908 }
909
910 pub fn instr(&self, substr: &str) -> Column {
912 use polars::prelude::*;
913 let found = self
914 .expr()
915 .clone()
916 .str()
917 .find_literal(lit(substr.to_string()));
918 Self::from_expr(
920 (found.cast(DataType::Int64) + lit(1i64)).fill_null(lit(0i64)),
921 None,
922 )
923 }
924
925 pub fn lpad(&self, length: i32, pad: &str) -> Column {
927 let pad_str = if pad.is_empty() { " " } else { pad };
928 let fill = pad_str.chars().next().unwrap_or(' ');
929 Self::from_expr(
930 self.expr().clone().str().pad_start(length as usize, fill),
931 None,
932 )
933 }
934
935 pub fn rpad(&self, length: i32, pad: &str) -> Column {
937 let pad_str = if pad.is_empty() { " " } else { pad };
938 let fill = pad_str.chars().next().unwrap_or(' ');
939 Self::from_expr(
940 self.expr().clone().str().pad_end(length as usize, fill),
941 None,
942 )
943 }
944
945 pub fn translate(&self, from_str: &str, to_str: &str) -> Column {
947 use polars::prelude::*;
948 let mut e = self.expr().clone();
949 let from_chars: Vec<char> = from_str.chars().collect();
950 let to_chars: Vec<char> = to_str.chars().collect();
951 for (i, fc) in from_chars.iter().enumerate() {
952 let f = fc.to_string();
953 let t = to_chars
954 .get(i)
955 .map(|c| c.to_string())
956 .unwrap_or_else(String::new); e = e.str().replace_all(lit(f), lit(t), true);
958 }
959 Self::from_expr(e, None)
960 }
961
962 pub fn mask(
965 &self,
966 upper_char: Option<char>,
967 lower_char: Option<char>,
968 digit_char: Option<char>,
969 other_char: Option<char>,
970 ) -> Column {
971 use polars::prelude::*;
972 let upper = upper_char.unwrap_or('X').to_string();
973 let lower = lower_char.unwrap_or('x').to_string();
974 let digit = digit_char.unwrap_or('n').to_string();
975 let other = other_char.map(|c| c.to_string());
976 let mut e = self
977 .expr()
978 .clone()
979 .str()
980 .replace_all(lit("[A-Z]".to_string()), lit(upper), false)
981 .str()
982 .replace_all(lit("[a-z]".to_string()), lit(lower), false)
983 .str()
984 .replace_all(lit(r"\d".to_string()), lit(digit), false);
985 if let Some(o) = other {
986 e = e
987 .str()
988 .replace_all(lit("[^A-Za-z0-9]".to_string()), lit(o), false);
989 }
990 Self::from_expr(e, None)
991 }
992
993 pub fn split_part(&self, delimiter: &str, part_num: i64) -> Column {
996 use polars::prelude::*;
997 if part_num == 0 {
998 return Self::from_expr(Expr::Literal(LiteralValue::Null), None);
999 }
1000 let use_regex = delimiter == "|";
1001 if use_regex {
1002 let pattern = delimiter.to_string();
1003 let part = part_num;
1004 let get_expr = self.expr().clone().map(
1005 move |col| crate::udfs::apply_split_part_regex(col, &pattern, part),
1006 GetOutput::from_type(DataType::String),
1007 );
1008 let expr = when(self.expr().clone().is_null())
1009 .then(Expr::Literal(LiteralValue::Null))
1010 .otherwise(get_expr.fill_null(lit("")));
1011 return Self::from_expr(expr, None);
1012 }
1013 let delim = delimiter.to_string();
1014 let split_expr = self.expr().clone().str().split(lit(delim));
1015 let index = if part_num > 0 {
1016 lit(part_num - 1)
1017 } else {
1018 lit(part_num)
1019 };
1020 let get_expr = split_expr.list().get(index, true).fill_null(lit(""));
1021 let expr = when(self.expr().clone().is_null())
1022 .then(Expr::Literal(LiteralValue::Null))
1023 .otherwise(get_expr);
1024 Self::from_expr(expr, None)
1025 }
1026
1027 pub fn substring_index(&self, delimiter: &str, count: i64) -> Column {
1029 use polars::prelude::*;
1030 let delim = delimiter.to_string();
1031 let split_expr = self.expr().clone().str().split(lit(delim.clone()));
1032 let n = count.unsigned_abs() as i64;
1033 let expr = if count > 0 {
1034 split_expr
1035 .clone()
1036 .list()
1037 .slice(lit(0i64), lit(n))
1038 .list()
1039 .join(lit(delim), false)
1040 } else {
1041 let len = split_expr.clone().list().len();
1042 let start = when(len.clone().gt(lit(n)))
1043 .then(len.clone() - lit(n))
1044 .otherwise(lit(0i64));
1045 let slice_len = when(len.clone().gt(lit(n))).then(lit(n)).otherwise(len);
1046 split_expr
1047 .list()
1048 .slice(start, slice_len)
1049 .list()
1050 .join(lit(delim), false)
1051 };
1052 Self::from_expr(expr, None)
1053 }
1054
1055 pub fn soundex(&self) -> Column {
1057 let expr = self
1058 .expr()
1059 .clone()
1060 .map(crate::udfs::apply_soundex, GetOutput::same_type());
1061 Self::from_expr(expr, None)
1062 }
1063
1064 pub fn levenshtein(&self, other: &Column) -> Column {
1066 let args = [other.expr().clone()];
1067 let expr = self.expr().clone().map_many(
1068 crate::udfs::apply_levenshtein,
1069 &args,
1070 GetOutput::from_type(DataType::Int64),
1071 );
1072 Self::from_expr(expr, None)
1073 }
1074
1075 pub fn crc32(&self) -> Column {
1077 let expr = self.expr().clone().map(
1078 crate::udfs::apply_crc32,
1079 GetOutput::from_type(DataType::Int64),
1080 );
1081 Self::from_expr(expr, None)
1082 }
1083
1084 pub fn xxhash64(&self) -> Column {
1086 let expr = self.expr().clone().map(
1087 crate::udfs::apply_xxhash64,
1088 GetOutput::from_type(DataType::Int64),
1089 );
1090 Self::from_expr(expr, None)
1091 }
1092
1093 pub fn ascii(&self) -> Column {
1095 let expr = self.expr().clone().map(
1096 crate::udfs::apply_ascii,
1097 GetOutput::from_type(DataType::Int32),
1098 );
1099 Self::from_expr(expr, None)
1100 }
1101
1102 pub fn format_number(&self, decimals: u32) -> Column {
1104 let expr = self.expr().clone().map(
1105 move |s| crate::udfs::apply_format_number(s, decimals),
1106 GetOutput::from_type(DataType::String),
1107 );
1108 Self::from_expr(expr, None)
1109 }
1110
1111 pub fn char(&self) -> Column {
1113 let expr = self.expr().clone().map(
1114 crate::udfs::apply_char,
1115 GetOutput::from_type(DataType::String),
1116 );
1117 Self::from_expr(expr, None)
1118 }
1119
1120 pub fn chr(&self) -> Column {
1122 self.char()
1123 }
1124
1125 pub fn base64(&self) -> Column {
1127 let expr = self
1128 .expr()
1129 .clone()
1130 .map(crate::udfs::apply_base64, GetOutput::same_type());
1131 Self::from_expr(expr, None)
1132 }
1133
1134 pub fn unbase64(&self) -> Column {
1136 let expr = self
1137 .expr()
1138 .clone()
1139 .map(crate::udfs::apply_unbase64, GetOutput::same_type());
1140 Self::from_expr(expr, None)
1141 }
1142
1143 pub fn sha1(&self) -> Column {
1145 let expr = self
1146 .expr()
1147 .clone()
1148 .map(crate::udfs::apply_sha1, GetOutput::same_type());
1149 Self::from_expr(expr, None)
1150 }
1151
1152 pub fn sha2(&self, bit_length: i32) -> Column {
1154 let expr = self.expr().clone().map(
1155 move |s| crate::udfs::apply_sha2(s, bit_length),
1156 GetOutput::same_type(),
1157 );
1158 Self::from_expr(expr, None)
1159 }
1160
1161 pub fn md5(&self) -> Column {
1163 let expr = self
1164 .expr()
1165 .clone()
1166 .map(crate::udfs::apply_md5, GetOutput::same_type());
1167 Self::from_expr(expr, None)
1168 }
1169
1170 pub fn overlay(&self, replace: &str, pos: i64, length: i64) -> Column {
1172 use polars::prelude::*;
1173 let pos = pos.max(1);
1174 let replace_len = length.max(0);
1175 let start_left = 0i64;
1176 let len_left = (pos - 1).max(0);
1177 let start_right = (pos - 1 + replace_len).max(0);
1178 let len_right = 1_000_000i64; let left = self
1180 .expr()
1181 .clone()
1182 .str()
1183 .slice(lit(start_left), lit(len_left));
1184 let mid = lit(replace.to_string());
1185 let right = self
1186 .expr()
1187 .clone()
1188 .str()
1189 .slice(lit(start_right), lit(len_right));
1190 let exprs = [left, mid, right];
1191 let concat_expr = polars::prelude::concat_str(&exprs, "", false);
1192 Self::from_expr(concat_expr, None)
1193 }
1194
1195 pub fn abs(&self) -> Column {
1199 Self::from_expr(self.expr().clone().abs(), None)
1200 }
1201
1202 pub fn ceil(&self) -> Column {
1204 Self::from_expr(self.expr().clone().ceil(), None)
1205 }
1206
1207 pub fn ceiling(&self) -> Column {
1209 self.ceil()
1210 }
1211
1212 pub fn floor(&self) -> Column {
1214 Self::from_expr(self.expr().clone().floor(), None)
1215 }
1216
1217 pub fn round(&self, decimals: u32) -> Column {
1219 Self::from_expr(self.expr().clone().round(decimals), None)
1220 }
1221
1222 pub fn bround(&self, scale: i32) -> Column {
1224 let expr = self.expr().clone().map(
1225 move |s| crate::udfs::apply_bround(s, scale),
1226 GetOutput::from_type(DataType::Float64),
1227 );
1228 Self::from_expr(expr, None)
1229 }
1230
1231 pub fn negate(&self) -> Column {
1233 use polars::prelude::*;
1234 Self::from_expr(self.expr().clone() * lit(-1), None)
1235 }
1236
1237 pub fn multiply_pyspark(&self, other: &Column) -> Column {
1242 use polars::prelude::GetOutput;
1243 let args = [other.expr().clone()];
1244 let expr = self.expr().clone().map_many(
1245 crate::udfs::apply_pyspark_multiply,
1246 &args,
1247 GetOutput::from_type(DataType::Float64),
1248 );
1249 Self::from_expr(expr, None)
1250 }
1251
1252 pub fn add_pyspark(&self, other: &Column) -> Column {
1254 use polars::prelude::GetOutput;
1255 let args = [other.expr().clone()];
1256 let expr = self.expr().clone().map_many(
1257 crate::udfs::apply_pyspark_add,
1258 &args,
1259 GetOutput::from_type(DataType::Float64),
1260 );
1261 Self::from_expr(expr, None)
1262 }
1263
1264 pub fn subtract_pyspark(&self, other: &Column) -> Column {
1266 use polars::prelude::GetOutput;
1267 let args = [other.expr().clone()];
1268 let expr = self.expr().clone().map_many(
1269 crate::udfs::apply_pyspark_subtract,
1270 &args,
1271 GetOutput::from_type(DataType::Float64),
1272 );
1273 Self::from_expr(expr, None)
1274 }
1275
1276 pub fn divide_pyspark(&self, other: &Column) -> Column {
1278 use polars::prelude::GetOutput;
1279 let args = [other.expr().clone()];
1280 let expr = self.expr().clone().map_many(
1281 crate::udfs::apply_pyspark_divide,
1282 &args,
1283 GetOutput::from_type(DataType::Float64),
1284 );
1285 Self::from_expr(expr, None)
1286 }
1287
1288 pub fn mod_pyspark(&self, other: &Column) -> Column {
1290 use polars::prelude::GetOutput;
1291 let args = [other.expr().clone()];
1292 let expr = self.expr().clone().map_many(
1293 crate::udfs::apply_pyspark_mod,
1294 &args,
1295 GetOutput::from_type(DataType::Float64),
1296 );
1297 Self::from_expr(expr, None)
1298 }
1299
1300 pub fn multiply(&self, other: &Column) -> Column {
1302 Self::from_expr(self.expr().clone() * other.expr().clone(), None)
1303 }
1304
1305 pub fn add(&self, other: &Column) -> Column {
1307 Self::from_expr(self.expr().clone() + other.expr().clone(), None)
1308 }
1309
1310 pub fn subtract(&self, other: &Column) -> Column {
1312 Self::from_expr(self.expr().clone() - other.expr().clone(), None)
1313 }
1314
1315 pub fn divide(&self, other: &Column) -> Column {
1317 Self::from_expr(self.expr().clone() / other.expr().clone(), None)
1318 }
1319
1320 pub fn mod_(&self, other: &Column) -> Column {
1322 Self::from_expr(self.expr().clone() % other.expr().clone(), None)
1323 }
1324
1325 pub fn sqrt(&self) -> Column {
1327 Self::from_expr(self.expr().clone().sqrt(), None)
1328 }
1329
1330 pub fn pow(&self, exp: i64) -> Column {
1332 use polars::prelude::*;
1333 Self::from_expr(self.expr().clone().pow(lit(exp)), None)
1334 }
1335
1336 pub fn power(&self, exp: i64) -> Column {
1338 self.pow(exp)
1339 }
1340
1341 pub fn exp(&self) -> Column {
1343 Self::from_expr(self.expr().clone().exp(), None)
1344 }
1345
1346 pub fn log(&self) -> Column {
1348 Self::from_expr(self.expr().clone().log(std::f64::consts::E), None)
1349 }
1350
1351 pub fn ln(&self) -> Column {
1353 self.log()
1354 }
1355
1356 pub fn sin(&self) -> Column {
1358 let expr = self.expr().clone().map(
1359 crate::udfs::apply_sin,
1360 GetOutput::from_type(DataType::Float64),
1361 );
1362 Self::from_expr(expr, None)
1363 }
1364
1365 pub fn cos(&self) -> Column {
1367 let expr = self.expr().clone().map(
1368 crate::udfs::apply_cos,
1369 GetOutput::from_type(DataType::Float64),
1370 );
1371 Self::from_expr(expr, None)
1372 }
1373
1374 pub fn tan(&self) -> Column {
1376 let expr = self.expr().clone().map(
1377 crate::udfs::apply_tan,
1378 GetOutput::from_type(DataType::Float64),
1379 );
1380 Self::from_expr(expr, None)
1381 }
1382
1383 pub fn cot(&self) -> Column {
1385 let expr = self.expr().clone().map(
1386 crate::udfs::apply_cot,
1387 GetOutput::from_type(DataType::Float64),
1388 );
1389 Self::from_expr(expr, None)
1390 }
1391
1392 pub fn csc(&self) -> Column {
1394 let expr = self.expr().clone().map(
1395 crate::udfs::apply_csc,
1396 GetOutput::from_type(DataType::Float64),
1397 );
1398 Self::from_expr(expr, None)
1399 }
1400
1401 pub fn sec(&self) -> Column {
1403 let expr = self.expr().clone().map(
1404 crate::udfs::apply_sec,
1405 GetOutput::from_type(DataType::Float64),
1406 );
1407 Self::from_expr(expr, None)
1408 }
1409
1410 pub fn asin(&self) -> Column {
1412 let expr = self.expr().clone().map(
1413 crate::udfs::apply_asin,
1414 GetOutput::from_type(DataType::Float64),
1415 );
1416 Self::from_expr(expr, None)
1417 }
1418
1419 pub fn acos(&self) -> Column {
1421 let expr = self.expr().clone().map(
1422 crate::udfs::apply_acos,
1423 GetOutput::from_type(DataType::Float64),
1424 );
1425 Self::from_expr(expr, None)
1426 }
1427
1428 pub fn atan(&self) -> Column {
1430 let expr = self.expr().clone().map(
1431 crate::udfs::apply_atan,
1432 GetOutput::from_type(DataType::Float64),
1433 );
1434 Self::from_expr(expr, None)
1435 }
1436
1437 pub fn atan2(&self, x: &Column) -> Column {
1439 let args = [x.expr().clone()];
1440 let expr = self.expr().clone().map_many(
1441 crate::udfs::apply_atan2,
1442 &args,
1443 GetOutput::from_type(DataType::Float64),
1444 );
1445 Self::from_expr(expr, None)
1446 }
1447
1448 pub fn degrees(&self) -> Column {
1450 let expr = self.expr().clone().map(
1451 crate::udfs::apply_degrees,
1452 GetOutput::from_type(DataType::Float64),
1453 );
1454 Self::from_expr(expr, None)
1455 }
1456
1457 pub fn to_degrees(&self) -> Column {
1459 self.degrees()
1460 }
1461
1462 pub fn radians(&self) -> Column {
1464 let expr = self.expr().clone().map(
1465 crate::udfs::apply_radians,
1466 GetOutput::from_type(DataType::Float64),
1467 );
1468 Self::from_expr(expr, None)
1469 }
1470
1471 pub fn to_radians(&self) -> Column {
1473 self.radians()
1474 }
1475
1476 pub fn signum(&self) -> Column {
1478 let expr = self.expr().clone().map(
1479 crate::udfs::apply_signum,
1480 GetOutput::from_type(DataType::Float64),
1481 );
1482 Self::from_expr(expr, None)
1483 }
1484
1485 pub fn cosh(&self) -> Column {
1487 let expr = self.expr().clone().map(
1488 crate::udfs::apply_cosh,
1489 GetOutput::from_type(DataType::Float64),
1490 );
1491 Self::from_expr(expr, None)
1492 }
1493 pub fn sinh(&self) -> Column {
1495 let expr = self.expr().clone().map(
1496 crate::udfs::apply_sinh,
1497 GetOutput::from_type(DataType::Float64),
1498 );
1499 Self::from_expr(expr, None)
1500 }
1501 pub fn tanh(&self) -> Column {
1503 let expr = self.expr().clone().map(
1504 crate::udfs::apply_tanh,
1505 GetOutput::from_type(DataType::Float64),
1506 );
1507 Self::from_expr(expr, None)
1508 }
1509 pub fn acosh(&self) -> Column {
1511 let expr = self.expr().clone().map(
1512 crate::udfs::apply_acosh,
1513 GetOutput::from_type(DataType::Float64),
1514 );
1515 Self::from_expr(expr, None)
1516 }
1517 pub fn asinh(&self) -> Column {
1519 let expr = self.expr().clone().map(
1520 crate::udfs::apply_asinh,
1521 GetOutput::from_type(DataType::Float64),
1522 );
1523 Self::from_expr(expr, None)
1524 }
1525 pub fn atanh(&self) -> Column {
1527 let expr = self.expr().clone().map(
1528 crate::udfs::apply_atanh,
1529 GetOutput::from_type(DataType::Float64),
1530 );
1531 Self::from_expr(expr, None)
1532 }
1533 pub fn cbrt(&self) -> Column {
1535 let expr = self.expr().clone().map(
1536 crate::udfs::apply_cbrt,
1537 GetOutput::from_type(DataType::Float64),
1538 );
1539 Self::from_expr(expr, None)
1540 }
1541 pub fn expm1(&self) -> Column {
1543 let expr = self.expr().clone().map(
1544 crate::udfs::apply_expm1,
1545 GetOutput::from_type(DataType::Float64),
1546 );
1547 Self::from_expr(expr, None)
1548 }
1549 pub fn log1p(&self) -> Column {
1551 let expr = self.expr().clone().map(
1552 crate::udfs::apply_log1p,
1553 GetOutput::from_type(DataType::Float64),
1554 );
1555 Self::from_expr(expr, None)
1556 }
1557 pub fn log10(&self) -> Column {
1559 let expr = self.expr().clone().map(
1560 crate::udfs::apply_log10,
1561 GetOutput::from_type(DataType::Float64),
1562 );
1563 Self::from_expr(expr, None)
1564 }
1565 pub fn log2(&self) -> Column {
1567 let expr = self.expr().clone().map(
1568 crate::udfs::apply_log2,
1569 GetOutput::from_type(DataType::Float64),
1570 );
1571 Self::from_expr(expr, None)
1572 }
1573 pub fn rint(&self) -> Column {
1575 let expr = self.expr().clone().map(
1576 crate::udfs::apply_rint,
1577 GetOutput::from_type(DataType::Float64),
1578 );
1579 Self::from_expr(expr, None)
1580 }
1581
1582 pub fn hypot(&self, other: &Column) -> Column {
1584 let xx = self.expr().clone() * self.expr().clone();
1585 let yy = other.expr().clone() * other.expr().clone();
1586 Self::from_expr((xx + yy).sqrt(), None)
1587 }
1588
1589 pub fn cast_to(&self, type_name: &str) -> Result<Column, String> {
1591 crate::functions::cast(self, type_name)
1592 }
1593
1594 pub fn try_cast_to(&self, type_name: &str) -> Result<Column, String> {
1596 crate::functions::try_cast(self, type_name)
1597 }
1598
1599 pub fn is_nan(&self) -> Column {
1601 Self::from_expr(self.expr().clone().is_nan(), None)
1602 }
1603
1604 pub fn year(&self) -> Column {
1608 Self::from_expr(self.expr().clone().dt().year(), None)
1609 }
1610
1611 pub fn month(&self) -> Column {
1613 Self::from_expr(self.expr().clone().dt().month(), None)
1614 }
1615
1616 pub fn day(&self) -> Column {
1618 Self::from_expr(self.expr().clone().dt().day(), None)
1619 }
1620
1621 pub fn dayofmonth(&self) -> Column {
1623 self.day()
1624 }
1625
1626 pub fn quarter(&self) -> Column {
1628 Self::from_expr(self.expr().clone().dt().quarter(), None)
1629 }
1630
1631 pub fn weekofyear(&self) -> Column {
1633 Self::from_expr(self.expr().clone().dt().week(), None)
1634 }
1635
1636 pub fn week(&self) -> Column {
1638 self.weekofyear()
1639 }
1640
1641 pub fn dayofweek(&self) -> Column {
1644 let w = self.expr().clone().dt().weekday();
1645 let dayofweek = (w % lit(7i32)) + lit(1i32); Self::from_expr(dayofweek, None)
1647 }
1648
1649 pub fn dayofyear(&self) -> Column {
1651 Self::from_expr(
1652 self.expr().clone().dt().ordinal_day().cast(DataType::Int32),
1653 None,
1654 )
1655 }
1656
1657 pub fn to_date(&self) -> Column {
1659 use polars::prelude::DataType;
1660 Self::from_expr(self.expr().clone().cast(DataType::Date), None)
1661 }
1662
1663 pub fn date_format(&self, format: &str) -> Column {
1665 Self::from_expr(self.expr().clone().dt().strftime(format), None)
1666 }
1667
1668 pub fn hour(&self) -> Column {
1670 Self::from_expr(self.expr().clone().dt().hour(), None)
1671 }
1672
1673 pub fn minute(&self) -> Column {
1675 Self::from_expr(self.expr().clone().dt().minute(), None)
1676 }
1677
1678 pub fn second(&self) -> Column {
1680 Self::from_expr(self.expr().clone().dt().second(), None)
1681 }
1682
1683 pub fn extract(&self, field: &str) -> Column {
1685 use polars::prelude::*;
1686 let e = self.expr().clone();
1687 let expr = match field.trim().to_lowercase().as_str() {
1688 "year" => e.dt().year(),
1689 "month" => e.dt().month(),
1690 "day" => e.dt().day(),
1691 "hour" => e.dt().hour(),
1692 "minute" => e.dt().minute(),
1693 "second" => e.dt().second(),
1694 "quarter" => e.dt().quarter(),
1695 "week" | "weekofyear" => e.dt().week(),
1696 "dayofweek" | "dow" => {
1697 let w = e.dt().weekday();
1698 (w % lit(7i32)) + lit(1i32)
1699 }
1700 "dayofyear" | "doy" => e.dt().ordinal_day().cast(DataType::Int32),
1701 _ => e.dt().year(), };
1703 Self::from_expr(expr, None)
1704 }
1705
1706 pub fn unix_micros(&self) -> Column {
1708 use polars::prelude::*;
1709 Self::from_expr(self.expr().clone().cast(DataType::Int64), None)
1710 }
1711
1712 pub fn unix_millis(&self) -> Column {
1714 use polars::prelude::*;
1715 let micros = self.expr().clone().cast(DataType::Int64);
1716 Self::from_expr(micros / lit(1000i64), None)
1717 }
1718
1719 pub fn unix_seconds(&self) -> Column {
1721 use polars::prelude::*;
1722 let micros = self.expr().clone().cast(DataType::Int64);
1723 Self::from_expr(micros / lit(1_000_000i64), None)
1724 }
1725
1726 pub fn dayname(&self) -> Column {
1728 let expr = self.expr().clone().map(
1729 crate::udfs::apply_dayname,
1730 GetOutput::from_type(DataType::String),
1731 );
1732 Self::from_expr(expr, None)
1733 }
1734
1735 pub fn weekday(&self) -> Column {
1737 let expr = self.expr().clone().map(
1738 crate::udfs::apply_weekday,
1739 GetOutput::from_type(DataType::Int32),
1740 );
1741 Self::from_expr(expr, None)
1742 }
1743
1744 pub fn date_add(&self, n: i32) -> Column {
1746 use polars::prelude::*;
1747 let date_expr = self.expr().clone().cast(DataType::Date);
1748 let dur = duration(DurationArgs::new().with_days(lit(n as i64)));
1749 Self::from_expr(date_expr + dur, None)
1750 }
1751
1752 pub fn date_sub(&self, n: i32) -> Column {
1754 use polars::prelude::*;
1755 let date_expr = self.expr().clone().cast(DataType::Date);
1756 let dur = duration(DurationArgs::new().with_days(lit(n as i64)));
1757 Self::from_expr(date_expr - dur, None)
1758 }
1759
1760 pub fn datediff(&self, other: &Column) -> Column {
1762 use polars::prelude::*;
1763 let start = self.expr().clone().cast(DataType::Date);
1764 let end = other.expr().clone().cast(DataType::Date);
1765 Self::from_expr((end - start).dt().total_days(), None)
1766 }
1767
1768 pub fn last_day(&self) -> Column {
1770 Self::from_expr(self.expr().clone().dt().month_end(), None)
1771 }
1772
1773 pub fn timestampadd(&self, unit: &str, amount: &Column) -> Column {
1775 use polars::prelude::*;
1776 let ts = self.expr().clone();
1777 let amt = amount.expr().clone().cast(DataType::Int64);
1778 let dur = match unit.trim().to_uppercase().as_str() {
1779 "DAY" | "DAYS" => duration(DurationArgs::new().with_days(amt)),
1780 "HOUR" | "HOURS" => duration(DurationArgs::new().with_hours(amt)),
1781 "MINUTE" | "MINUTES" => duration(DurationArgs::new().with_minutes(amt)),
1782 "SECOND" | "SECONDS" => duration(DurationArgs::new().with_seconds(amt)),
1783 "WEEK" | "WEEKS" => duration(DurationArgs::new().with_weeks(amt)),
1784 _ => duration(DurationArgs::new().with_days(amt)),
1785 };
1786 Self::from_expr(ts + dur, None)
1787 }
1788
1789 pub fn timestampdiff(&self, unit: &str, other: &Column) -> Column {
1791 let start = self.expr().clone();
1792 let end = other.expr().clone();
1793 let diff = end - start;
1794 let expr = match unit.trim().to_uppercase().as_str() {
1795 "HOUR" | "HOURS" => diff.dt().total_hours(),
1796 "MINUTE" | "MINUTES" => diff.dt().total_minutes(),
1797 "SECOND" | "SECONDS" => diff.dt().total_seconds(),
1798 "DAY" | "DAYS" => diff.dt().total_days(),
1799 _ => diff.dt().total_days(),
1800 };
1801 Self::from_expr(expr, None)
1802 }
1803
1804 pub fn from_utc_timestamp(&self, tz: &str) -> Column {
1806 let tz = tz.to_string();
1807 let expr = self.expr().clone().map(
1808 move |s| crate::udfs::apply_from_utc_timestamp(s, &tz),
1809 GetOutput::same_type(),
1810 );
1811 Self::from_expr(expr, None)
1812 }
1813
1814 pub fn to_utc_timestamp(&self, tz: &str) -> Column {
1816 let tz = tz.to_string();
1817 let expr = self.expr().clone().map(
1818 move |s| crate::udfs::apply_to_utc_timestamp(s, &tz),
1819 GetOutput::same_type(),
1820 );
1821 Self::from_expr(expr, None)
1822 }
1823
1824 pub fn trunc(&self, format: &str) -> Column {
1826 use polars::prelude::*;
1827 Self::from_expr(
1828 self.expr().clone().dt().truncate(lit(format.to_string())),
1829 None,
1830 )
1831 }
1832
1833 pub fn add_months(&self, n: i32) -> Column {
1835 let expr = self.expr().clone().map(
1836 move |col| crate::udfs::apply_add_months(col, n),
1837 GetOutput::from_type(DataType::Date),
1838 );
1839 Self::from_expr(expr, None)
1840 }
1841
1842 pub fn months_between(&self, start: &Column, round_off: bool) -> Column {
1845 let args = [start.expr().clone()];
1846 let expr = self.expr().clone().map_many(
1847 move |cols| crate::udfs::apply_months_between(cols, round_off),
1848 &args,
1849 GetOutput::from_type(DataType::Float64),
1850 );
1851 Self::from_expr(expr, None)
1852 }
1853
1854 pub fn next_day(&self, day_of_week: &str) -> Column {
1856 let day = day_of_week.to_string();
1857 let expr = self.expr().clone().map(
1858 move |col| crate::udfs::apply_next_day(col, &day),
1859 GetOutput::from_type(DataType::Date),
1860 );
1861 Self::from_expr(expr, None)
1862 }
1863
1864 pub fn unix_timestamp(&self, format: Option<&str>) -> Column {
1866 let fmt = format.map(String::from);
1867 let expr = self.expr().clone().map(
1868 move |col| crate::udfs::apply_unix_timestamp(col, fmt.as_deref()),
1869 GetOutput::from_type(DataType::Int64),
1870 );
1871 Self::from_expr(expr, None)
1872 }
1873
1874 pub fn from_unixtime(&self, format: Option<&str>) -> Column {
1876 let fmt = format.map(String::from);
1877 let expr = self.expr().clone().map(
1878 move |col| crate::udfs::apply_from_unixtime(col, fmt.as_deref()),
1879 GetOutput::from_type(DataType::String),
1880 );
1881 Self::from_expr(expr, None)
1882 }
1883
1884 pub fn timestamp_seconds(&self) -> Column {
1886 let expr = (self.expr().clone().cast(DataType::Int64) * lit(1_000_000i64))
1887 .cast(DataType::Datetime(TimeUnit::Microseconds, None));
1888 Self::from_expr(expr, None)
1889 }
1890
1891 pub fn timestamp_millis(&self) -> Column {
1893 let expr = (self.expr().clone().cast(DataType::Int64) * lit(1000i64))
1894 .cast(DataType::Datetime(TimeUnit::Microseconds, None));
1895 Self::from_expr(expr, None)
1896 }
1897
1898 pub fn timestamp_micros(&self) -> Column {
1900 let expr = self
1901 .expr()
1902 .clone()
1903 .cast(DataType::Int64)
1904 .cast(DataType::Datetime(TimeUnit::Microseconds, None));
1905 Self::from_expr(expr, None)
1906 }
1907
1908 pub fn unix_date(&self) -> Column {
1910 let expr = self.expr().clone().map(
1911 crate::udfs::apply_unix_date,
1912 GetOutput::from_type(DataType::Int32),
1913 );
1914 Self::from_expr(expr, None)
1915 }
1916
1917 pub fn date_from_unix_date(&self) -> Column {
1919 let expr = self.expr().clone().map(
1920 crate::udfs::apply_date_from_unix_date,
1921 GetOutput::from_type(DataType::Date),
1922 );
1923 Self::from_expr(expr, None)
1924 }
1925
1926 pub fn pmod(&self, divisor: &Column) -> Column {
1928 let args = [divisor.expr().clone()];
1929 let expr = self.expr().clone().map_many(
1930 crate::udfs::apply_pmod,
1931 &args,
1932 GetOutput::from_type(DataType::Float64),
1933 );
1934 Self::from_expr(expr, None)
1935 }
1936
1937 pub fn factorial(&self) -> Column {
1939 let expr = self.expr().clone().map(
1940 crate::udfs::apply_factorial,
1941 GetOutput::from_type(DataType::Int64),
1942 );
1943 Self::from_expr(expr, None)
1944 }
1945
1946 pub fn over(&self, partition_by: &[&str]) -> Column {
1951 let partition_exprs: Vec<Expr> = partition_by.iter().map(|s| col(*s)).collect();
1952 Self::from_expr(self.expr().clone().over(partition_exprs), None)
1953 }
1954
1955 pub fn rank(&self, descending: bool) -> Column {
1957 let opts = RankOptions {
1958 method: RankMethod::Min,
1959 descending,
1960 };
1961 Self::from_expr(self.expr().clone().rank(opts, None), None)
1962 }
1963
1964 pub fn dense_rank(&self, descending: bool) -> Column {
1966 let opts = RankOptions {
1967 method: RankMethod::Dense,
1968 descending,
1969 };
1970 Self::from_expr(self.expr().clone().rank(opts, None), None)
1971 }
1972
1973 pub fn row_number(&self, descending: bool) -> Column {
1975 let opts = RankOptions {
1976 method: RankMethod::Ordinal,
1977 descending,
1978 };
1979 Self::from_expr(self.expr().clone().rank(opts, None), None)
1980 }
1981
1982 pub fn lag(&self, n: i64) -> Column {
1984 Self::from_expr(self.expr().clone().shift(polars::prelude::lit(n)), None)
1985 }
1986
1987 pub fn lead(&self, n: i64) -> Column {
1989 Self::from_expr(self.expr().clone().shift(polars::prelude::lit(-n)), None)
1990 }
1991
1992 pub fn first_value(&self) -> Column {
1994 Self::from_expr(self.expr().clone().first(), None)
1995 }
1996
1997 pub fn last_value(&self) -> Column {
1999 Self::from_expr(self.expr().clone().last(), None)
2000 }
2001
2002 pub fn percent_rank(&self, partition_by: &[&str], descending: bool) -> Column {
2004 use polars::prelude::*;
2005 let partition_exprs: Vec<Expr> = partition_by.iter().map(|s| col(*s)).collect();
2006 let opts = RankOptions {
2007 method: RankMethod::Min,
2008 descending,
2009 };
2010 let rank_expr = self
2011 .expr()
2012 .clone()
2013 .rank(opts, None)
2014 .over(partition_exprs.clone());
2015 let count_expr = self.expr().clone().count().over(partition_exprs.clone());
2016 let rank_f = (rank_expr - lit(1i64)).cast(DataType::Float64);
2017 let count_f = (count_expr - lit(1i64)).cast(DataType::Float64);
2018 let pct = rank_f / count_f;
2019 Self::from_expr(pct, None)
2020 }
2021
2022 pub fn cume_dist(&self, partition_by: &[&str], descending: bool) -> Column {
2024 use polars::prelude::*;
2025 let partition_exprs: Vec<Expr> = partition_by.iter().map(|s| col(*s)).collect();
2026 let opts = RankOptions {
2027 method: RankMethod::Ordinal,
2028 descending,
2029 };
2030 let row_num = self
2031 .expr()
2032 .clone()
2033 .rank(opts, None)
2034 .over(partition_exprs.clone());
2035 let count_expr = self.expr().clone().count().over(partition_exprs.clone());
2036 let cume = row_num / count_expr;
2037 Self::from_expr(cume.cast(DataType::Float64), None)
2038 }
2039
2040 pub fn ntile(&self, n: u32, partition_by: &[&str], descending: bool) -> Column {
2042 use polars::prelude::*;
2043 let partition_exprs: Vec<Expr> = partition_by.iter().map(|s| col(*s)).collect();
2044 let opts = RankOptions {
2045 method: RankMethod::Ordinal,
2046 descending,
2047 };
2048 let rank_expr = self
2049 .expr()
2050 .clone()
2051 .rank(opts, None)
2052 .over(partition_exprs.clone());
2053 let count_expr = self.expr().clone().count().over(partition_exprs.clone());
2054 let n_expr = lit(n as f64);
2055 let rank_f = rank_expr.cast(DataType::Float64);
2056 let count_f = count_expr.cast(DataType::Float64);
2057 let bucket = (rank_f * n_expr / count_f).ceil();
2058 let clamped = bucket.clip(lit(1.0), lit(n as f64));
2059 Self::from_expr(clamped.cast(DataType::Int32), None)
2060 }
2061
2062 pub fn nth_value(&self, n: i64, partition_by: &[&str], descending: bool) -> Column {
2064 use polars::prelude::*;
2065 let partition_exprs: Vec<Expr> = partition_by.iter().map(|s| col(*s)).collect();
2066 let opts = RankOptions {
2067 method: RankMethod::Ordinal,
2068 descending,
2069 };
2070 let rank_expr = self
2071 .expr()
2072 .clone()
2073 .rank(opts, None)
2074 .over(partition_exprs.clone());
2075 let cond_col = Self::from_expr(rank_expr.eq(lit(n)), None);
2076 let null_col = Self::from_expr(Expr::Literal(LiteralValue::Null), None);
2077 let value_col = Self::from_expr(self.expr().clone(), None);
2078 let when_expr = crate::functions::when(&cond_col)
2079 .then(&value_col)
2080 .otherwise(&null_col)
2081 .into_expr();
2082 let windowed = when_expr.max().over(partition_exprs);
2083 Self::from_expr(windowed, None)
2084 }
2085
2086 pub fn array_size(&self) -> Column {
2088 use polars::prelude::*;
2089 Self::from_expr(
2090 self.expr().clone().list().len().cast(DataType::Int32),
2091 Some("size".to_string()),
2092 )
2093 }
2094
2095 pub fn cardinality(&self) -> Column {
2097 self.array_size()
2098 }
2099
2100 pub fn array_contains(&self, value: Expr) -> Column {
2102 Self::from_expr(self.expr().clone().list().contains(value), None)
2103 }
2104
2105 pub fn array_join(&self, separator: &str) -> Column {
2107 use polars::prelude::*;
2108 Self::from_expr(
2109 self.expr()
2110 .clone()
2111 .list()
2112 .join(lit(separator.to_string()), false),
2113 None,
2114 )
2115 }
2116
2117 pub fn array_max(&self) -> Column {
2119 Self::from_expr(self.expr().clone().list().max(), None)
2120 }
2121
2122 pub fn array_min(&self) -> Column {
2124 Self::from_expr(self.expr().clone().list().min(), None)
2125 }
2126
2127 pub fn element_at(&self, index: i64) -> Column {
2129 use polars::prelude::*;
2130 let idx = if index >= 1 { index - 1 } else { index };
2132 Self::from_expr(self.expr().clone().list().get(lit(idx), true), None)
2133 }
2134
2135 pub fn array_sort(&self) -> Column {
2137 use polars::prelude::SortOptions;
2138 let opts = SortOptions {
2139 descending: false,
2140 nulls_last: true,
2141 ..Default::default()
2142 };
2143 Self::from_expr(self.expr().clone().list().sort(opts), None)
2144 }
2145
2146 pub fn array_distinct(&self) -> Column {
2148 let expr = self.expr().clone().map(
2149 crate::udfs::apply_array_distinct_first_order,
2150 GetOutput::same_type(),
2151 );
2152 Self::from_expr(expr, None)
2153 }
2154
2155 pub fn mode(&self) -> Column {
2158 let vc = self
2162 .expr()
2163 .clone()
2164 .value_counts(true, false, "count", false);
2165 let first_struct = vc.first();
2166 let val_expr = first_struct.struct_().field_by_index(0);
2167 Self::from_expr(val_expr, Some("mode".to_string()))
2168 }
2169
2170 pub fn array_slice(&self, start: i64, length: Option<i64>) -> Column {
2172 use polars::prelude::*;
2173 let start_expr = lit((start - 1).max(0)); let length_expr = length.map(lit).unwrap_or_else(|| lit(i64::MAX));
2175 Self::from_expr(
2176 self.expr().clone().list().slice(start_expr, length_expr),
2177 None,
2178 )
2179 }
2180
2181 pub fn explode(&self) -> Column {
2183 Self::from_expr(self.expr().clone().explode(), None)
2184 }
2185
2186 pub fn explode_outer(&self) -> Column {
2188 Self::from_expr(self.expr().clone().explode(), None)
2189 }
2190
2191 pub fn posexplode_outer(&self) -> (Column, Column) {
2193 self.posexplode()
2194 }
2195
2196 pub fn arrays_zip(&self, other: &Column) -> Column {
2198 let args = [other.expr().clone()];
2199 let expr = self.expr().clone().map_many(
2200 crate::udfs::apply_arrays_zip,
2201 &args,
2202 GetOutput::same_type(),
2203 );
2204 Self::from_expr(expr, None)
2205 }
2206
2207 pub fn arrays_overlap(&self, other: &Column) -> Column {
2209 let args = [other.expr().clone()];
2210 let expr = self.expr().clone().map_many(
2211 crate::udfs::apply_arrays_overlap,
2212 &args,
2213 GetOutput::from_type(DataType::Boolean),
2214 );
2215 Self::from_expr(expr, None)
2216 }
2217
2218 pub fn array_agg(&self) -> Column {
2220 Self::from_expr(self.expr().clone().implode(), None)
2221 }
2222
2223 pub fn array_position(&self, value: Expr) -> Column {
2226 use polars::prelude::{DataType, NULL};
2227 let cond = Self::from_expr(col("").eq(value), None);
2229 let then_val = Self::from_expr(col("").cum_count(false), None);
2230 let else_val = Self::from_expr(lit(NULL), None);
2231 let idx_expr = crate::functions::when(&cond)
2232 .then(&then_val)
2233 .otherwise(&else_val)
2234 .into_expr();
2235 let list_expr = self
2236 .expr()
2237 .clone()
2238 .list()
2239 .eval(idx_expr, false)
2240 .list()
2241 .min()
2242 .fill_null(lit(0i64))
2243 .cast(DataType::Int64);
2244 Self::from_expr(list_expr, Some("array_position".to_string()))
2245 }
2246
2247 pub fn array_compact(&self) -> Column {
2249 let list_expr = self.expr().clone().list().drop_nulls();
2250 Self::from_expr(list_expr, None)
2251 }
2252
2253 pub fn array_remove(&self, value: Expr) -> Column {
2256 use polars::prelude::NULL;
2257 let cond = Self::from_expr(col("").neq(value), None);
2259 let then_val = Self::from_expr(col(""), None);
2260 let else_val = Self::from_expr(lit(NULL), None);
2261 let elem_neq = crate::functions::when(&cond)
2262 .then(&then_val)
2263 .otherwise(&else_val)
2264 .into_expr();
2265 let list_expr = self
2266 .expr()
2267 .clone()
2268 .list()
2269 .eval(elem_neq, false)
2270 .list()
2271 .drop_nulls();
2272 Self::from_expr(list_expr, None)
2273 }
2274
2275 pub fn array_repeat(&self, n: i64) -> Column {
2277 let expr = self.expr().clone().map(
2278 move |c| crate::udfs::apply_array_repeat(c, n),
2279 GetOutput::same_type(),
2280 );
2281 Self::from_expr(expr, None)
2282 }
2283
2284 pub fn array_flatten(&self) -> Column {
2286 let expr = self
2287 .expr()
2288 .clone()
2289 .map(crate::udfs::apply_array_flatten, GetOutput::same_type());
2290 Self::from_expr(expr, None)
2291 }
2292
2293 pub fn array_append(&self, elem: &Column) -> Column {
2295 let args = [elem.expr().clone()];
2296 let expr = self.expr().clone().map_many(
2297 crate::udfs::apply_array_append,
2298 &args,
2299 GetOutput::same_type(),
2300 );
2301 Self::from_expr(expr, None)
2302 }
2303
2304 pub fn array_prepend(&self, elem: &Column) -> Column {
2306 let args = [elem.expr().clone()];
2307 let expr = self.expr().clone().map_many(
2308 crate::udfs::apply_array_prepend,
2309 &args,
2310 GetOutput::same_type(),
2311 );
2312 Self::from_expr(expr, None)
2313 }
2314
2315 pub fn array_insert(&self, pos: &Column, elem: &Column) -> Column {
2317 let args = [pos.expr().clone(), elem.expr().clone()];
2318 let expr = self.expr().clone().map_many(
2319 crate::udfs::apply_array_insert,
2320 &args,
2321 GetOutput::same_type(),
2322 );
2323 Self::from_expr(expr, None)
2324 }
2325
2326 pub fn array_except(&self, other: &Column) -> Column {
2328 let args = [other.expr().clone()];
2329 let expr = self.expr().clone().map_many(
2330 crate::udfs::apply_array_except,
2331 &args,
2332 GetOutput::same_type(),
2333 );
2334 Self::from_expr(expr, None)
2335 }
2336
2337 pub fn array_intersect(&self, other: &Column) -> Column {
2339 let args = [other.expr().clone()];
2340 let expr = self.expr().clone().map_many(
2341 crate::udfs::apply_array_intersect,
2342 &args,
2343 GetOutput::same_type(),
2344 );
2345 Self::from_expr(expr, None)
2346 }
2347
2348 pub fn array_union(&self, other: &Column) -> Column {
2350 let args = [other.expr().clone()];
2351 let expr = self.expr().clone().map_many(
2352 crate::udfs::apply_array_union,
2353 &args,
2354 GetOutput::same_type(),
2355 );
2356 Self::from_expr(expr, None)
2357 }
2358
2359 pub fn zip_with(&self, other: &Column, merge: Expr) -> Column {
2362 let args = [other.expr().clone()];
2363 let zip_expr = self.expr().clone().map_many(
2364 crate::udfs::apply_zip_arrays_to_struct,
2365 &args,
2366 GetOutput::same_type(),
2367 );
2368 let list_expr = zip_expr.list().eval(merge, false);
2369 Self::from_expr(list_expr, None)
2370 }
2371
2372 pub fn array_exists(&self, predicate: Expr) -> Column {
2374 let pred_expr = self
2375 .expr()
2376 .clone()
2377 .list()
2378 .eval(predicate, false)
2379 .list()
2380 .any();
2381 Self::from_expr(pred_expr, Some("exists".to_string()))
2382 }
2383
2384 pub fn array_forall(&self, predicate: Expr) -> Column {
2386 let pred_expr = self
2387 .expr()
2388 .clone()
2389 .list()
2390 .eval(predicate, false)
2391 .list()
2392 .all();
2393 Self::from_expr(pred_expr, Some("forall".to_string()))
2394 }
2395
2396 pub fn array_filter(&self, predicate: Expr) -> Column {
2398 use polars::prelude::NULL;
2399 let then_val = Self::from_expr(col(""), None);
2400 let else_val = Self::from_expr(lit(NULL), None);
2401 let elem_expr = crate::functions::when(&Self::from_expr(predicate, None))
2402 .then(&then_val)
2403 .otherwise(&else_val)
2404 .into_expr();
2405 let list_expr = self
2406 .expr()
2407 .clone()
2408 .list()
2409 .eval(elem_expr, false)
2410 .list()
2411 .drop_nulls();
2412 Self::from_expr(list_expr, None)
2413 }
2414
2415 pub fn array_transform(&self, f: Expr) -> Column {
2417 let list_expr = self.expr().clone().list().eval(f, false);
2418 Self::from_expr(list_expr, None)
2419 }
2420
2421 pub fn array_sum(&self) -> Column {
2423 Self::from_expr(self.expr().clone().list().sum(), None)
2424 }
2425
2426 pub fn array_aggregate(&self, zero: &Column) -> Column {
2428 let sum_expr = self.expr().clone().list().sum();
2429 Self::from_expr(sum_expr + zero.expr().clone(), None)
2430 }
2431
2432 pub fn array_mean(&self) -> Column {
2434 Self::from_expr(self.expr().clone().list().mean(), None)
2435 }
2436
2437 pub fn posexplode(&self) -> (Column, Column) {
2440 let pos_expr = self
2441 .expr()
2442 .clone()
2443 .list()
2444 .eval(col("").cum_count(false), false)
2445 .explode();
2446 let val_expr = self.expr().clone().explode();
2447 (
2448 Self::from_expr(pos_expr, Some("pos".to_string())),
2449 Self::from_expr(val_expr, Some("col".to_string())),
2450 )
2451 }
2452
2453 pub fn map_keys(&self) -> Column {
2455 let elem_key = col("").struct_().field_by_name("key");
2456 let list_expr = self.expr().clone().list().eval(elem_key, false);
2457 Self::from_expr(list_expr, None)
2458 }
2459
2460 pub fn map_values(&self) -> Column {
2462 let elem_val = col("").struct_().field_by_name("value");
2463 let list_expr = self.expr().clone().list().eval(elem_val, false);
2464 Self::from_expr(list_expr, None)
2465 }
2466
2467 pub fn map_entries(&self) -> Column {
2469 Self::from_expr(self.expr().clone(), None)
2470 }
2471
2472 pub fn map_from_arrays(&self, values: &Column) -> Column {
2474 let args = [values.expr().clone()];
2475 let expr = self.expr().clone().map_many(
2476 crate::udfs::apply_map_from_arrays,
2477 &args,
2478 GetOutput::same_type(),
2479 );
2480 Self::from_expr(expr, None)
2481 }
2482
2483 pub fn map_concat(&self, other: &Column) -> Column {
2485 let args = [other.expr().clone()];
2486 let expr = self.expr().clone().map_many(
2487 crate::udfs::apply_map_concat,
2488 &args,
2489 GetOutput::same_type(),
2490 );
2491 Self::from_expr(expr, None)
2492 }
2493
2494 pub fn transform_keys(&self, key_expr: Expr) -> Column {
2496 use polars::prelude::as_struct;
2497 let value = col("").struct_().field_by_name("value");
2498 let new_struct = as_struct(vec![key_expr.alias("key"), value.alias("value")]);
2499 let list_expr = self.expr().clone().list().eval(new_struct, false);
2500 Self::from_expr(list_expr, None)
2501 }
2502
2503 pub fn transform_values(&self, value_expr: Expr) -> Column {
2505 use polars::prelude::as_struct;
2506 let key = col("").struct_().field_by_name("key");
2507 let new_struct = as_struct(vec![key.alias("key"), value_expr.alias("value")]);
2508 let list_expr = self.expr().clone().list().eval(new_struct, false);
2509 Self::from_expr(list_expr, None)
2510 }
2511
2512 pub fn map_zip_with(&self, other: &Column, merge: Expr) -> Column {
2515 use polars::prelude::as_struct;
2516 let args = [other.expr().clone()];
2517 let zip_expr = self.expr().clone().map_many(
2518 crate::udfs::apply_map_zip_to_struct,
2519 &args,
2520 GetOutput::same_type(),
2521 );
2522 let key_field = col("").struct_().field_by_name("key").alias("key");
2523 let value_field = merge.alias("value");
2524 let merge_expr = as_struct(vec![key_field, value_field]);
2525 let list_expr = zip_expr.list().eval(merge_expr, false);
2526 Self::from_expr(list_expr, None)
2527 }
2528
2529 pub fn map_filter(&self, predicate: Expr) -> Column {
2532 use polars::prelude::NULL;
2533 let then_val = Self::from_expr(col(""), None);
2534 let else_val = Self::from_expr(lit(NULL), None);
2535 let elem_expr = crate::functions::when(&Self::from_expr(predicate, None))
2536 .then(&then_val)
2537 .otherwise(&else_val)
2538 .into_expr();
2539 let list_expr = self
2540 .expr()
2541 .clone()
2542 .list()
2543 .eval(elem_expr, false)
2544 .list()
2545 .drop_nulls();
2546 Self::from_expr(list_expr, None)
2547 }
2548
2549 pub fn map_from_entries(&self) -> Column {
2551 Self::from_expr(self.expr().clone(), None)
2552 }
2553
2554 pub fn map_contains_key(&self, key: &Column) -> Column {
2556 let args = [key.expr().clone()];
2557 let expr = self.expr().clone().map_many(
2558 crate::udfs::apply_map_contains_key,
2559 &args,
2560 GetOutput::from_type(DataType::Boolean),
2561 );
2562 Self::from_expr(expr, None)
2563 }
2564
2565 pub fn get(&self, key: &Column) -> Column {
2567 let args = [key.expr().clone()];
2568 let expr =
2569 self.expr()
2570 .clone()
2571 .map_many(crate::udfs::apply_get, &args, GetOutput::same_type());
2572 Self::from_expr(expr, None)
2573 }
2574
2575 pub fn get_json_object(&self, path: &str) -> Column {
2577 let path_expr = polars::prelude::lit(path.to_string());
2578 let out = self.expr().clone().str().json_path_match(path_expr);
2579 Self::from_expr(out, None)
2580 }
2581
2582 pub fn from_json(&self, schema: Option<polars::datatypes::DataType>) -> Column {
2584 let out = self.expr().clone().str().json_decode(schema, None);
2585 Self::from_expr(out, None)
2586 }
2587
2588 pub fn to_json(&self) -> Column {
2590 let out = self.expr().clone().struct_().json_encode();
2591 Self::from_expr(out, None)
2592 }
2593
2594 pub fn json_array_length(&self, path: &str) -> Column {
2596 let path = path.to_string();
2597 let expr = self.expr().clone().map(
2598 move |s| crate::udfs::apply_json_array_length(s, &path),
2599 GetOutput::from_type(DataType::Int64),
2600 );
2601 Self::from_expr(expr, None)
2602 }
2603
2604 pub fn json_object_keys(&self) -> Column {
2606 let expr = self.expr().clone().map(
2607 crate::udfs::apply_json_object_keys,
2608 GetOutput::from_type(DataType::List(Box::new(DataType::String))),
2609 );
2610 Self::from_expr(expr, None)
2611 }
2612
2613 pub fn json_tuple(&self, keys: &[&str]) -> Column {
2615 let keys_vec: Vec<String> = keys.iter().map(|s| (*s).to_string()).collect();
2616 let struct_fields: Vec<polars::datatypes::Field> = keys_vec
2617 .iter()
2618 .map(|k| polars::datatypes::Field::new(k.as_str().into(), DataType::String))
2619 .collect();
2620 let expr = self.expr().clone().map(
2621 move |s| crate::udfs::apply_json_tuple(s, &keys_vec),
2622 GetOutput::from_type(DataType::Struct(struct_fields)),
2623 );
2624 Self::from_expr(expr, None)
2625 }
2626
2627 pub fn from_csv(&self) -> Column {
2629 let expr = self.expr().clone().map(
2630 crate::udfs::apply_from_csv,
2631 GetOutput::from_type(DataType::Struct(vec![])),
2632 );
2633 Self::from_expr(expr, None)
2634 }
2635
2636 pub fn to_csv(&self) -> Column {
2638 let expr = self.expr().clone().map(
2639 crate::udfs::apply_to_csv,
2640 GetOutput::from_type(DataType::String),
2641 );
2642 Self::from_expr(expr, None)
2643 }
2644
2645 pub fn parse_url(&self, part: &str, key: Option<&str>) -> Column {
2648 let part = part.to_string();
2649 let key_owned = key.map(String::from);
2650 let expr = self.expr().clone().map(
2651 move |s| crate::udfs::apply_parse_url(s, &part, key_owned.as_deref()),
2652 GetOutput::from_type(DataType::String),
2653 );
2654 Self::from_expr(expr, None)
2655 }
2656
2657 pub fn hash(&self) -> Column {
2659 let expr = self.expr().clone().map(
2660 crate::udfs::apply_hash_one,
2661 GetOutput::from_type(DataType::Int64),
2662 );
2663 Self::from_expr(expr, None)
2664 }
2665
2666 pub fn isin(&self, other: &Column) -> Column {
2668 let out = self.expr().clone().is_in(other.expr().clone());
2669 Self::from_expr(out, None)
2670 }
2671
2672 pub fn url_decode(&self) -> Column {
2674 let expr = self.expr().clone().map(
2675 crate::udfs::apply_url_decode,
2676 GetOutput::from_type(DataType::String),
2677 );
2678 Self::from_expr(expr, None)
2679 }
2680
2681 pub fn url_encode(&self) -> Column {
2683 let expr = self.expr().clone().map(
2684 crate::udfs::apply_url_encode,
2685 GetOutput::from_type(DataType::String),
2686 );
2687 Self::from_expr(expr, None)
2688 }
2689
2690 pub fn shift_left(&self, n: i32) -> Column {
2692 use polars::prelude::*;
2693 let pow = lit(2i64).pow(lit(n as i64));
2694 Self::from_expr(
2695 (self.expr().clone().cast(DataType::Int64) * pow).cast(DataType::Int64),
2696 None,
2697 )
2698 }
2699
2700 pub fn shift_right(&self, n: i32) -> Column {
2702 use polars::prelude::*;
2703 let pow = lit(2i64).pow(lit(n as i64));
2704 Self::from_expr(
2705 (self.expr().clone().cast(DataType::Int64) / pow).cast(DataType::Int64),
2706 None,
2707 )
2708 }
2709
2710 pub fn shift_right_unsigned(&self, n: i32) -> Column {
2712 let expr = self.expr().clone().map(
2713 move |s| crate::udfs::apply_shift_right_unsigned(s, n),
2714 GetOutput::from_type(DataType::Int64),
2715 );
2716 Self::from_expr(expr, None)
2717 }
2718}
2719
#[cfg(test)]
mod tests {
    use super::Column;
    use polars::prelude::{col, df, lit, IntoLazy};

    /// Five-row frame without nulls: `a` holds 1..=5, `b` holds 10, 20, .., 50.
    fn test_df() -> polars::prelude::DataFrame {
        let frame = df!(
            "a" => &[1, 2, 3, 4, 5],
            "b" => &[10, 20, 30, 40, 50]
        );
        frame.unwrap()
    }

    /// Five-row frame where `a` has two nulls and `b` has three.
    fn test_df_with_nulls() -> polars::prelude::DataFrame {
        let frame = df!(
            "a" => &[Some(1), Some(2), None, Some(4), None],
            "b" => &[Some(10), None, Some(30), None, None]
        );
        frame.unwrap()
    }

    /// Filters `frame` by `predicate` and returns how many rows remain.
    fn rows_matching(frame: polars::prelude::DataFrame, predicate: Column) -> usize {
        let kept = frame
            .lazy()
            .filter(predicate.into_expr())
            .collect()
            .unwrap();
        kept.height()
    }

    /// Evaluates `a <=> b` (null-safe equality) on `frame`, one flag per row.
    fn null_safe_eq_flags(frame: polars::prelude::DataFrame) -> Vec<Option<bool>> {
        let lhs = Column::new("a".to_string());
        let rhs = Column::new("b".to_string());
        let comparison = lhs.eq_null_safe(&rhs);

        let evaluated = frame
            .lazy()
            .with_column(comparison.into_expr().alias("eq_null_safe"))
            .collect()
            .unwrap();

        let flag_column = evaluated.column("eq_null_safe").unwrap();
        let flags: Vec<Option<bool>> = flag_column.bool().unwrap().into_iter().collect();
        flags
    }

    #[test]
    fn test_column_new() {
        let created = Column::new("age".to_string());
        assert_eq!(created.name(), "age");
    }

    #[test]
    fn test_column_from_expr() {
        let named = Column::from_expr(col("test"), Some("test".to_string()));
        assert_eq!(named.name(), "test");
    }

    #[test]
    fn test_column_from_expr_default_name() {
        // Without an explicit name the placeholder is used.
        let unnamed = Column::from_expr(col("test").gt(lit(5)), None);
        assert_eq!(unnamed.name(), "<expr>");
    }

    #[test]
    fn test_column_alias() {
        let original = Column::new("original".to_string());
        let renamed = original.alias("new_name");
        assert_eq!(renamed.name(), "new_name");
    }

    #[test]
    fn test_column_gt() {
        // Rows with a > 3: 4 and 5.
        let source = Column::new("a".to_string());
        let predicate = source.gt(lit(3));
        assert_eq!(rows_matching(test_df(), predicate), 2);
    }

    #[test]
    fn test_column_lt() {
        // Rows with a < 3: 1 and 2.
        let source = Column::new("a".to_string());
        let predicate = source.lt(lit(3));
        assert_eq!(rows_matching(test_df(), predicate), 2);
    }

    #[test]
    fn test_column_eq() {
        // Only the row with a == 3.
        let source = Column::new("a".to_string());
        let predicate = source.eq(lit(3));
        assert_eq!(rows_matching(test_df(), predicate), 1);
    }

    #[test]
    fn test_column_neq() {
        // Every row except a == 3.
        let source = Column::new("a".to_string());
        let predicate = source.neq(lit(3));
        assert_eq!(rows_matching(test_df(), predicate), 4);
    }

    #[test]
    fn test_column_gt_eq() {
        // Rows with a >= 3: 3, 4 and 5.
        let source = Column::new("a".to_string());
        let predicate = source.gt_eq(lit(3));
        assert_eq!(rows_matching(test_df(), predicate), 3);
    }

    #[test]
    fn test_column_lt_eq() {
        // Rows with a <= 3: 1, 2 and 3.
        let source = Column::new("a".to_string());
        let predicate = source.lt_eq(lit(3));
        assert_eq!(rows_matching(test_df(), predicate), 3);
    }

    #[test]
    fn test_column_is_null() {
        // Column `a` contains two nulls.
        let source = Column::new("a".to_string());
        let predicate = source.is_null();
        assert_eq!(rows_matching(test_df_with_nulls(), predicate), 2);
    }

    #[test]
    fn test_column_is_not_null() {
        // Column `a` contains three non-null values.
        let source = Column::new("a".to_string());
        let predicate = source.is_not_null();
        assert_eq!(rows_matching(test_df_with_nulls(), predicate), 3);
    }

    #[test]
    fn test_eq_null_safe_both_null() {
        let frame = df!(
            "a" => &[Some(1), None, Some(3)],
            "b" => &[Some(1), None, Some(4)]
        )
        .unwrap();

        let flags = null_safe_eq_flags(frame);

        // Equal values, both null, different values.
        assert_eq!(flags[0], Some(true));
        assert_eq!(flags[1], Some(true));
        assert_eq!(flags[2], Some(false));
    }

    #[test]
    fn test_eq_null_safe_one_null() {
        let frame = df!(
            "a" => &[Some(1), None, Some(3)],
            "b" => &[Some(1), Some(2), None]
        )
        .unwrap();

        let flags = null_safe_eq_flags(frame);

        // Equal values, left null only, right null only.
        assert_eq!(flags[0], Some(true));
        assert_eq!(flags[1], Some(false));
        assert_eq!(flags[2], Some(false));
    }
}