1use polars::prelude::{
2 col, lit, DataType, Expr, GetOutput, ListNameSpaceExtension, RankMethod, RankOptions, TimeUnit,
3};
4
/// Translates a SQL `LIKE` pattern into an anchored regular expression.
///
/// * `%` becomes `.*` and `_` becomes `.`.
/// * Every other character is emitted literally, backslash-escaped when it is
///   a regex metacharacter.
/// * A character preceded by `escape_char` is always emitted literally.
/// * An `escape_char` that ends the pattern (nothing left to escape) is treated
///   as a literal occurrence of itself. It is only backslash-escaped when it is
///   a metacharacter, so an escape character such as `x` no longer yields the
///   bogus regex escape `\x`.
///
/// The result is wrapped in `^...$` so it must match the whole input.
fn like_pattern_to_regex(pattern: &str, escape_char: Option<char>) -> String {
    // Characters that carry special meaning in the regex dialect and therefore
    // need a leading backslash to be matched literally.
    const REGEX_META: &str = "\\.*+?[](){}^$|";

    // Appends `c` as a literal, escaping it only when required.
    fn push_literal(out: &mut String, c: char) {
        if REGEX_META.contains(c) {
            out.push('\\');
        }
        out.push(c);
    }

    let mut out = String::with_capacity(pattern.len() * 2 + 2);
    out.push('^');
    let mut chars = pattern.chars();
    while let Some(c) = chars.next() {
        if escape_char == Some(c) {
            // Escaped character, or the escape character itself when dangling.
            push_literal(&mut out, chars.next().unwrap_or(c));
        } else {
            match c {
                '%' => out.push_str(".*"),
                '_' => out.push('.'),
                other => push_literal(&mut out, other),
            }
        }
    }
    out.push('$');
    out
}
37
/// Tag recorded on a [`Column`] that was built from one of the random-number
/// constructors, together with the optional seed it was given.
#[derive(Debug, Clone, Copy)]
pub enum DeferredRandom {
    /// Set by `Column::from_rand`; holds the optional seed.
    Rand(Option<u64>),
    /// Set by `Column::from_randn`; holds the optional seed.
    Randn(Option<u64>),
}
44
/// A named wrapper around a Polars [`Expr`].
#[derive(Debug, Clone)]
pub struct Column {
    // Display name: the source column name, an alias, or a generated label.
    name: String,
    // `expr` is the wrapped Polars expression. `deferred` is `Some` for columns
    // created by `from_rand` / `from_randn` (and aliases of them) in the code
    // visible here; other constructors set it to `None`.
    expr: Expr, pub(crate) deferred: Option<DeferredRandom>,
}
54
55impl Column {
56 pub fn new(name: String) -> Self {
58 Column {
59 name: name.clone(),
60 expr: col(&name),
61 deferred: None,
62 }
63 }
64
65 pub fn from_expr(expr: Expr, name: Option<String>) -> Self {
67 let display_name = name.unwrap_or_else(|| "<expr>".to_string());
68 Column {
69 name: display_name,
70 expr,
71 deferred: None,
72 }
73 }
74
75 pub fn from_rand(seed: Option<u64>) -> Self {
77 let expr = lit(1i64).cum_sum(false).map(
78 move |c| crate::udfs::apply_rand_with_seed(c, seed),
79 GetOutput::from_type(DataType::Float64),
80 );
81 Column {
82 name: "rand".to_string(),
83 expr,
84 deferred: Some(DeferredRandom::Rand(seed)),
85 }
86 }
87
88 pub fn from_randn(seed: Option<u64>) -> Self {
90 let expr = lit(1i64).cum_sum(false).map(
91 move |c| crate::udfs::apply_randn_with_seed(c, seed),
92 GetOutput::from_type(DataType::Float64),
93 );
94 Column {
95 name: "randn".to_string(),
96 expr,
97 deferred: Some(DeferredRandom::Randn(seed)),
98 }
99 }
100
101 pub fn expr(&self) -> &Expr {
103 &self.expr
104 }
105
106 pub fn into_expr(self) -> Expr {
108 self.expr
109 }
110
111 pub fn name(&self) -> &str {
113 &self.name
114 }
115
116 pub fn alias(&self, name: &str) -> Column {
118 Column {
119 name: name.to_string(),
120 expr: self.expr.clone().alias(name),
121 deferred: self.deferred,
122 }
123 }
124
125 pub fn asc(&self) -> crate::functions::SortOrder {
127 crate::functions::asc(self)
128 }
129
130 pub fn asc_nulls_first(&self) -> crate::functions::SortOrder {
132 crate::functions::asc_nulls_first(self)
133 }
134
135 pub fn asc_nulls_last(&self) -> crate::functions::SortOrder {
137 crate::functions::asc_nulls_last(self)
138 }
139
140 pub fn desc(&self) -> crate::functions::SortOrder {
142 crate::functions::desc(self)
143 }
144
145 pub fn desc_nulls_first(&self) -> crate::functions::SortOrder {
147 crate::functions::desc_nulls_first(self)
148 }
149
150 pub fn desc_nulls_last(&self) -> crate::functions::SortOrder {
152 crate::functions::desc_nulls_last(self)
153 }
154
155 pub fn is_null(&self) -> Column {
157 Column {
158 name: format!("({} IS NULL)", self.name),
159 expr: self.expr.clone().is_null(),
160 deferred: None,
161 }
162 }
163
164 pub fn is_not_null(&self) -> Column {
166 Column {
167 name: format!("({} IS NOT NULL)", self.name),
168 expr: self.expr.clone().is_not_null(),
169 deferred: None,
170 }
171 }
172
173 pub fn isnull(&self) -> Column {
175 self.is_null()
176 }
177
178 pub fn isnotnull(&self) -> Column {
180 self.is_not_null()
181 }
182
183 fn null_boolean_expr() -> Expr {
185 use polars::prelude::*;
186 lit(NULL).cast(DataType::Boolean)
188 }
189
190 pub fn like(&self, pattern: &str, escape_char: Option<char>) -> Column {
193 let regex = like_pattern_to_regex(pattern, escape_char);
194 self.regexp_like(®ex)
195 }
196
197 pub fn ilike(&self, pattern: &str, escape_char: Option<char>) -> Column {
200 use polars::prelude::*;
201 let regex = format!("(?i){}", like_pattern_to_regex(pattern, escape_char));
202 Self::from_expr(self.expr().clone().str().contains(lit(regex), false), None)
203 }
204
205 pub fn eq_pyspark(&self, other: &Column) -> Column {
211 let left_null = self.expr().clone().is_null();
213 let right_null = other.expr().clone().is_null();
214 let either_null = left_null.clone().or(right_null.clone());
215
216 let eq_result = self.expr().clone().eq(other.expr().clone());
218
219 let null_boolean = Self::null_boolean_expr();
221 let null_aware_expr = crate::functions::when(&Self::from_expr(either_null, None))
222 .then(&Self::from_expr(null_boolean, None))
223 .otherwise(&Self::from_expr(eq_result, None));
224
225 Self::from_expr(null_aware_expr.into_expr(), None)
226 }
227
228 pub fn ne_pyspark(&self, other: &Column) -> Column {
231 let left_null = self.expr().clone().is_null();
233 let right_null = other.expr().clone().is_null();
234 let either_null = left_null.clone().or(right_null.clone());
235
236 let ne_result = self.expr().clone().neq(other.expr().clone());
238
239 let null_boolean = Self::null_boolean_expr();
241 let null_aware_expr = crate::functions::when(&Self::from_expr(either_null, None))
242 .then(&Self::from_expr(null_boolean, None))
243 .otherwise(&Self::from_expr(ne_result, None));
244
245 Self::from_expr(null_aware_expr.into_expr(), None)
246 }
247
248 pub fn eq_null_safe(&self, other: &Column) -> Column {
251 use crate::functions::{lit_bool, when};
252
253 let left_null = self.expr().clone().is_null();
254 let right_null = other.expr().clone().is_null();
255 let both_null = left_null.clone().and(right_null.clone());
256 let either_null = left_null.clone().or(right_null.clone());
257
258 let eq_result = self.expr().clone().eq(other.expr().clone());
260
261 when(&Self::from_expr(both_null, None))
265 .then(&lit_bool(true))
266 .otherwise(
267 &when(&Self::from_expr(either_null, None))
268 .then(&lit_bool(false))
269 .otherwise(&Self::from_expr(eq_result, None)),
270 )
271 }
272
273 pub fn gt_pyspark(&self, other: &Column) -> Column {
276 let left_null = self.expr().clone().is_null();
278 let right_null = other.expr().clone().is_null();
279 let either_null = left_null.clone().or(right_null.clone());
280
281 let gt_result = self.expr().clone().gt(other.expr().clone());
283
284 let null_boolean = Self::null_boolean_expr();
286 let null_aware_expr = crate::functions::when(&Self::from_expr(either_null, None))
287 .then(&Self::from_expr(null_boolean, None))
288 .otherwise(&Self::from_expr(gt_result, None));
289
290 Self::from_expr(null_aware_expr.into_expr(), None)
291 }
292
293 pub fn ge_pyspark(&self, other: &Column) -> Column {
296 let left_null = self.expr().clone().is_null();
298 let right_null = other.expr().clone().is_null();
299 let either_null = left_null.clone().or(right_null.clone());
300
301 let ge_result = self.expr().clone().gt_eq(other.expr().clone());
303
304 let null_boolean = Self::null_boolean_expr();
306 let null_aware_expr = crate::functions::when(&Self::from_expr(either_null, None))
307 .then(&Self::from_expr(null_boolean, None))
308 .otherwise(&Self::from_expr(ge_result, None));
309
310 Self::from_expr(null_aware_expr.into_expr(), None)
311 }
312
313 pub fn lt_pyspark(&self, other: &Column) -> Column {
316 let left_null = self.expr().clone().is_null();
318 let right_null = other.expr().clone().is_null();
319 let either_null = left_null.clone().or(right_null.clone());
320
321 let lt_result = self.expr().clone().lt(other.expr().clone());
323
324 let null_boolean = Self::null_boolean_expr();
326 let null_aware_expr = crate::functions::when(&Self::from_expr(either_null, None))
327 .then(&Self::from_expr(null_boolean, None))
328 .otherwise(&Self::from_expr(lt_result, None));
329
330 Self::from_expr(null_aware_expr.into_expr(), None)
331 }
332
333 pub fn le_pyspark(&self, other: &Column) -> Column {
336 let left_null = self.expr().clone().is_null();
338 let right_null = other.expr().clone().is_null();
339 let either_null = left_null.clone().or(right_null.clone());
340
341 let le_result = self.expr().clone().lt_eq(other.expr().clone());
343
344 let null_boolean = Self::null_boolean_expr();
346 let null_aware_expr = crate::functions::when(&Self::from_expr(either_null, None))
347 .then(&Self::from_expr(null_boolean, None))
348 .otherwise(&Self::from_expr(le_result, None));
349
350 Self::from_expr(null_aware_expr.into_expr(), None)
351 }
352
353 pub fn gt(&self, other: Expr) -> Column {
359 Self::from_expr(self.expr().clone().gt(other), None)
360 }
361
362 pub fn gt_eq(&self, other: Expr) -> Column {
364 Self::from_expr(self.expr().clone().gt_eq(other), None)
365 }
366
367 pub fn lt(&self, other: Expr) -> Column {
369 Self::from_expr(self.expr().clone().lt(other), None)
370 }
371
372 pub fn lt_eq(&self, other: Expr) -> Column {
374 Self::from_expr(self.expr().clone().lt_eq(other), None)
375 }
376
377 pub fn eq(&self, other: Expr) -> Column {
379 Self::from_expr(self.expr().clone().eq(other), None)
380 }
381
382 pub fn neq(&self, other: Expr) -> Column {
384 Self::from_expr(self.expr().clone().neq(other), None)
385 }
386
387 pub fn upper(&self) -> Column {
391 Self::from_expr(self.expr().clone().str().to_uppercase(), None)
392 }
393
394 pub fn lower(&self) -> Column {
396 Self::from_expr(self.expr().clone().str().to_lowercase(), None)
397 }
398
399 pub fn lcase(&self) -> Column {
401 self.lower()
402 }
403
404 pub fn ucase(&self) -> Column {
406 self.upper()
407 }
408
409 pub fn substr(&self, start: i64, length: Option<i64>) -> Column {
411 use polars::prelude::*;
412 let offset = (start - 1).max(0);
413 let offset_expr = lit(offset);
414 let length_expr = length.map(lit).unwrap_or_else(|| lit(i64::MAX)); Self::from_expr(
416 self.expr().clone().str().slice(offset_expr, length_expr),
417 None,
418 )
419 }
420
421 pub fn length(&self) -> Column {
423 Self::from_expr(self.expr().clone().str().len_chars(), None)
424 }
425
426 pub fn bit_length(&self) -> Column {
428 use polars::prelude::*;
429 let len_bytes = self.expr().clone().str().len_bytes().cast(DataType::Int32);
430 Self::from_expr(len_bytes * lit(8i32), None)
431 }
432
433 pub fn octet_length(&self) -> Column {
435 use polars::prelude::*;
436 Self::from_expr(
437 self.expr().clone().str().len_bytes().cast(DataType::Int32),
438 None,
439 )
440 }
441
442 pub fn char_length(&self) -> Column {
444 self.length()
445 }
446
447 pub fn character_length(&self) -> Column {
449 self.length()
450 }
451
452 pub fn encode(&self, charset: &str) -> Column {
454 let charset = charset.to_string();
455 let expr = self.expr().clone().map(
456 move |s| crate::udfs::apply_encode(s, &charset),
457 GetOutput::from_type(DataType::String),
458 );
459 Self::from_expr(expr, None)
460 }
461
462 pub fn decode(&self, charset: &str) -> Column {
464 let charset = charset.to_string();
465 let expr = self.expr().clone().map(
466 move |s| crate::udfs::apply_decode(s, &charset),
467 GetOutput::from_type(DataType::String),
468 );
469 Self::from_expr(expr, None)
470 }
471
472 pub fn to_binary(&self, fmt: &str) -> Column {
474 let fmt = fmt.to_string();
475 let expr = self.expr().clone().map(
476 move |s| crate::udfs::apply_to_binary(s, &fmt),
477 GetOutput::from_type(DataType::String),
478 );
479 Self::from_expr(expr, None)
480 }
481
482 pub fn try_to_binary(&self, fmt: &str) -> Column {
484 let fmt = fmt.to_string();
485 let expr = self.expr().clone().map(
486 move |s| crate::udfs::apply_try_to_binary(s, &fmt),
487 GetOutput::from_type(DataType::String),
488 );
489 Self::from_expr(expr, None)
490 }
491
492 pub fn aes_encrypt(&self, key: &str) -> Column {
494 let key = key.to_string();
495 let expr = self.expr().clone().map(
496 move |s| crate::udfs::apply_aes_encrypt(s, &key),
497 GetOutput::from_type(DataType::String),
498 );
499 Self::from_expr(expr, None)
500 }
501
502 pub fn aes_decrypt(&self, key: &str) -> Column {
504 let key = key.to_string();
505 let expr = self.expr().clone().map(
506 move |s| crate::udfs::apply_aes_decrypt(s, &key),
507 GetOutput::from_type(DataType::String),
508 );
509 Self::from_expr(expr, None)
510 }
511
512 pub fn try_aes_decrypt(&self, key: &str) -> Column {
514 let key = key.to_string();
515 let expr = self.expr().clone().map(
516 move |s| crate::udfs::apply_try_aes_decrypt(s, &key),
517 GetOutput::from_type(DataType::String),
518 );
519 Self::from_expr(expr, None)
520 }
521
522 pub fn typeof_(&self) -> Column {
524 Self::from_expr(
525 self.expr().clone().map(
526 crate::udfs::apply_typeof,
527 GetOutput::from_type(DataType::String),
528 ),
529 None,
530 )
531 }
532
533 pub fn trim(&self) -> Column {
535 use polars::prelude::*;
536 Self::from_expr(self.expr().clone().str().strip_chars(lit(" \t\n\r")), None)
537 }
538
539 pub fn ltrim(&self) -> Column {
541 use polars::prelude::*;
542 Self::from_expr(
543 self.expr().clone().str().strip_chars_start(lit(" \t\n\r")),
544 None,
545 )
546 }
547
548 pub fn rtrim(&self) -> Column {
550 use polars::prelude::*;
551 Self::from_expr(
552 self.expr().clone().str().strip_chars_end(lit(" \t\n\r")),
553 None,
554 )
555 }
556
557 pub fn btrim(&self, trim_str: Option<&str>) -> Column {
559 use polars::prelude::*;
560 let chars = trim_str.unwrap_or(" \t\n\r");
561 Self::from_expr(self.expr().clone().str().strip_chars(lit(chars)), None)
562 }
563
564 pub fn locate(&self, substr: &str, pos: i64) -> Column {
566 use polars::prelude::*;
567 if substr.is_empty() {
568 return Self::from_expr(lit(1i64), None);
569 }
570 let start = (pos - 1).max(0);
571 let slice_expr = self.expr().clone().str().slice(lit(start), lit(i64::MAX));
572 let found = slice_expr.str().find_literal(lit(substr.to_string()));
573 Self::from_expr(
574 (found.cast(DataType::Int64) + lit(start + 1)).fill_null(lit(0i64)),
575 None,
576 )
577 }
578
579 pub fn conv(&self, from_base: i32, to_base: i32) -> Column {
581 let expr = self.expr().clone().map(
582 move |s| crate::udfs::apply_conv(s, from_base, to_base),
583 GetOutput::from_type(DataType::String),
584 );
585 Self::from_expr(expr, None)
586 }
587
588 pub fn hex(&self) -> Column {
590 let expr = self.expr().clone().map(
591 crate::udfs::apply_hex,
592 GetOutput::from_type(DataType::String),
593 );
594 Self::from_expr(expr, None)
595 }
596
597 pub fn unhex(&self) -> Column {
599 let expr = self.expr().clone().map(
600 crate::udfs::apply_unhex,
601 GetOutput::from_type(DataType::String),
602 );
603 Self::from_expr(expr, None)
604 }
605
606 pub fn bin(&self) -> Column {
608 let expr = self.expr().clone().map(
609 crate::udfs::apply_bin,
610 GetOutput::from_type(DataType::String),
611 );
612 Self::from_expr(expr, None)
613 }
614
615 pub fn getbit(&self, pos: i64) -> Column {
617 let expr = self.expr().clone().map(
618 move |s| crate::udfs::apply_getbit(s, pos),
619 GetOutput::from_type(DataType::Int64),
620 );
621 Self::from_expr(expr, None)
622 }
623
624 pub fn bit_and(&self, other: &Column) -> Column {
626 let args = [other.expr().clone()];
627 let expr = self.expr().clone().cast(DataType::Int64).map_many(
628 crate::udfs::apply_bit_and,
629 &args,
630 GetOutput::from_type(DataType::Int64),
631 );
632 Self::from_expr(expr, None)
633 }
634
635 pub fn bit_or(&self, other: &Column) -> Column {
637 let args = [other.expr().clone()];
638 let expr = self.expr().clone().cast(DataType::Int64).map_many(
639 crate::udfs::apply_bit_or,
640 &args,
641 GetOutput::from_type(DataType::Int64),
642 );
643 Self::from_expr(expr, None)
644 }
645
646 pub fn bit_xor(&self, other: &Column) -> Column {
648 let args = [other.expr().clone()];
649 let expr = self.expr().clone().cast(DataType::Int64).map_many(
650 crate::udfs::apply_bit_xor,
651 &args,
652 GetOutput::from_type(DataType::Int64),
653 );
654 Self::from_expr(expr, None)
655 }
656
657 pub fn bit_count(&self) -> Column {
659 let expr = self.expr().clone().map(
660 crate::udfs::apply_bit_count,
661 GetOutput::from_type(DataType::Int64),
662 );
663 Self::from_expr(expr, None)
664 }
665
666 pub fn assert_true(&self, err_msg: Option<&str>) -> Column {
669 let msg = err_msg.map(String::from);
670 let expr = self.expr().clone().map(
671 move |c| crate::udfs::apply_assert_true(c, msg.as_deref()),
672 GetOutput::same_type(),
673 );
674 Self::from_expr(expr, None)
675 }
676
677 pub fn bitwise_not(&self) -> Column {
679 let expr = (lit(-1i64) - self.expr().clone().cast(DataType::Int64)).cast(DataType::Int64);
681 Self::from_expr(expr, None)
682 }
683
684 pub fn str_to_map(&self, pair_delim: &str, key_value_delim: &str) -> Column {
686 let pair_delim = pair_delim.to_string();
687 let key_value_delim = key_value_delim.to_string();
688 let expr = self.expr().clone().map(
689 move |s| crate::udfs::apply_str_to_map(s, &pair_delim, &key_value_delim),
690 GetOutput::same_type(),
691 );
692 Self::from_expr(expr, None)
693 }
694
695 pub fn regexp_extract(&self, pattern: &str, group_index: usize) -> Column {
697 use polars::prelude::*;
698 let pat = pattern.to_string();
699 Self::from_expr(
700 self.expr().clone().str().extract(lit(pat), group_index),
701 None,
702 )
703 }
704
705 pub fn regexp_replace(&self, pattern: &str, replacement: &str) -> Column {
707 use polars::prelude::*;
708 let pat = pattern.to_string();
709 let rep = replacement.to_string();
710 Self::from_expr(
711 self.expr().clone().str().replace(lit(pat), lit(rep), false),
712 None,
713 )
714 }
715
716 pub fn left(&self, n: i64) -> Column {
718 use polars::prelude::*;
719 let len = n.max(0) as u32;
720 Self::from_expr(
721 self.expr().clone().str().slice(lit(0i64), lit(len as i64)),
722 None,
723 )
724 }
725
726 pub fn right(&self, n: i64) -> Column {
728 use polars::prelude::*;
729 let n_val = n.max(0);
730 let n_expr = lit(n_val);
731 let len_chars = self.expr().clone().str().len_chars().cast(DataType::Int64);
732 let start = when((len_chars.clone() - n_expr.clone()).lt_eq(lit(0i64)))
733 .then(lit(0i64))
734 .otherwise(len_chars - n_expr.clone());
735 Self::from_expr(self.expr().clone().str().slice(start, n_expr), None)
736 }
737
738 pub fn replace(&self, search: &str, replacement: &str) -> Column {
740 use polars::prelude::*;
741 Self::from_expr(
742 self.expr().clone().str().replace_all(
743 lit(search.to_string()),
744 lit(replacement.to_string()),
745 true,
746 ),
747 None,
748 )
749 }
750
751 pub fn startswith(&self, prefix: &str) -> Column {
753 use polars::prelude::*;
754 Self::from_expr(
755 self.expr()
756 .clone()
757 .str()
758 .starts_with(lit(prefix.to_string())),
759 None,
760 )
761 }
762
763 pub fn endswith(&self, suffix: &str) -> Column {
765 use polars::prelude::*;
766 Self::from_expr(
767 self.expr().clone().str().ends_with(lit(suffix.to_string())),
768 None,
769 )
770 }
771
772 pub fn contains(&self, substring: &str) -> Column {
774 use polars::prelude::*;
775 Self::from_expr(
776 self.expr()
777 .clone()
778 .str()
779 .contains(lit(substring.to_string()), true),
780 None,
781 )
782 }
783
784 pub fn split(&self, delimiter: &str) -> Column {
787 use polars::prelude::*;
788 Self::from_expr(
789 self.expr().clone().str().split(lit(delimiter.to_string())),
790 None,
791 )
792 }
793
794 pub fn initcap(&self) -> Column {
797 Self::from_expr(self.expr().clone().str().to_lowercase(), None)
798 }
799
800 pub fn regexp_extract_all(&self, pattern: &str) -> Column {
802 use polars::prelude::*;
803 Self::from_expr(
804 self.expr()
805 .clone()
806 .str()
807 .extract_all(lit(pattern.to_string())),
808 None,
809 )
810 }
811
812 pub fn regexp_like(&self, pattern: &str) -> Column {
814 use polars::prelude::*;
815 Self::from_expr(
816 self.expr()
817 .clone()
818 .str()
819 .contains(lit(pattern.to_string()), false),
820 None,
821 )
822 }
823
824 pub fn regexp_count(&self, pattern: &str) -> Column {
826 use polars::prelude::*;
827 Self::from_expr(
828 self.expr()
829 .clone()
830 .str()
831 .count_matches(lit(pattern.to_string()), false)
832 .cast(DataType::Int64),
833 None,
834 )
835 }
836
837 pub fn regexp_substr(&self, pattern: &str) -> Column {
839 self.regexp_extract(pattern, 0)
840 }
841
842 pub fn regexp_instr(&self, pattern: &str, group_idx: Option<usize>) -> Column {
844 let idx = group_idx.unwrap_or(0);
845 let pattern = pattern.to_string();
846 let expr = self.expr().clone().map(
847 move |s| crate::udfs::apply_regexp_instr(s, pattern.clone(), idx),
848 GetOutput::from_type(DataType::Int64),
849 );
850 Self::from_expr(expr, None)
851 }
852
853 pub fn find_in_set(&self, set_column: &Column) -> Column {
855 let args = [set_column.expr().clone()];
856 let expr = self.expr().clone().map_many(
857 crate::udfs::apply_find_in_set,
858 &args,
859 GetOutput::from_type(DataType::Int64),
860 );
861 Self::from_expr(expr, None)
862 }
863
864 pub fn repeat(&self, n: i32) -> Column {
866 use polars::prelude::*;
867 Self::from_expr(
869 self.expr()
870 .clone()
871 .repeat_by(lit(n as u32))
872 .list()
873 .join(lit(""), false),
874 None,
875 )
876 }
877
878 pub fn reverse(&self) -> Column {
880 Self::from_expr(self.expr().clone().str().reverse(), None)
881 }
882
883 pub fn instr(&self, substr: &str) -> Column {
885 use polars::prelude::*;
886 let found = self
887 .expr()
888 .clone()
889 .str()
890 .find_literal(lit(substr.to_string()));
891 Self::from_expr(
893 (found.cast(DataType::Int64) + lit(1i64)).fill_null(lit(0i64)),
894 None,
895 )
896 }
897
898 pub fn lpad(&self, length: i32, pad: &str) -> Column {
900 let pad_str = if pad.is_empty() { " " } else { pad };
901 let fill = pad_str.chars().next().unwrap_or(' ');
902 Self::from_expr(
903 self.expr().clone().str().pad_start(length as usize, fill),
904 None,
905 )
906 }
907
908 pub fn rpad(&self, length: i32, pad: &str) -> Column {
910 let pad_str = if pad.is_empty() { " " } else { pad };
911 let fill = pad_str.chars().next().unwrap_or(' ');
912 Self::from_expr(
913 self.expr().clone().str().pad_end(length as usize, fill),
914 None,
915 )
916 }
917
918 pub fn translate(&self, from_str: &str, to_str: &str) -> Column {
920 use polars::prelude::*;
921 let mut e = self.expr().clone();
922 let from_chars: Vec<char> = from_str.chars().collect();
923 let to_chars: Vec<char> = to_str.chars().collect();
924 for (i, fc) in from_chars.iter().enumerate() {
925 let f = fc.to_string();
926 let t = to_chars
927 .get(i)
928 .map(|c| c.to_string())
929 .unwrap_or_else(String::new); e = e.str().replace_all(lit(f), lit(t), true);
931 }
932 Self::from_expr(e, None)
933 }
934
935 pub fn mask(
938 &self,
939 upper_char: Option<char>,
940 lower_char: Option<char>,
941 digit_char: Option<char>,
942 other_char: Option<char>,
943 ) -> Column {
944 use polars::prelude::*;
945 let upper = upper_char.unwrap_or('X').to_string();
946 let lower = lower_char.unwrap_or('x').to_string();
947 let digit = digit_char.unwrap_or('n').to_string();
948 let other = other_char.map(|c| c.to_string());
949 let mut e = self
950 .expr()
951 .clone()
952 .str()
953 .replace_all(lit("[A-Z]".to_string()), lit(upper), false)
954 .str()
955 .replace_all(lit("[a-z]".to_string()), lit(lower), false)
956 .str()
957 .replace_all(lit(r"\d".to_string()), lit(digit), false);
958 if let Some(o) = other {
959 e = e
960 .str()
961 .replace_all(lit("[^A-Za-z0-9]".to_string()), lit(o), false);
962 }
963 Self::from_expr(e, None)
964 }
965
966 pub fn split_part(&self, delimiter: &str, part_num: i64) -> Column {
969 use polars::prelude::*;
970 if part_num == 0 {
971 return Self::from_expr(Expr::Literal(LiteralValue::Null), None);
972 }
973 let use_regex = delimiter == "|";
974 if use_regex {
975 let pattern = delimiter.to_string();
976 let part = part_num;
977 let get_expr = self.expr().clone().map(
978 move |col| crate::udfs::apply_split_part_regex(col, &pattern, part),
979 GetOutput::from_type(DataType::String),
980 );
981 let expr = when(self.expr().clone().is_null())
982 .then(Expr::Literal(LiteralValue::Null))
983 .otherwise(get_expr.fill_null(lit("")));
984 return Self::from_expr(expr, None);
985 }
986 let delim = delimiter.to_string();
987 let split_expr = self.expr().clone().str().split(lit(delim));
988 let index = if part_num > 0 {
989 lit(part_num - 1)
990 } else {
991 lit(part_num)
992 };
993 let get_expr = split_expr.list().get(index, true).fill_null(lit(""));
994 let expr = when(self.expr().clone().is_null())
995 .then(Expr::Literal(LiteralValue::Null))
996 .otherwise(get_expr);
997 Self::from_expr(expr, None)
998 }
999
1000 pub fn substring_index(&self, delimiter: &str, count: i64) -> Column {
1002 use polars::prelude::*;
1003 let delim = delimiter.to_string();
1004 let split_expr = self.expr().clone().str().split(lit(delim.clone()));
1005 let n = count.unsigned_abs() as i64;
1006 let expr = if count > 0 {
1007 split_expr
1008 .clone()
1009 .list()
1010 .slice(lit(0i64), lit(n))
1011 .list()
1012 .join(lit(delim), false)
1013 } else {
1014 let len = split_expr.clone().list().len();
1015 let start = when(len.clone().gt(lit(n)))
1016 .then(len.clone() - lit(n))
1017 .otherwise(lit(0i64));
1018 let slice_len = when(len.clone().gt(lit(n))).then(lit(n)).otherwise(len);
1019 split_expr
1020 .list()
1021 .slice(start, slice_len)
1022 .list()
1023 .join(lit(delim), false)
1024 };
1025 Self::from_expr(expr, None)
1026 }
1027
1028 pub fn soundex(&self) -> Column {
1030 let expr = self
1031 .expr()
1032 .clone()
1033 .map(crate::udfs::apply_soundex, GetOutput::same_type());
1034 Self::from_expr(expr, None)
1035 }
1036
1037 pub fn levenshtein(&self, other: &Column) -> Column {
1039 let args = [other.expr().clone()];
1040 let expr = self.expr().clone().map_many(
1041 crate::udfs::apply_levenshtein,
1042 &args,
1043 GetOutput::from_type(DataType::Int64),
1044 );
1045 Self::from_expr(expr, None)
1046 }
1047
1048 pub fn crc32(&self) -> Column {
1050 let expr = self.expr().clone().map(
1051 crate::udfs::apply_crc32,
1052 GetOutput::from_type(DataType::Int64),
1053 );
1054 Self::from_expr(expr, None)
1055 }
1056
1057 pub fn xxhash64(&self) -> Column {
1059 let expr = self.expr().clone().map(
1060 crate::udfs::apply_xxhash64,
1061 GetOutput::from_type(DataType::Int64),
1062 );
1063 Self::from_expr(expr, None)
1064 }
1065
1066 pub fn ascii(&self) -> Column {
1068 let expr = self.expr().clone().map(
1069 crate::udfs::apply_ascii,
1070 GetOutput::from_type(DataType::Int32),
1071 );
1072 Self::from_expr(expr, None)
1073 }
1074
1075 pub fn format_number(&self, decimals: u32) -> Column {
1077 let expr = self.expr().clone().map(
1078 move |s| crate::udfs::apply_format_number(s, decimals),
1079 GetOutput::from_type(DataType::String),
1080 );
1081 Self::from_expr(expr, None)
1082 }
1083
1084 pub fn char(&self) -> Column {
1086 let expr = self.expr().clone().map(
1087 crate::udfs::apply_char,
1088 GetOutput::from_type(DataType::String),
1089 );
1090 Self::from_expr(expr, None)
1091 }
1092
1093 pub fn chr(&self) -> Column {
1095 self.char()
1096 }
1097
1098 pub fn base64(&self) -> Column {
1100 let expr = self
1101 .expr()
1102 .clone()
1103 .map(crate::udfs::apply_base64, GetOutput::same_type());
1104 Self::from_expr(expr, None)
1105 }
1106
1107 pub fn unbase64(&self) -> Column {
1109 let expr = self
1110 .expr()
1111 .clone()
1112 .map(crate::udfs::apply_unbase64, GetOutput::same_type());
1113 Self::from_expr(expr, None)
1114 }
1115
1116 pub fn sha1(&self) -> Column {
1118 let expr = self
1119 .expr()
1120 .clone()
1121 .map(crate::udfs::apply_sha1, GetOutput::same_type());
1122 Self::from_expr(expr, None)
1123 }
1124
1125 pub fn sha2(&self, bit_length: i32) -> Column {
1127 let expr = self.expr().clone().map(
1128 move |s| crate::udfs::apply_sha2(s, bit_length),
1129 GetOutput::same_type(),
1130 );
1131 Self::from_expr(expr, None)
1132 }
1133
1134 pub fn md5(&self) -> Column {
1136 let expr = self
1137 .expr()
1138 .clone()
1139 .map(crate::udfs::apply_md5, GetOutput::same_type());
1140 Self::from_expr(expr, None)
1141 }
1142
1143 pub fn overlay(&self, replace: &str, pos: i64, length: i64) -> Column {
1145 use polars::prelude::*;
1146 let pos = pos.max(1);
1147 let replace_len = length.max(0);
1148 let start_left = 0i64;
1149 let len_left = (pos - 1).max(0);
1150 let start_right = (pos - 1 + replace_len).max(0);
1151 let len_right = 1_000_000i64; let left = self
1153 .expr()
1154 .clone()
1155 .str()
1156 .slice(lit(start_left), lit(len_left));
1157 let mid = lit(replace.to_string());
1158 let right = self
1159 .expr()
1160 .clone()
1161 .str()
1162 .slice(lit(start_right), lit(len_right));
1163 let exprs = [left, mid, right];
1164 let concat_expr = polars::prelude::concat_str(&exprs, "", false);
1165 Self::from_expr(concat_expr, None)
1166 }
1167
1168 pub fn abs(&self) -> Column {
1172 Self::from_expr(self.expr().clone().abs(), None)
1173 }
1174
1175 pub fn ceil(&self) -> Column {
1177 Self::from_expr(self.expr().clone().ceil(), None)
1178 }
1179
1180 pub fn ceiling(&self) -> Column {
1182 self.ceil()
1183 }
1184
1185 pub fn floor(&self) -> Column {
1187 Self::from_expr(self.expr().clone().floor(), None)
1188 }
1189
1190 pub fn round(&self, decimals: u32) -> Column {
1192 Self::from_expr(self.expr().clone().round(decimals), None)
1193 }
1194
1195 pub fn bround(&self, scale: i32) -> Column {
1197 let expr = self.expr().clone().map(
1198 move |s| crate::udfs::apply_bround(s, scale),
1199 GetOutput::from_type(DataType::Float64),
1200 );
1201 Self::from_expr(expr, None)
1202 }
1203
1204 pub fn negate(&self) -> Column {
1206 use polars::prelude::*;
1207 Self::from_expr(self.expr().clone() * lit(-1), None)
1208 }
1209
1210 pub fn multiply(&self, other: &Column) -> Column {
1212 Self::from_expr(self.expr().clone() * other.expr().clone(), None)
1213 }
1214
1215 pub fn sqrt(&self) -> Column {
1217 Self::from_expr(self.expr().clone().sqrt(), None)
1218 }
1219
1220 pub fn pow(&self, exp: i64) -> Column {
1222 use polars::prelude::*;
1223 Self::from_expr(self.expr().clone().pow(lit(exp)), None)
1224 }
1225
1226 pub fn power(&self, exp: i64) -> Column {
1228 self.pow(exp)
1229 }
1230
1231 pub fn exp(&self) -> Column {
1233 Self::from_expr(self.expr().clone().exp(), None)
1234 }
1235
1236 pub fn log(&self) -> Column {
1238 Self::from_expr(self.expr().clone().log(std::f64::consts::E), None)
1239 }
1240
1241 pub fn ln(&self) -> Column {
1243 self.log()
1244 }
1245
1246 pub fn sin(&self) -> Column {
1248 let expr = self.expr().clone().map(
1249 crate::udfs::apply_sin,
1250 GetOutput::from_type(DataType::Float64),
1251 );
1252 Self::from_expr(expr, None)
1253 }
1254
1255 pub fn cos(&self) -> Column {
1257 let expr = self.expr().clone().map(
1258 crate::udfs::apply_cos,
1259 GetOutput::from_type(DataType::Float64),
1260 );
1261 Self::from_expr(expr, None)
1262 }
1263
1264 pub fn tan(&self) -> Column {
1266 let expr = self.expr().clone().map(
1267 crate::udfs::apply_tan,
1268 GetOutput::from_type(DataType::Float64),
1269 );
1270 Self::from_expr(expr, None)
1271 }
1272
1273 pub fn cot(&self) -> Column {
1275 let expr = self.expr().clone().map(
1276 crate::udfs::apply_cot,
1277 GetOutput::from_type(DataType::Float64),
1278 );
1279 Self::from_expr(expr, None)
1280 }
1281
1282 pub fn csc(&self) -> Column {
1284 let expr = self.expr().clone().map(
1285 crate::udfs::apply_csc,
1286 GetOutput::from_type(DataType::Float64),
1287 );
1288 Self::from_expr(expr, None)
1289 }
1290
1291 pub fn sec(&self) -> Column {
1293 let expr = self.expr().clone().map(
1294 crate::udfs::apply_sec,
1295 GetOutput::from_type(DataType::Float64),
1296 );
1297 Self::from_expr(expr, None)
1298 }
1299
1300 pub fn asin(&self) -> Column {
1302 let expr = self.expr().clone().map(
1303 crate::udfs::apply_asin,
1304 GetOutput::from_type(DataType::Float64),
1305 );
1306 Self::from_expr(expr, None)
1307 }
1308
1309 pub fn acos(&self) -> Column {
1311 let expr = self.expr().clone().map(
1312 crate::udfs::apply_acos,
1313 GetOutput::from_type(DataType::Float64),
1314 );
1315 Self::from_expr(expr, None)
1316 }
1317
1318 pub fn atan(&self) -> Column {
1320 let expr = self.expr().clone().map(
1321 crate::udfs::apply_atan,
1322 GetOutput::from_type(DataType::Float64),
1323 );
1324 Self::from_expr(expr, None)
1325 }
1326
1327 pub fn atan2(&self, x: &Column) -> Column {
1329 let args = [x.expr().clone()];
1330 let expr = self.expr().clone().map_many(
1331 crate::udfs::apply_atan2,
1332 &args,
1333 GetOutput::from_type(DataType::Float64),
1334 );
1335 Self::from_expr(expr, None)
1336 }
1337
1338 pub fn degrees(&self) -> Column {
1340 let expr = self.expr().clone().map(
1341 crate::udfs::apply_degrees,
1342 GetOutput::from_type(DataType::Float64),
1343 );
1344 Self::from_expr(expr, None)
1345 }
1346
1347 pub fn to_degrees(&self) -> Column {
1349 self.degrees()
1350 }
1351
1352 pub fn radians(&self) -> Column {
1354 let expr = self.expr().clone().map(
1355 crate::udfs::apply_radians,
1356 GetOutput::from_type(DataType::Float64),
1357 );
1358 Self::from_expr(expr, None)
1359 }
1360
1361 pub fn to_radians(&self) -> Column {
1363 self.radians()
1364 }
1365
1366 pub fn signum(&self) -> Column {
1368 let expr = self.expr().clone().map(
1369 crate::udfs::apply_signum,
1370 GetOutput::from_type(DataType::Float64),
1371 );
1372 Self::from_expr(expr, None)
1373 }
1374
1375 pub fn cosh(&self) -> Column {
1377 let expr = self.expr().clone().map(
1378 crate::udfs::apply_cosh,
1379 GetOutput::from_type(DataType::Float64),
1380 );
1381 Self::from_expr(expr, None)
1382 }
1383 pub fn sinh(&self) -> Column {
1385 let expr = self.expr().clone().map(
1386 crate::udfs::apply_sinh,
1387 GetOutput::from_type(DataType::Float64),
1388 );
1389 Self::from_expr(expr, None)
1390 }
1391 pub fn tanh(&self) -> Column {
1393 let expr = self.expr().clone().map(
1394 crate::udfs::apply_tanh,
1395 GetOutput::from_type(DataType::Float64),
1396 );
1397 Self::from_expr(expr, None)
1398 }
1399 pub fn acosh(&self) -> Column {
1401 let expr = self.expr().clone().map(
1402 crate::udfs::apply_acosh,
1403 GetOutput::from_type(DataType::Float64),
1404 );
1405 Self::from_expr(expr, None)
1406 }
1407 pub fn asinh(&self) -> Column {
1409 let expr = self.expr().clone().map(
1410 crate::udfs::apply_asinh,
1411 GetOutput::from_type(DataType::Float64),
1412 );
1413 Self::from_expr(expr, None)
1414 }
1415 pub fn atanh(&self) -> Column {
1417 let expr = self.expr().clone().map(
1418 crate::udfs::apply_atanh,
1419 GetOutput::from_type(DataType::Float64),
1420 );
1421 Self::from_expr(expr, None)
1422 }
1423 pub fn cbrt(&self) -> Column {
1425 let expr = self.expr().clone().map(
1426 crate::udfs::apply_cbrt,
1427 GetOutput::from_type(DataType::Float64),
1428 );
1429 Self::from_expr(expr, None)
1430 }
1431 pub fn expm1(&self) -> Column {
1433 let expr = self.expr().clone().map(
1434 crate::udfs::apply_expm1,
1435 GetOutput::from_type(DataType::Float64),
1436 );
1437 Self::from_expr(expr, None)
1438 }
1439 pub fn log1p(&self) -> Column {
1441 let expr = self.expr().clone().map(
1442 crate::udfs::apply_log1p,
1443 GetOutput::from_type(DataType::Float64),
1444 );
1445 Self::from_expr(expr, None)
1446 }
1447 pub fn log10(&self) -> Column {
1449 let expr = self.expr().clone().map(
1450 crate::udfs::apply_log10,
1451 GetOutput::from_type(DataType::Float64),
1452 );
1453 Self::from_expr(expr, None)
1454 }
1455 pub fn log2(&self) -> Column {
1457 let expr = self.expr().clone().map(
1458 crate::udfs::apply_log2,
1459 GetOutput::from_type(DataType::Float64),
1460 );
1461 Self::from_expr(expr, None)
1462 }
1463 pub fn rint(&self) -> Column {
1465 let expr = self.expr().clone().map(
1466 crate::udfs::apply_rint,
1467 GetOutput::from_type(DataType::Float64),
1468 );
1469 Self::from_expr(expr, None)
1470 }
1471
1472 pub fn hypot(&self, other: &Column) -> Column {
1474 let xx = self.expr().clone() * self.expr().clone();
1475 let yy = other.expr().clone() * other.expr().clone();
1476 Self::from_expr((xx + yy).sqrt(), None)
1477 }
1478
1479 pub fn cast_to(&self, type_name: &str) -> Result<Column, String> {
1481 crate::functions::cast(self, type_name)
1482 }
1483
1484 pub fn try_cast_to(&self, type_name: &str) -> Result<Column, String> {
1486 crate::functions::try_cast(self, type_name)
1487 }
1488
1489 pub fn is_nan(&self) -> Column {
1491 Self::from_expr(self.expr().clone().is_nan(), None)
1492 }
1493
1494 pub fn year(&self) -> Column {
1498 Self::from_expr(self.expr().clone().dt().year(), None)
1499 }
1500
1501 pub fn month(&self) -> Column {
1503 Self::from_expr(self.expr().clone().dt().month(), None)
1504 }
1505
1506 pub fn day(&self) -> Column {
1508 Self::from_expr(self.expr().clone().dt().day(), None)
1509 }
1510
1511 pub fn dayofmonth(&self) -> Column {
1513 self.day()
1514 }
1515
1516 pub fn quarter(&self) -> Column {
1518 Self::from_expr(self.expr().clone().dt().quarter(), None)
1519 }
1520
1521 pub fn weekofyear(&self) -> Column {
1523 Self::from_expr(self.expr().clone().dt().week(), None)
1524 }
1525
1526 pub fn week(&self) -> Column {
1528 self.weekofyear()
1529 }
1530
1531 pub fn dayofweek(&self) -> Column {
1534 let w = self.expr().clone().dt().weekday();
1535 let dayofweek = (w % lit(7i32)) + lit(1i32); Self::from_expr(dayofweek, None)
1537 }
1538
1539 pub fn dayofyear(&self) -> Column {
1541 Self::from_expr(
1542 self.expr().clone().dt().ordinal_day().cast(DataType::Int32),
1543 None,
1544 )
1545 }
1546
1547 pub fn to_date(&self) -> Column {
1549 use polars::prelude::DataType;
1550 Self::from_expr(self.expr().clone().cast(DataType::Date), None)
1551 }
1552
1553 pub fn date_format(&self, format: &str) -> Column {
1555 Self::from_expr(self.expr().clone().dt().strftime(format), None)
1556 }
1557
1558 pub fn hour(&self) -> Column {
1560 Self::from_expr(self.expr().clone().dt().hour(), None)
1561 }
1562
1563 pub fn minute(&self) -> Column {
1565 Self::from_expr(self.expr().clone().dt().minute(), None)
1566 }
1567
1568 pub fn second(&self) -> Column {
1570 Self::from_expr(self.expr().clone().dt().second(), None)
1571 }
1572
1573 pub fn extract(&self, field: &str) -> Column {
1575 use polars::prelude::*;
1576 let e = self.expr().clone();
1577 let expr = match field.trim().to_lowercase().as_str() {
1578 "year" => e.dt().year(),
1579 "month" => e.dt().month(),
1580 "day" => e.dt().day(),
1581 "hour" => e.dt().hour(),
1582 "minute" => e.dt().minute(),
1583 "second" => e.dt().second(),
1584 "quarter" => e.dt().quarter(),
1585 "week" | "weekofyear" => e.dt().week(),
1586 "dayofweek" | "dow" => {
1587 let w = e.dt().weekday();
1588 (w % lit(7i32)) + lit(1i32)
1589 }
1590 "dayofyear" | "doy" => e.dt().ordinal_day().cast(DataType::Int32),
1591 _ => e.dt().year(), };
1593 Self::from_expr(expr, None)
1594 }
1595
1596 pub fn unix_micros(&self) -> Column {
1598 use polars::prelude::*;
1599 Self::from_expr(self.expr().clone().cast(DataType::Int64), None)
1600 }
1601
1602 pub fn unix_millis(&self) -> Column {
1604 use polars::prelude::*;
1605 let micros = self.expr().clone().cast(DataType::Int64);
1606 Self::from_expr(micros / lit(1000i64), None)
1607 }
1608
1609 pub fn unix_seconds(&self) -> Column {
1611 use polars::prelude::*;
1612 let micros = self.expr().clone().cast(DataType::Int64);
1613 Self::from_expr(micros / lit(1_000_000i64), None)
1614 }
1615
1616 pub fn dayname(&self) -> Column {
1618 let expr = self.expr().clone().map(
1619 crate::udfs::apply_dayname,
1620 GetOutput::from_type(DataType::String),
1621 );
1622 Self::from_expr(expr, None)
1623 }
1624
1625 pub fn weekday(&self) -> Column {
1627 let expr = self.expr().clone().map(
1628 crate::udfs::apply_weekday,
1629 GetOutput::from_type(DataType::Int32),
1630 );
1631 Self::from_expr(expr, None)
1632 }
1633
1634 pub fn date_add(&self, n: i32) -> Column {
1636 use polars::prelude::*;
1637 let date_expr = self.expr().clone().cast(DataType::Date);
1638 let dur = duration(DurationArgs::new().with_days(lit(n as i64)));
1639 Self::from_expr(date_expr + dur, None)
1640 }
1641
1642 pub fn date_sub(&self, n: i32) -> Column {
1644 use polars::prelude::*;
1645 let date_expr = self.expr().clone().cast(DataType::Date);
1646 let dur = duration(DurationArgs::new().with_days(lit(n as i64)));
1647 Self::from_expr(date_expr - dur, None)
1648 }
1649
1650 pub fn datediff(&self, other: &Column) -> Column {
1652 use polars::prelude::*;
1653 let start = self.expr().clone().cast(DataType::Date);
1654 let end = other.expr().clone().cast(DataType::Date);
1655 Self::from_expr((end - start).dt().total_days(), None)
1656 }
1657
1658 pub fn last_day(&self) -> Column {
1660 Self::from_expr(self.expr().clone().dt().month_end(), None)
1661 }
1662
1663 pub fn timestampadd(&self, unit: &str, amount: &Column) -> Column {
1665 use polars::prelude::*;
1666 let ts = self.expr().clone();
1667 let amt = amount.expr().clone().cast(DataType::Int64);
1668 let dur = match unit.trim().to_uppercase().as_str() {
1669 "DAY" | "DAYS" => duration(DurationArgs::new().with_days(amt)),
1670 "HOUR" | "HOURS" => duration(DurationArgs::new().with_hours(amt)),
1671 "MINUTE" | "MINUTES" => duration(DurationArgs::new().with_minutes(amt)),
1672 "SECOND" | "SECONDS" => duration(DurationArgs::new().with_seconds(amt)),
1673 "WEEK" | "WEEKS" => duration(DurationArgs::new().with_weeks(amt)),
1674 _ => duration(DurationArgs::new().with_days(amt)),
1675 };
1676 Self::from_expr(ts + dur, None)
1677 }
1678
1679 pub fn timestampdiff(&self, unit: &str, other: &Column) -> Column {
1681 let start = self.expr().clone();
1682 let end = other.expr().clone();
1683 let diff = end - start;
1684 let expr = match unit.trim().to_uppercase().as_str() {
1685 "HOUR" | "HOURS" => diff.dt().total_hours(),
1686 "MINUTE" | "MINUTES" => diff.dt().total_minutes(),
1687 "SECOND" | "SECONDS" => diff.dt().total_seconds(),
1688 "DAY" | "DAYS" => diff.dt().total_days(),
1689 _ => diff.dt().total_days(),
1690 };
1691 Self::from_expr(expr, None)
1692 }
1693
1694 pub fn from_utc_timestamp(&self, tz: &str) -> Column {
1696 let tz = tz.to_string();
1697 let expr = self.expr().clone().map(
1698 move |s| crate::udfs::apply_from_utc_timestamp(s, &tz),
1699 GetOutput::same_type(),
1700 );
1701 Self::from_expr(expr, None)
1702 }
1703
1704 pub fn to_utc_timestamp(&self, tz: &str) -> Column {
1706 let tz = tz.to_string();
1707 let expr = self.expr().clone().map(
1708 move |s| crate::udfs::apply_to_utc_timestamp(s, &tz),
1709 GetOutput::same_type(),
1710 );
1711 Self::from_expr(expr, None)
1712 }
1713
1714 pub fn trunc(&self, format: &str) -> Column {
1716 use polars::prelude::*;
1717 Self::from_expr(
1718 self.expr().clone().dt().truncate(lit(format.to_string())),
1719 None,
1720 )
1721 }
1722
1723 pub fn add_months(&self, n: i32) -> Column {
1725 let expr = self.expr().clone().map(
1726 move |col| crate::udfs::apply_add_months(col, n),
1727 GetOutput::from_type(DataType::Date),
1728 );
1729 Self::from_expr(expr, None)
1730 }
1731
1732 pub fn months_between(&self, start: &Column, round_off: bool) -> Column {
1735 let args = [start.expr().clone()];
1736 let expr = self.expr().clone().map_many(
1737 move |cols| crate::udfs::apply_months_between(cols, round_off),
1738 &args,
1739 GetOutput::from_type(DataType::Float64),
1740 );
1741 Self::from_expr(expr, None)
1742 }
1743
1744 pub fn next_day(&self, day_of_week: &str) -> Column {
1746 let day = day_of_week.to_string();
1747 let expr = self.expr().clone().map(
1748 move |col| crate::udfs::apply_next_day(col, &day),
1749 GetOutput::from_type(DataType::Date),
1750 );
1751 Self::from_expr(expr, None)
1752 }
1753
1754 pub fn unix_timestamp(&self, format: Option<&str>) -> Column {
1756 let fmt = format.map(String::from);
1757 let expr = self.expr().clone().map(
1758 move |col| crate::udfs::apply_unix_timestamp(col, fmt.as_deref()),
1759 GetOutput::from_type(DataType::Int64),
1760 );
1761 Self::from_expr(expr, None)
1762 }
1763
1764 pub fn from_unixtime(&self, format: Option<&str>) -> Column {
1766 let fmt = format.map(String::from);
1767 let expr = self.expr().clone().map(
1768 move |col| crate::udfs::apply_from_unixtime(col, fmt.as_deref()),
1769 GetOutput::from_type(DataType::String),
1770 );
1771 Self::from_expr(expr, None)
1772 }
1773
1774 pub fn timestamp_seconds(&self) -> Column {
1776 let expr = (self.expr().clone().cast(DataType::Int64) * lit(1_000_000i64))
1777 .cast(DataType::Datetime(TimeUnit::Microseconds, None));
1778 Self::from_expr(expr, None)
1779 }
1780
1781 pub fn timestamp_millis(&self) -> Column {
1783 let expr = (self.expr().clone().cast(DataType::Int64) * lit(1000i64))
1784 .cast(DataType::Datetime(TimeUnit::Microseconds, None));
1785 Self::from_expr(expr, None)
1786 }
1787
1788 pub fn timestamp_micros(&self) -> Column {
1790 let expr = self
1791 .expr()
1792 .clone()
1793 .cast(DataType::Int64)
1794 .cast(DataType::Datetime(TimeUnit::Microseconds, None));
1795 Self::from_expr(expr, None)
1796 }
1797
1798 pub fn unix_date(&self) -> Column {
1800 let expr = self.expr().clone().map(
1801 crate::udfs::apply_unix_date,
1802 GetOutput::from_type(DataType::Int32),
1803 );
1804 Self::from_expr(expr, None)
1805 }
1806
1807 pub fn date_from_unix_date(&self) -> Column {
1809 let expr = self.expr().clone().map(
1810 crate::udfs::apply_date_from_unix_date,
1811 GetOutput::from_type(DataType::Date),
1812 );
1813 Self::from_expr(expr, None)
1814 }
1815
1816 pub fn pmod(&self, divisor: &Column) -> Column {
1818 let args = [divisor.expr().clone()];
1819 let expr = self.expr().clone().map_many(
1820 crate::udfs::apply_pmod,
1821 &args,
1822 GetOutput::from_type(DataType::Float64),
1823 );
1824 Self::from_expr(expr, None)
1825 }
1826
1827 pub fn factorial(&self) -> Column {
1829 let expr = self.expr().clone().map(
1830 crate::udfs::apply_factorial,
1831 GetOutput::from_type(DataType::Int64),
1832 );
1833 Self::from_expr(expr, None)
1834 }
1835
1836 pub fn over(&self, partition_by: &[&str]) -> Column {
1841 let partition_exprs: Vec<Expr> = partition_by.iter().map(|s| col(*s)).collect();
1842 Self::from_expr(self.expr().clone().over(partition_exprs), None)
1843 }
1844
1845 pub fn rank(&self, descending: bool) -> Column {
1847 let opts = RankOptions {
1848 method: RankMethod::Min,
1849 descending,
1850 };
1851 Self::from_expr(self.expr().clone().rank(opts, None), None)
1852 }
1853
1854 pub fn dense_rank(&self, descending: bool) -> Column {
1856 let opts = RankOptions {
1857 method: RankMethod::Dense,
1858 descending,
1859 };
1860 Self::from_expr(self.expr().clone().rank(opts, None), None)
1861 }
1862
1863 pub fn row_number(&self, descending: bool) -> Column {
1865 let opts = RankOptions {
1866 method: RankMethod::Ordinal,
1867 descending,
1868 };
1869 Self::from_expr(self.expr().clone().rank(opts, None), None)
1870 }
1871
1872 pub fn lag(&self, n: i64) -> Column {
1874 Self::from_expr(self.expr().clone().shift(polars::prelude::lit(n)), None)
1875 }
1876
1877 pub fn lead(&self, n: i64) -> Column {
1879 Self::from_expr(self.expr().clone().shift(polars::prelude::lit(-n)), None)
1880 }
1881
1882 pub fn first_value(&self) -> Column {
1884 Self::from_expr(self.expr().clone().first(), None)
1885 }
1886
1887 pub fn last_value(&self) -> Column {
1889 Self::from_expr(self.expr().clone().last(), None)
1890 }
1891
1892 pub fn percent_rank(&self, partition_by: &[&str], descending: bool) -> Column {
1894 use polars::prelude::*;
1895 let partition_exprs: Vec<Expr> = partition_by.iter().map(|s| col(*s)).collect();
1896 let opts = RankOptions {
1897 method: RankMethod::Min,
1898 descending,
1899 };
1900 let rank_expr = self
1901 .expr()
1902 .clone()
1903 .rank(opts, None)
1904 .over(partition_exprs.clone());
1905 let count_expr = self.expr().clone().count().over(partition_exprs.clone());
1906 let rank_f = (rank_expr - lit(1i64)).cast(DataType::Float64);
1907 let count_f = (count_expr - lit(1i64)).cast(DataType::Float64);
1908 let pct = rank_f / count_f;
1909 Self::from_expr(pct, None)
1910 }
1911
1912 pub fn cume_dist(&self, partition_by: &[&str], descending: bool) -> Column {
1914 use polars::prelude::*;
1915 let partition_exprs: Vec<Expr> = partition_by.iter().map(|s| col(*s)).collect();
1916 let opts = RankOptions {
1917 method: RankMethod::Ordinal,
1918 descending,
1919 };
1920 let row_num = self
1921 .expr()
1922 .clone()
1923 .rank(opts, None)
1924 .over(partition_exprs.clone());
1925 let count_expr = self.expr().clone().count().over(partition_exprs.clone());
1926 let cume = row_num / count_expr;
1927 Self::from_expr(cume.cast(DataType::Float64), None)
1928 }
1929
1930 pub fn ntile(&self, n: u32, partition_by: &[&str], descending: bool) -> Column {
1932 use polars::prelude::*;
1933 let partition_exprs: Vec<Expr> = partition_by.iter().map(|s| col(*s)).collect();
1934 let opts = RankOptions {
1935 method: RankMethod::Ordinal,
1936 descending,
1937 };
1938 let rank_expr = self
1939 .expr()
1940 .clone()
1941 .rank(opts, None)
1942 .over(partition_exprs.clone());
1943 let count_expr = self.expr().clone().count().over(partition_exprs.clone());
1944 let n_expr = lit(n as f64);
1945 let rank_f = rank_expr.cast(DataType::Float64);
1946 let count_f = count_expr.cast(DataType::Float64);
1947 let bucket = (rank_f * n_expr / count_f).ceil();
1948 let clamped = bucket.clip(lit(1.0), lit(n as f64));
1949 Self::from_expr(clamped.cast(DataType::Int32), None)
1950 }
1951
1952 pub fn nth_value(&self, n: i64, partition_by: &[&str], descending: bool) -> Column {
1954 use polars::prelude::*;
1955 let partition_exprs: Vec<Expr> = partition_by.iter().map(|s| col(*s)).collect();
1956 let opts = RankOptions {
1957 method: RankMethod::Ordinal,
1958 descending,
1959 };
1960 let rank_expr = self
1961 .expr()
1962 .clone()
1963 .rank(opts, None)
1964 .over(partition_exprs.clone());
1965 let cond_col = Self::from_expr(rank_expr.eq(lit(n)), None);
1966 let null_col = Self::from_expr(Expr::Literal(LiteralValue::Null), None);
1967 let value_col = Self::from_expr(self.expr().clone(), None);
1968 let when_expr = crate::functions::when(&cond_col)
1969 .then(&value_col)
1970 .otherwise(&null_col)
1971 .into_expr();
1972 let windowed = when_expr.max().over(partition_exprs);
1973 Self::from_expr(windowed, None)
1974 }
1975
1976 pub fn array_size(&self) -> Column {
1978 use polars::prelude::*;
1979 Self::from_expr(
1980 self.expr().clone().list().len().cast(DataType::Int32),
1981 Some("size".to_string()),
1982 )
1983 }
1984
1985 pub fn cardinality(&self) -> Column {
1987 self.array_size()
1988 }
1989
1990 pub fn array_contains(&self, value: Expr) -> Column {
1992 Self::from_expr(self.expr().clone().list().contains(value), None)
1993 }
1994
1995 pub fn array_join(&self, separator: &str) -> Column {
1997 use polars::prelude::*;
1998 Self::from_expr(
1999 self.expr()
2000 .clone()
2001 .list()
2002 .join(lit(separator.to_string()), false),
2003 None,
2004 )
2005 }
2006
2007 pub fn array_max(&self) -> Column {
2009 Self::from_expr(self.expr().clone().list().max(), None)
2010 }
2011
2012 pub fn array_min(&self) -> Column {
2014 Self::from_expr(self.expr().clone().list().min(), None)
2015 }
2016
2017 pub fn element_at(&self, index: i64) -> Column {
2019 use polars::prelude::*;
2020 let idx = if index >= 1 { index - 1 } else { index };
2022 Self::from_expr(self.expr().clone().list().get(lit(idx), true), None)
2023 }
2024
2025 pub fn array_sort(&self) -> Column {
2027 use polars::prelude::SortOptions;
2028 let opts = SortOptions {
2029 descending: false,
2030 nulls_last: true,
2031 ..Default::default()
2032 };
2033 Self::from_expr(self.expr().clone().list().sort(opts), None)
2034 }
2035
2036 pub fn array_distinct(&self) -> Column {
2038 let expr = self.expr().clone().map(
2039 crate::udfs::apply_array_distinct_first_order,
2040 GetOutput::same_type(),
2041 );
2042 Self::from_expr(expr, None)
2043 }
2044
2045 pub fn mode(&self) -> Column {
2048 let vc = self
2052 .expr()
2053 .clone()
2054 .value_counts(true, false, "count", false);
2055 let first_struct = vc.first();
2056 let val_expr = first_struct.struct_().field_by_index(0);
2057 Self::from_expr(val_expr, Some("mode".to_string()))
2058 }
2059
2060 pub fn array_slice(&self, start: i64, length: Option<i64>) -> Column {
2062 use polars::prelude::*;
2063 let start_expr = lit((start - 1).max(0)); let length_expr = length.map(lit).unwrap_or_else(|| lit(i64::MAX));
2065 Self::from_expr(
2066 self.expr().clone().list().slice(start_expr, length_expr),
2067 None,
2068 )
2069 }
2070
2071 pub fn explode(&self) -> Column {
2073 Self::from_expr(self.expr().clone().explode(), None)
2074 }
2075
2076 pub fn explode_outer(&self) -> Column {
2078 Self::from_expr(self.expr().clone().explode(), None)
2079 }
2080
2081 pub fn posexplode_outer(&self) -> (Column, Column) {
2083 self.posexplode()
2084 }
2085
2086 pub fn arrays_zip(&self, other: &Column) -> Column {
2088 let args = [other.expr().clone()];
2089 let expr = self.expr().clone().map_many(
2090 crate::udfs::apply_arrays_zip,
2091 &args,
2092 GetOutput::same_type(),
2093 );
2094 Self::from_expr(expr, None)
2095 }
2096
2097 pub fn arrays_overlap(&self, other: &Column) -> Column {
2099 let args = [other.expr().clone()];
2100 let expr = self.expr().clone().map_many(
2101 crate::udfs::apply_arrays_overlap,
2102 &args,
2103 GetOutput::from_type(DataType::Boolean),
2104 );
2105 Self::from_expr(expr, None)
2106 }
2107
2108 pub fn array_agg(&self) -> Column {
2110 Self::from_expr(self.expr().clone().implode(), None)
2111 }
2112
2113 pub fn array_position(&self, value: Expr) -> Column {
2116 use polars::prelude::{DataType, NULL};
2117 let cond = Self::from_expr(col("").eq(value), None);
2119 let then_val = Self::from_expr(col("").cum_count(false), None);
2120 let else_val = Self::from_expr(lit(NULL), None);
2121 let idx_expr = crate::functions::when(&cond)
2122 .then(&then_val)
2123 .otherwise(&else_val)
2124 .into_expr();
2125 let list_expr = self
2126 .expr()
2127 .clone()
2128 .list()
2129 .eval(idx_expr, false)
2130 .list()
2131 .min()
2132 .fill_null(lit(0i64))
2133 .cast(DataType::Int64);
2134 Self::from_expr(list_expr, Some("array_position".to_string()))
2135 }
2136
2137 pub fn array_compact(&self) -> Column {
2139 let list_expr = self.expr().clone().list().drop_nulls();
2140 Self::from_expr(list_expr, None)
2141 }
2142
2143 pub fn array_remove(&self, value: Expr) -> Column {
2146 use polars::prelude::NULL;
2147 let cond = Self::from_expr(col("").neq(value), None);
2149 let then_val = Self::from_expr(col(""), None);
2150 let else_val = Self::from_expr(lit(NULL), None);
2151 let elem_neq = crate::functions::when(&cond)
2152 .then(&then_val)
2153 .otherwise(&else_val)
2154 .into_expr();
2155 let list_expr = self
2156 .expr()
2157 .clone()
2158 .list()
2159 .eval(elem_neq, false)
2160 .list()
2161 .drop_nulls();
2162 Self::from_expr(list_expr, None)
2163 }
2164
2165 pub fn array_repeat(&self, n: i64) -> Column {
2167 let expr = self.expr().clone().map(
2168 move |c| crate::udfs::apply_array_repeat(c, n),
2169 GetOutput::same_type(),
2170 );
2171 Self::from_expr(expr, None)
2172 }
2173
2174 pub fn array_flatten(&self) -> Column {
2176 let expr = self
2177 .expr()
2178 .clone()
2179 .map(crate::udfs::apply_array_flatten, GetOutput::same_type());
2180 Self::from_expr(expr, None)
2181 }
2182
2183 pub fn array_append(&self, elem: &Column) -> Column {
2185 let args = [elem.expr().clone()];
2186 let expr = self.expr().clone().map_many(
2187 crate::udfs::apply_array_append,
2188 &args,
2189 GetOutput::same_type(),
2190 );
2191 Self::from_expr(expr, None)
2192 }
2193
2194 pub fn array_prepend(&self, elem: &Column) -> Column {
2196 let args = [elem.expr().clone()];
2197 let expr = self.expr().clone().map_many(
2198 crate::udfs::apply_array_prepend,
2199 &args,
2200 GetOutput::same_type(),
2201 );
2202 Self::from_expr(expr, None)
2203 }
2204
2205 pub fn array_insert(&self, pos: &Column, elem: &Column) -> Column {
2207 let args = [pos.expr().clone(), elem.expr().clone()];
2208 let expr = self.expr().clone().map_many(
2209 crate::udfs::apply_array_insert,
2210 &args,
2211 GetOutput::same_type(),
2212 );
2213 Self::from_expr(expr, None)
2214 }
2215
2216 pub fn array_except(&self, other: &Column) -> Column {
2218 let args = [other.expr().clone()];
2219 let expr = self.expr().clone().map_many(
2220 crate::udfs::apply_array_except,
2221 &args,
2222 GetOutput::same_type(),
2223 );
2224 Self::from_expr(expr, None)
2225 }
2226
2227 pub fn array_intersect(&self, other: &Column) -> Column {
2229 let args = [other.expr().clone()];
2230 let expr = self.expr().clone().map_many(
2231 crate::udfs::apply_array_intersect,
2232 &args,
2233 GetOutput::same_type(),
2234 );
2235 Self::from_expr(expr, None)
2236 }
2237
2238 pub fn array_union(&self, other: &Column) -> Column {
2240 let args = [other.expr().clone()];
2241 let expr = self.expr().clone().map_many(
2242 crate::udfs::apply_array_union,
2243 &args,
2244 GetOutput::same_type(),
2245 );
2246 Self::from_expr(expr, None)
2247 }
2248
2249 pub fn zip_with(&self, other: &Column, merge: Expr) -> Column {
2252 let args = [other.expr().clone()];
2253 let zip_expr = self.expr().clone().map_many(
2254 crate::udfs::apply_zip_arrays_to_struct,
2255 &args,
2256 GetOutput::same_type(),
2257 );
2258 let list_expr = zip_expr.list().eval(merge, false);
2259 Self::from_expr(list_expr, None)
2260 }
2261
2262 pub fn array_exists(&self, predicate: Expr) -> Column {
2264 let pred_expr = self
2265 .expr()
2266 .clone()
2267 .list()
2268 .eval(predicate, false)
2269 .list()
2270 .any();
2271 Self::from_expr(pred_expr, Some("exists".to_string()))
2272 }
2273
2274 pub fn array_forall(&self, predicate: Expr) -> Column {
2276 let pred_expr = self
2277 .expr()
2278 .clone()
2279 .list()
2280 .eval(predicate, false)
2281 .list()
2282 .all();
2283 Self::from_expr(pred_expr, Some("forall".to_string()))
2284 }
2285
2286 pub fn array_filter(&self, predicate: Expr) -> Column {
2288 use polars::prelude::NULL;
2289 let then_val = Self::from_expr(col(""), None);
2290 let else_val = Self::from_expr(lit(NULL), None);
2291 let elem_expr = crate::functions::when(&Self::from_expr(predicate, None))
2292 .then(&then_val)
2293 .otherwise(&else_val)
2294 .into_expr();
2295 let list_expr = self
2296 .expr()
2297 .clone()
2298 .list()
2299 .eval(elem_expr, false)
2300 .list()
2301 .drop_nulls();
2302 Self::from_expr(list_expr, None)
2303 }
2304
2305 pub fn array_transform(&self, f: Expr) -> Column {
2307 let list_expr = self.expr().clone().list().eval(f, false);
2308 Self::from_expr(list_expr, None)
2309 }
2310
2311 pub fn array_sum(&self) -> Column {
2313 Self::from_expr(self.expr().clone().list().sum(), None)
2314 }
2315
2316 pub fn array_aggregate(&self, zero: &Column) -> Column {
2318 let sum_expr = self.expr().clone().list().sum();
2319 Self::from_expr(sum_expr + zero.expr().clone(), None)
2320 }
2321
2322 pub fn array_mean(&self) -> Column {
2324 Self::from_expr(self.expr().clone().list().mean(), None)
2325 }
2326
2327 pub fn posexplode(&self) -> (Column, Column) {
2330 let pos_expr = self
2331 .expr()
2332 .clone()
2333 .list()
2334 .eval(col("").cum_count(false), false)
2335 .explode();
2336 let val_expr = self.expr().clone().explode();
2337 (
2338 Self::from_expr(pos_expr, Some("pos".to_string())),
2339 Self::from_expr(val_expr, Some("col".to_string())),
2340 )
2341 }
2342
2343 pub fn map_keys(&self) -> Column {
2345 let elem_key = col("").struct_().field_by_name("key");
2346 let list_expr = self.expr().clone().list().eval(elem_key, false);
2347 Self::from_expr(list_expr, None)
2348 }
2349
2350 pub fn map_values(&self) -> Column {
2352 let elem_val = col("").struct_().field_by_name("value");
2353 let list_expr = self.expr().clone().list().eval(elem_val, false);
2354 Self::from_expr(list_expr, None)
2355 }
2356
2357 pub fn map_entries(&self) -> Column {
2359 Self::from_expr(self.expr().clone(), None)
2360 }
2361
2362 pub fn map_from_arrays(&self, values: &Column) -> Column {
2364 let args = [values.expr().clone()];
2365 let expr = self.expr().clone().map_many(
2366 crate::udfs::apply_map_from_arrays,
2367 &args,
2368 GetOutput::same_type(),
2369 );
2370 Self::from_expr(expr, None)
2371 }
2372
2373 pub fn map_concat(&self, other: &Column) -> Column {
2375 let args = [other.expr().clone()];
2376 let expr = self.expr().clone().map_many(
2377 crate::udfs::apply_map_concat,
2378 &args,
2379 GetOutput::same_type(),
2380 );
2381 Self::from_expr(expr, None)
2382 }
2383
2384 pub fn transform_keys(&self, key_expr: Expr) -> Column {
2386 use polars::prelude::as_struct;
2387 let value = col("").struct_().field_by_name("value");
2388 let new_struct = as_struct(vec![key_expr.alias("key"), value.alias("value")]);
2389 let list_expr = self.expr().clone().list().eval(new_struct, false);
2390 Self::from_expr(list_expr, None)
2391 }
2392
2393 pub fn transform_values(&self, value_expr: Expr) -> Column {
2395 use polars::prelude::as_struct;
2396 let key = col("").struct_().field_by_name("key");
2397 let new_struct = as_struct(vec![key.alias("key"), value_expr.alias("value")]);
2398 let list_expr = self.expr().clone().list().eval(new_struct, false);
2399 Self::from_expr(list_expr, None)
2400 }
2401
2402 pub fn map_zip_with(&self, other: &Column, merge: Expr) -> Column {
2405 use polars::prelude::as_struct;
2406 let args = [other.expr().clone()];
2407 let zip_expr = self.expr().clone().map_many(
2408 crate::udfs::apply_map_zip_to_struct,
2409 &args,
2410 GetOutput::same_type(),
2411 );
2412 let key_field = col("").struct_().field_by_name("key").alias("key");
2413 let value_field = merge.alias("value");
2414 let merge_expr = as_struct(vec![key_field, value_field]);
2415 let list_expr = zip_expr.list().eval(merge_expr, false);
2416 Self::from_expr(list_expr, None)
2417 }
2418
2419 pub fn map_filter(&self, predicate: Expr) -> Column {
2422 use polars::prelude::NULL;
2423 let then_val = Self::from_expr(col(""), None);
2424 let else_val = Self::from_expr(lit(NULL), None);
2425 let elem_expr = crate::functions::when(&Self::from_expr(predicate, None))
2426 .then(&then_val)
2427 .otherwise(&else_val)
2428 .into_expr();
2429 let list_expr = self
2430 .expr()
2431 .clone()
2432 .list()
2433 .eval(elem_expr, false)
2434 .list()
2435 .drop_nulls();
2436 Self::from_expr(list_expr, None)
2437 }
2438
2439 pub fn map_from_entries(&self) -> Column {
2441 Self::from_expr(self.expr().clone(), None)
2442 }
2443
2444 pub fn map_contains_key(&self, key: &Column) -> Column {
2446 let args = [key.expr().clone()];
2447 let expr = self.expr().clone().map_many(
2448 crate::udfs::apply_map_contains_key,
2449 &args,
2450 GetOutput::from_type(DataType::Boolean),
2451 );
2452 Self::from_expr(expr, None)
2453 }
2454
2455 pub fn get(&self, key: &Column) -> Column {
2457 let args = [key.expr().clone()];
2458 let expr =
2459 self.expr()
2460 .clone()
2461 .map_many(crate::udfs::apply_get, &args, GetOutput::same_type());
2462 Self::from_expr(expr, None)
2463 }
2464
2465 pub fn get_json_object(&self, path: &str) -> Column {
2467 let path_expr = polars::prelude::lit(path.to_string());
2468 let out = self.expr().clone().str().json_path_match(path_expr);
2469 Self::from_expr(out, None)
2470 }
2471
2472 pub fn from_json(&self, schema: Option<polars::datatypes::DataType>) -> Column {
2474 let out = self.expr().clone().str().json_decode(schema, None);
2475 Self::from_expr(out, None)
2476 }
2477
2478 pub fn to_json(&self) -> Column {
2480 let out = self.expr().clone().struct_().json_encode();
2481 Self::from_expr(out, None)
2482 }
2483
2484 pub fn json_array_length(&self, path: &str) -> Column {
2486 let path = path.to_string();
2487 let expr = self.expr().clone().map(
2488 move |s| crate::udfs::apply_json_array_length(s, &path),
2489 GetOutput::from_type(DataType::Int64),
2490 );
2491 Self::from_expr(expr, None)
2492 }
2493
2494 pub fn json_object_keys(&self) -> Column {
2496 let expr = self.expr().clone().map(
2497 crate::udfs::apply_json_object_keys,
2498 GetOutput::from_type(DataType::List(Box::new(DataType::String))),
2499 );
2500 Self::from_expr(expr, None)
2501 }
2502
2503 pub fn json_tuple(&self, keys: &[&str]) -> Column {
2505 let keys_vec: Vec<String> = keys.iter().map(|s| (*s).to_string()).collect();
2506 let struct_fields: Vec<polars::datatypes::Field> = keys_vec
2507 .iter()
2508 .map(|k| polars::datatypes::Field::new(k.as_str().into(), DataType::String))
2509 .collect();
2510 let expr = self.expr().clone().map(
2511 move |s| crate::udfs::apply_json_tuple(s, &keys_vec),
2512 GetOutput::from_type(DataType::Struct(struct_fields)),
2513 );
2514 Self::from_expr(expr, None)
2515 }
2516
2517 pub fn from_csv(&self) -> Column {
2519 let expr = self.expr().clone().map(
2520 crate::udfs::apply_from_csv,
2521 GetOutput::from_type(DataType::Struct(vec![])),
2522 );
2523 Self::from_expr(expr, None)
2524 }
2525
2526 pub fn to_csv(&self) -> Column {
2528 let expr = self.expr().clone().map(
2529 crate::udfs::apply_to_csv,
2530 GetOutput::from_type(DataType::String),
2531 );
2532 Self::from_expr(expr, None)
2533 }
2534
2535 pub fn parse_url(&self, part: &str, key: Option<&str>) -> Column {
2538 let part = part.to_string();
2539 let key_owned = key.map(String::from);
2540 let expr = self.expr().clone().map(
2541 move |s| crate::udfs::apply_parse_url(s, &part, key_owned.as_deref()),
2542 GetOutput::from_type(DataType::String),
2543 );
2544 Self::from_expr(expr, None)
2545 }
2546
2547 pub fn hash(&self) -> Column {
2549 let expr = self.expr().clone().map(
2550 crate::udfs::apply_hash_one,
2551 GetOutput::from_type(DataType::Int64),
2552 );
2553 Self::from_expr(expr, None)
2554 }
2555
2556 pub fn isin(&self, other: &Column) -> Column {
2558 let out = self.expr().clone().is_in(other.expr().clone());
2559 Self::from_expr(out, None)
2560 }
2561
2562 pub fn url_decode(&self) -> Column {
2564 let expr = self.expr().clone().map(
2565 crate::udfs::apply_url_decode,
2566 GetOutput::from_type(DataType::String),
2567 );
2568 Self::from_expr(expr, None)
2569 }
2570
2571 pub fn url_encode(&self) -> Column {
2573 let expr = self.expr().clone().map(
2574 crate::udfs::apply_url_encode,
2575 GetOutput::from_type(DataType::String),
2576 );
2577 Self::from_expr(expr, None)
2578 }
2579
2580 pub fn shift_left(&self, n: i32) -> Column {
2582 use polars::prelude::*;
2583 let pow = lit(2i64).pow(lit(n as i64));
2584 Self::from_expr(
2585 (self.expr().clone().cast(DataType::Int64) * pow).cast(DataType::Int64),
2586 None,
2587 )
2588 }
2589
2590 pub fn shift_right(&self, n: i32) -> Column {
2592 use polars::prelude::*;
2593 let pow = lit(2i64).pow(lit(n as i64));
2594 Self::from_expr(
2595 (self.expr().clone().cast(DataType::Int64) / pow).cast(DataType::Int64),
2596 None,
2597 )
2598 }
2599
2600 pub fn shift_right_unsigned(&self, n: i32) -> Column {
2602 let expr = self.expr().clone().map(
2603 move |s| crate::udfs::apply_shift_right_unsigned(s, n),
2604 GetOutput::from_type(DataType::Int64),
2605 );
2606 Self::from_expr(expr, None)
2607 }
2608}
2609
#[cfg(test)]
mod tests {
    use super::Column;
    use polars::prelude::{col, df, lit, DataFrame, IntoLazy};

    /// Five-row fixture without nulls: `a` is 1..=5 and `b` is 10..=50 in steps of 10.
    fn test_df() -> DataFrame {
        df!(
            "a" => &[1, 2, 3, 4, 5],
            "b" => &[10, 20, 30, 40, 50]
        )
        .unwrap()
    }

    /// Five-row fixture where `a` has two nulls and `b` has three.
    fn test_df_with_nulls() -> DataFrame {
        df!(
            "a" => &[Some(1), Some(2), None, Some(4), None],
            "b" => &[Some(10), None, Some(30), None, None]
        )
        .unwrap()
    }

    /// Filters `frame` by `predicate` and reports how many rows survive.
    fn rows_kept(frame: DataFrame, predicate: Column) -> usize {
        let kept = frame
            .lazy()
            .filter(predicate.into_expr())
            .collect()
            .unwrap();
        kept.height()
    }

    /// Evaluates `a.eq_null_safe(b)` on `frame` and returns the per-row results.
    fn null_safe_eq_flags(frame: DataFrame) -> Vec<Option<bool>> {
        let lhs = Column::new(String::from("a"));
        let rhs = Column::new(String::from("b"));
        let comparison = lhs.eq_null_safe(&rhs);

        let evaluated = frame
            .lazy()
            .with_column(comparison.into_expr().alias("eq_null_safe"))
            .collect()
            .unwrap();

        let flags_col = evaluated.column("eq_null_safe").unwrap();
        let flags: Vec<Option<bool>> = flags_col.bool().unwrap().into_iter().collect();
        flags
    }

    #[test]
    fn test_column_new() {
        let column = Column::new(String::from("age"));
        assert_eq!(column.name(), "age");
    }

    #[test]
    fn test_column_from_expr() {
        let column = Column::from_expr(col("test"), Some(String::from("test")));
        assert_eq!(column.name(), "test");
    }

    #[test]
    fn test_column_from_expr_default_name() {
        // Without an explicit name the placeholder "<expr>" is reported.
        let column = Column::from_expr(col("test").gt(lit(5)), None);
        assert_eq!(column.name(), "<expr>");
    }

    #[test]
    fn test_column_alias() {
        let aliased = Column::new(String::from("original")).alias("new_name");
        assert_eq!(aliased.name(), "new_name");
    }

    #[test]
    fn test_column_gt() {
        // a > 3 keeps 4 and 5.
        let predicate = Column::new(String::from("a")).gt(lit(3));
        assert_eq!(rows_kept(test_df(), predicate), 2);
    }

    #[test]
    fn test_column_lt() {
        // a < 3 keeps 1 and 2.
        let predicate = Column::new(String::from("a")).lt(lit(3));
        assert_eq!(rows_kept(test_df(), predicate), 2);
    }

    #[test]
    fn test_column_eq() {
        // a == 3 keeps a single row.
        let predicate = Column::new(String::from("a")).eq(lit(3));
        assert_eq!(rows_kept(test_df(), predicate), 1);
    }

    #[test]
    fn test_column_neq() {
        // a != 3 keeps 1, 2, 4 and 5.
        let predicate = Column::new(String::from("a")).neq(lit(3));
        assert_eq!(rows_kept(test_df(), predicate), 4);
    }

    #[test]
    fn test_column_gt_eq() {
        // a >= 3 keeps 3, 4 and 5.
        let predicate = Column::new(String::from("a")).gt_eq(lit(3));
        assert_eq!(rows_kept(test_df(), predicate), 3);
    }

    #[test]
    fn test_column_lt_eq() {
        // a <= 3 keeps 1, 2 and 3.
        let predicate = Column::new(String::from("a")).lt_eq(lit(3));
        assert_eq!(rows_kept(test_df(), predicate), 3);
    }

    #[test]
    fn test_column_is_null() {
        // `a` holds two nulls in the nullable fixture.
        let predicate = Column::new(String::from("a")).is_null();
        assert_eq!(rows_kept(test_df_with_nulls(), predicate), 2);
    }

    #[test]
    fn test_column_is_not_null() {
        // `a` holds three non-null values in the nullable fixture.
        let predicate = Column::new(String::from("a")).is_not_null();
        assert_eq!(rows_kept(test_df_with_nulls(), predicate), 3);
    }

    #[test]
    fn test_eq_null_safe_both_null() {
        let frame = df!(
            "a" => &[Some(1), None, Some(3)],
            "b" => &[Some(1), None, Some(4)]
        )
        .unwrap();

        // Rows: equal values, null vs null (counts as equal), differing values.
        assert_eq!(
            null_safe_eq_flags(frame),
            vec![Some(true), Some(true), Some(false)]
        );
    }

    #[test]
    fn test_eq_null_safe_one_null() {
        let frame = df!(
            "a" => &[Some(1), None, Some(3)],
            "b" => &[Some(1), Some(2), None]
        )
        .unwrap();

        // Rows: equal values, null vs value, value vs null.
        assert_eq!(
            null_safe_eq_flags(frame),
            vec![Some(true), Some(false), Some(false)]
        );
    }
}