1use polars::prelude::{
2 DataType, Expr, Field, PolarsError, PolarsResult, RankMethod, RankOptions, TimeUnit, col, lit,
3};
4
5#[inline]
7pub(crate) fn expect_col(
8 r: PolarsResult<Option<polars::prelude::Column>>,
9) -> PolarsResult<polars::prelude::Column> {
10 r.and_then(|o| o.ok_or_else(|| PolarsError::ComputeError("expected column".into())))
11}
12
/// Translates a SQL `LIKE` pattern into an anchored regular expression.
///
/// * `%` becomes `.*` and `_` becomes `.`.
/// * A character preceded by `escape_char` is matched literally.
/// * Every other character is matched literally (regex metacharacters are
///   backslash-escaped).
/// * An `escape_char` at the very end of the pattern is matched literally.
///
/// The result is prefixed with `(?s)` so that `%` and `_` also match line
/// breaks, and is wrapped in `^...$` so the whole value must match.
fn like_pattern_to_regex(pattern: &str, escape_char: Option<char>) -> String {
    // Characters that need a backslash to be matched literally in a regex.
    const REGEX_META: &str = "\\.*+?[](){}^$|";

    // Appends `ch` so that the regex matches exactly that character.
    fn push_literal(out: &mut String, ch: char) {
        if REGEX_META.contains(ch) {
            out.push('\\');
        }
        out.push(ch);
    }

    let mut out = String::with_capacity(pattern.len() * 2 + 6);
    // `(?s)` makes `.` match `\n`; without it `%` would stop at line breaks.
    out.push_str("(?s)^");
    let mut chars = pattern.chars();
    // `while let` (not `for`) because the escape branch consumes a second char.
    while let Some(c) = chars.next() {
        if escape_char == Some(c) {
            // The escaped character (or the dangling escape char itself) is
            // literal. Only metacharacters get a backslash, so an alphanumeric
            // char never produces an invalid escape such as `\x`.
            push_literal(&mut out, chars.next().unwrap_or(c));
        } else {
            match c {
                '%' => out.push_str(".*"),
                '_' => out.push('.'),
                _ => push_literal(&mut out, c),
            }
        }
    }
    out.push('$');
    out
}
45
/// Tag recorded on columns built by `Column::from_rand` / `Column::from_randn`.
///
/// Each variant carries the optional seed that was passed to the constructor.
#[derive(Debug, Clone, Copy)]
pub enum DeferredRandom {
    /// Set by `Column::from_rand`; holds its `seed` argument.
    Rand(Option<u64>),
    /// Set by `Column::from_randn`; holds its `seed` argument.
    Randn(Option<u64>),
}
52
53#[derive(Debug, Clone)]
57pub struct Column {
58 name: String,
59 expr: Expr, pub(crate) deferred: Option<DeferredRandom>,
62 pub(crate) udf_call: Option<(String, Vec<Column>)>,
64}
65
66impl Column {
67 pub fn new(name: String) -> Self {
69 Column {
70 name: name.clone(),
71 expr: col(&name),
72 deferred: None,
73 udf_call: None,
74 }
75 }
76
77 pub fn from_expr(expr: Expr, name: Option<String>) -> Self {
79 let display_name = name.unwrap_or_else(|| "<expr>".to_string());
80 Column {
81 name: display_name,
82 expr,
83 deferred: None,
84 udf_call: None,
85 }
86 }
87
88 pub fn from_udf_call(name: String, args: Vec<Column>) -> Self {
90 Column {
91 name: format!("{name}()"),
92 expr: lit(0i32), deferred: None,
94 udf_call: Some((name, args)),
95 }
96 }
97
98 pub fn from_rand(seed: Option<u64>) -> Self {
100 let expr = lit(1i64).cum_sum(false).map(
101 move |c| expect_col(crate::udfs::apply_rand_with_seed(c, seed)),
102 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Float64)),
103 );
104 Column {
105 name: "rand".to_string(),
106 expr,
107 deferred: Some(DeferredRandom::Rand(seed)),
108 udf_call: None,
109 }
110 }
111
112 pub fn from_randn(seed: Option<u64>) -> Self {
114 let expr = lit(1i64).cum_sum(false).map(
115 move |c| expect_col(crate::udfs::apply_randn_with_seed(c, seed)),
116 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Float64)),
117 );
118 Column {
119 name: "randn".to_string(),
120 expr,
121 deferred: Some(DeferredRandom::Randn(seed)),
122 udf_call: None,
123 }
124 }
125
    /// Borrows the underlying Polars expression.
    pub fn expr(&self) -> &Expr {
        &self.expr
    }
130
    /// Consumes the column and returns the underlying Polars expression.
    pub fn into_expr(self) -> Expr {
        self.expr
    }
135
    /// Borrows the column's display name.
    pub fn name(&self) -> &str {
        &self.name
    }
140
141 pub fn alias(&self, name: &str) -> Column {
143 Column {
144 name: name.to_string(),
145 expr: self.expr.clone().alias(name),
146 deferred: self.deferred,
147 udf_call: self.udf_call.clone(),
148 }
149 }
150
    /// Sort order for this column; delegates to `crate::functions::asc`.
    pub fn asc(&self) -> crate::functions::SortOrder {
        crate::functions::asc(self)
    }
155
    /// Sort order for this column; delegates to
    /// `crate::functions::asc_nulls_first`.
    pub fn asc_nulls_first(&self) -> crate::functions::SortOrder {
        crate::functions::asc_nulls_first(self)
    }
160
    /// Sort order for this column; delegates to
    /// `crate::functions::asc_nulls_last`.
    pub fn asc_nulls_last(&self) -> crate::functions::SortOrder {
        crate::functions::asc_nulls_last(self)
    }
165
    /// Sort order for this column; delegates to `crate::functions::desc`.
    pub fn desc(&self) -> crate::functions::SortOrder {
        crate::functions::desc(self)
    }
170
    /// Sort order for this column; delegates to
    /// `crate::functions::desc_nulls_first`.
    pub fn desc_nulls_first(&self) -> crate::functions::SortOrder {
        crate::functions::desc_nulls_first(self)
    }
175
    /// Sort order for this column; delegates to
    /// `crate::functions::desc_nulls_last`.
    pub fn desc_nulls_last(&self) -> crate::functions::SortOrder {
        crate::functions::desc_nulls_last(self)
    }
180
181 pub fn is_null(&self) -> Column {
183 Column {
184 name: format!("({} IS NULL)", self.name),
185 expr: self.expr.clone().is_null(),
186 deferred: None,
187 udf_call: None,
188 }
189 }
190
191 pub fn is_not_null(&self) -> Column {
193 Column {
194 name: format!("({} IS NOT NULL)", self.name),
195 expr: self.expr.clone().is_not_null(),
196 deferred: None,
197 udf_call: None,
198 }
199 }
200
    /// Alias for [`Column::is_null`].
    pub fn isnull(&self) -> Column {
        self.is_null()
    }
205
    /// Alias for [`Column::is_not_null`].
    pub fn isnotnull(&self) -> Column {
        self.is_not_null()
    }
210
211 fn null_boolean_expr() -> Expr {
213 use polars::prelude::*;
214 lit(NULL).cast(DataType::Boolean)
216 }
217
218 pub fn like(&self, pattern: &str, escape_char: Option<char>) -> Column {
221 let regex = like_pattern_to_regex(pattern, escape_char);
222 self.regexp_like(®ex)
223 }
224
225 pub fn ilike(&self, pattern: &str, escape_char: Option<char>) -> Column {
228 use polars::prelude::*;
229 let regex = format!("(?i){}", like_pattern_to_regex(pattern, escape_char));
230 Self::from_expr(self.expr().clone().str().contains(lit(regex), false), None)
231 }
232
233 pub fn eq_pyspark(&self, other: &Column) -> Column {
239 let left_null = self.expr().clone().is_null();
241 let right_null = other.expr().clone().is_null();
242 let either_null = left_null.clone().or(right_null.clone());
243
244 let eq_result = self.expr().clone().eq(other.expr().clone());
246
247 let null_boolean = Self::null_boolean_expr();
249 let null_aware_expr = crate::functions::when(&Self::from_expr(either_null, None))
250 .then(&Self::from_expr(null_boolean, None))
251 .otherwise(&Self::from_expr(eq_result, None));
252
253 Self::from_expr(null_aware_expr.into_expr(), None)
254 }
255
256 pub fn ne_pyspark(&self, other: &Column) -> Column {
259 let left_null = self.expr().clone().is_null();
261 let right_null = other.expr().clone().is_null();
262 let either_null = left_null.clone().or(right_null.clone());
263
264 let ne_result = self.expr().clone().neq(other.expr().clone());
266
267 let null_boolean = Self::null_boolean_expr();
269 let null_aware_expr = crate::functions::when(&Self::from_expr(either_null, None))
270 .then(&Self::from_expr(null_boolean, None))
271 .otherwise(&Self::from_expr(ne_result, None));
272
273 Self::from_expr(null_aware_expr.into_expr(), None)
274 }
275
276 pub fn eq_null_safe(&self, other: &Column) -> Column {
279 use crate::functions::{lit_bool, when};
280
281 let (left_c, right_c) = crate::type_coercion::coerce_for_pyspark_eq_null_safe(
282 self.expr().clone(),
283 other.expr().clone(),
284 )
285 .unwrap_or_else(|_| (self.expr().clone(), other.expr().clone()));
286
287 let left_null = left_c.clone().is_null();
288 let right_null = right_c.clone().is_null();
289 let both_null = left_null.clone().and(right_null.clone());
290 let either_null = left_null.clone().or(right_null.clone());
291
292 let eq_result = left_c.eq(right_c);
294
295 when(&Self::from_expr(both_null, None))
299 .then(&lit_bool(true))
300 .otherwise(
301 &when(&Self::from_expr(either_null, None))
302 .then(&lit_bool(false))
303 .otherwise(&Self::from_expr(eq_result, None)),
304 )
305 }
306
    /// Column holding a null literal cast to `Boolean` (display name `"<expr>"`).
    pub fn null_boolean() -> Column {
        Column::from_expr(Self::null_boolean_expr(), None)
    }
313
314 pub fn lit_null(dtype: &str) -> Result<Column, String> {
319 use polars::prelude::{NULL, lit};
320 let dt = crate::functions::parse_type_name(dtype)?;
321 Ok(Column::from_expr(lit(NULL).cast(dt), None))
322 }
323
    /// Literal column from a `bool`; delegates to `crate::functions::lit_bool`.
    pub fn from_bool(b: bool) -> Column {
        crate::functions::lit_bool(b)
    }
328
    /// Literal column from an `i64`; delegates to `crate::functions::lit_i64`.
    pub fn from_i64(n: i64) -> Column {
        crate::functions::lit_i64(n)
    }
333
    /// Literal column from a string; delegates to `crate::functions::lit_str`.
    pub fn from_string(s: &str) -> Column {
        crate::functions::lit_str(s)
    }
338
339 pub fn gt_pyspark(&self, other: &Column) -> Column {
342 let left_null = self.expr().clone().is_null();
344 let right_null = other.expr().clone().is_null();
345 let either_null = left_null.clone().or(right_null.clone());
346
347 let gt_result = self.expr().clone().gt(other.expr().clone());
349
350 let null_boolean = Self::null_boolean_expr();
352 let null_aware_expr = crate::functions::when(&Self::from_expr(either_null, None))
353 .then(&Self::from_expr(null_boolean, None))
354 .otherwise(&Self::from_expr(gt_result, None));
355
356 Self::from_expr(null_aware_expr.into_expr(), None)
357 }
358
359 pub fn ge_pyspark(&self, other: &Column) -> Column {
362 let left_null = self.expr().clone().is_null();
364 let right_null = other.expr().clone().is_null();
365 let either_null = left_null.clone().or(right_null.clone());
366
367 let ge_result = self.expr().clone().gt_eq(other.expr().clone());
369
370 let null_boolean = Self::null_boolean_expr();
372 let null_aware_expr = crate::functions::when(&Self::from_expr(either_null, None))
373 .then(&Self::from_expr(null_boolean, None))
374 .otherwise(&Self::from_expr(ge_result, None));
375
376 Self::from_expr(null_aware_expr.into_expr(), None)
377 }
378
379 pub fn lt_pyspark(&self, other: &Column) -> Column {
382 let left_null = self.expr().clone().is_null();
384 let right_null = other.expr().clone().is_null();
385 let either_null = left_null.clone().or(right_null.clone());
386
387 let lt_result = self.expr().clone().lt(other.expr().clone());
389
390 let null_boolean = Self::null_boolean_expr();
392 let null_aware_expr = crate::functions::when(&Self::from_expr(either_null, None))
393 .then(&Self::from_expr(null_boolean, None))
394 .otherwise(&Self::from_expr(lt_result, None));
395
396 Self::from_expr(null_aware_expr.into_expr(), None)
397 }
398
399 pub fn le_pyspark(&self, other: &Column) -> Column {
402 let left_null = self.expr().clone().is_null();
404 let right_null = other.expr().clone().is_null();
405 let either_null = left_null.clone().or(right_null.clone());
406
407 let le_result = self.expr().clone().lt_eq(other.expr().clone());
409
410 let null_boolean = Self::null_boolean_expr();
412 let null_aware_expr = crate::functions::when(&Self::from_expr(either_null, None))
413 .then(&Self::from_expr(null_boolean, None))
414 .otherwise(&Self::from_expr(le_result, None));
415
416 Self::from_expr(null_aware_expr.into_expr(), None)
417 }
418
419 pub fn gt(&self, other: Expr) -> Column {
425 Self::from_expr(self.expr().clone().gt(other), None)
426 }
427
428 pub fn gt_eq(&self, other: Expr) -> Column {
430 Self::from_expr(self.expr().clone().gt_eq(other), None)
431 }
432
433 pub fn lt(&self, other: Expr) -> Column {
435 Self::from_expr(self.expr().clone().lt(other), None)
436 }
437
438 pub fn lt_eq(&self, other: Expr) -> Column {
440 Self::from_expr(self.expr().clone().lt_eq(other), None)
441 }
442
443 pub fn eq(&self, other: Expr) -> Column {
445 Self::from_expr(self.expr().clone().eq(other), None)
446 }
447
448 pub fn neq(&self, other: Expr) -> Column {
450 Self::from_expr(self.expr().clone().neq(other), None)
451 }
452
453 pub fn upper(&self) -> Column {
464 Self::from_expr(self.expr().clone().str().to_uppercase(), None)
465 }
466
467 pub fn lower(&self) -> Column {
469 Self::from_expr(self.expr().clone().str().to_lowercase(), None)
470 }
471
    /// Alias for [`Column::lower`].
    pub fn lcase(&self) -> Column {
        self.lower()
    }
476
    /// Alias for [`Column::upper`].
    pub fn ucase(&self) -> Column {
        self.upper()
    }
481
482 pub fn substr(&self, start: i64, length: Option<i64>) -> Column {
487 use polars::prelude::*;
488 if length.map(|l| l < 1).unwrap_or(false) {
490 return Self::from_expr(lit(""), None);
491 }
492 let len_chars = self.expr().clone().str().len_chars();
493 let offset_expr = if start >= 1 {
495 lit((start - 1).max(0))
496 } else {
497 let from_end = len_chars + lit(start);
498 when(from_end.clone().lt(lit(0i64)))
499 .then(lit(0i64))
500 .otherwise(from_end)
501 };
502 let length_expr = length.map(lit).unwrap_or_else(|| lit(i64::MAX));
503 Self::from_expr(
504 self.expr().clone().str().slice(offset_expr, length_expr),
505 None,
506 )
507 }
508
509 pub fn length(&self) -> Column {
511 Self::from_expr(self.expr().clone().str().len_chars(), None)
512 }
513
514 pub fn bit_length(&self) -> Column {
516 use polars::prelude::*;
517 let len_bytes = self.expr().clone().str().len_bytes().cast(DataType::Int32);
518 Self::from_expr(len_bytes * lit(8i32), None)
519 }
520
521 pub fn octet_length(&self) -> Column {
523 use polars::prelude::*;
524 Self::from_expr(
525 self.expr().clone().str().len_bytes().cast(DataType::Int32),
526 None,
527 )
528 }
529
    /// Alias for [`Column::length`].
    pub fn char_length(&self) -> Column {
        self.length()
    }
534
    /// Alias for [`Column::length`].
    pub fn character_length(&self) -> Column {
        self.length()
    }
539
540 pub fn encode(&self, charset: &str) -> Column {
542 let charset = charset.to_string();
543 let expr = self.expr().clone().map(
544 move |s| expect_col(crate::udfs::apply_encode(s, &charset)),
545 |_schema, field| Ok(Field::new(field.name().clone(), DataType::String)),
546 );
547 Self::from_expr(expr, None)
548 }
549
550 pub fn decode(&self, charset: &str) -> Column {
552 let charset = charset.to_string();
553 let expr = self.expr().clone().map(
554 move |s| expect_col(crate::udfs::apply_decode(s, &charset)),
555 |_schema, field| Ok(Field::new(field.name().clone(), DataType::String)),
556 );
557 Self::from_expr(expr, None)
558 }
559
560 pub fn to_binary(&self, fmt: &str) -> Column {
562 let fmt = fmt.to_string();
563 let expr = self.expr().clone().map(
564 move |s| expect_col(crate::udfs::apply_to_binary(s, &fmt)),
565 |_schema, field| Ok(Field::new(field.name().clone(), DataType::String)),
566 );
567 Self::from_expr(expr, None)
568 }
569
570 pub fn try_to_binary(&self, fmt: &str) -> Column {
572 let fmt = fmt.to_string();
573 let expr = self.expr().clone().map(
574 move |s| expect_col(crate::udfs::apply_try_to_binary(s, &fmt)),
575 |_schema, field| Ok(Field::new(field.name().clone(), DataType::String)),
576 );
577 Self::from_expr(expr, None)
578 }
579
580 pub fn aes_encrypt(&self, key: &str) -> Column {
582 let key = key.to_string();
583 let expr = self.expr().clone().map(
584 move |s| expect_col(crate::udfs::apply_aes_encrypt(s, &key)),
585 |_schema, field| Ok(Field::new(field.name().clone(), DataType::String)),
586 );
587 Self::from_expr(expr, None)
588 }
589
590 pub fn aes_decrypt(&self, key: &str) -> Column {
592 let key = key.to_string();
593 let expr = self.expr().clone().map(
594 move |s| expect_col(crate::udfs::apply_aes_decrypt(s, &key)),
595 |_schema, field| Ok(Field::new(field.name().clone(), DataType::String)),
596 );
597 Self::from_expr(expr, None)
598 }
599
600 pub fn try_aes_decrypt(&self, key: &str) -> Column {
602 let key = key.to_string();
603 let expr = self.expr().clone().map(
604 move |s| expect_col(crate::udfs::apply_try_aes_decrypt(s, &key)),
605 |_schema, field| Ok(Field::new(field.name().clone(), DataType::String)),
606 );
607 Self::from_expr(expr, None)
608 }
609
610 pub fn typeof_(&self) -> Column {
612 Self::from_expr(
613 self.expr().clone().map(
614 |s| expect_col(crate::udfs::apply_typeof(s)),
615 |_schema, field| Ok(Field::new(field.name().clone(), DataType::String)),
616 ),
617 None,
618 )
619 }
620
621 pub fn trim(&self) -> Column {
623 use polars::prelude::*;
624 Self::from_expr(self.expr().clone().str().strip_chars(lit(" \t\n\r")), None)
625 }
626
627 pub fn ltrim(&self) -> Column {
629 use polars::prelude::*;
630 Self::from_expr(
631 self.expr().clone().str().strip_chars_start(lit(" \t\n\r")),
632 None,
633 )
634 }
635
636 pub fn rtrim(&self) -> Column {
638 use polars::prelude::*;
639 Self::from_expr(
640 self.expr().clone().str().strip_chars_end(lit(" \t\n\r")),
641 None,
642 )
643 }
644
645 pub fn btrim(&self, trim_str: Option<&str>) -> Column {
647 use polars::prelude::*;
648 let chars = trim_str.unwrap_or(" \t\n\r");
649 Self::from_expr(self.expr().clone().str().strip_chars(lit(chars)), None)
650 }
651
652 pub fn locate(&self, substr: &str, pos: i64) -> Column {
654 use polars::prelude::*;
655 if substr.is_empty() {
656 return Self::from_expr(lit(1i64), None);
657 }
658 let start = (pos - 1).max(0);
659 let slice_expr = self.expr().clone().str().slice(lit(start), lit(i64::MAX));
660 let found = slice_expr.str().find_literal(lit(substr.to_string()));
661 Self::from_expr(
662 (found.cast(DataType::Int64) + lit(start + 1)).fill_null(lit(0i64)),
663 None,
664 )
665 }
666
667 pub fn conv(&self, from_base: i32, to_base: i32) -> Column {
669 let expr = self.expr().clone().map(
670 move |s| expect_col(crate::udfs::apply_conv(s, from_base, to_base)),
671 |_schema, field| Ok(Field::new(field.name().clone(), DataType::String)),
672 );
673 Self::from_expr(expr, None)
674 }
675
676 pub fn hex(&self) -> Column {
678 let expr = self.expr().clone().map(
679 |s| expect_col(crate::udfs::apply_hex(s)),
680 |_schema, field| Ok(Field::new(field.name().clone(), DataType::String)),
681 );
682 Self::from_expr(expr, None)
683 }
684
685 pub fn unhex(&self) -> Column {
687 let expr = self.expr().clone().map(
688 |s| expect_col(crate::udfs::apply_unhex(s)),
689 |_schema, field| Ok(Field::new(field.name().clone(), DataType::String)),
690 );
691 Self::from_expr(expr, None)
692 }
693
694 pub fn bin(&self) -> Column {
696 let expr = self.expr().clone().map(
697 |s| expect_col(crate::udfs::apply_bin(s)),
698 |_schema, field| Ok(Field::new(field.name().clone(), DataType::String)),
699 );
700 Self::from_expr(expr, None)
701 }
702
703 pub fn getbit(&self, pos: i64) -> Column {
705 let expr = self.expr().clone().map(
706 move |s| expect_col(crate::udfs::apply_getbit(s, pos)),
707 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Int64)),
708 );
709 Self::from_expr(expr, None)
710 }
711
712 pub fn bit_and(&self, other: &Column) -> Column {
714 let args = [other.expr().clone()];
715 let expr = self.expr().clone().cast(DataType::Int64).map_many(
716 |cols| expect_col(crate::udfs::apply_bit_and(cols)),
717 &args,
718 |_schema, fields| Ok(Field::new(fields[0].name().clone(), DataType::Int64)),
719 );
720 Self::from_expr(expr, None)
721 }
722
723 pub fn bit_or(&self, other: &Column) -> Column {
725 let args = [other.expr().clone()];
726 let expr = self.expr().clone().cast(DataType::Int64).map_many(
727 |cols| expect_col(crate::udfs::apply_bit_or(cols)),
728 &args,
729 |_schema, fields| Ok(Field::new(fields[0].name().clone(), DataType::Int64)),
730 );
731 Self::from_expr(expr, None)
732 }
733
734 pub fn bit_xor(&self, other: &Column) -> Column {
736 let args = [other.expr().clone()];
737 let expr = self.expr().clone().cast(DataType::Int64).map_many(
738 |cols| expect_col(crate::udfs::apply_bit_xor(cols)),
739 &args,
740 |_schema, fields| Ok(Field::new(fields[0].name().clone(), DataType::Int64)),
741 );
742 Self::from_expr(expr, None)
743 }
744
745 pub fn bit_count(&self) -> Column {
747 let expr = self.expr().clone().map(
748 |s| expect_col(crate::udfs::apply_bit_count(s)),
749 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Int64)),
750 );
751 Self::from_expr(expr, None)
752 }
753
754 pub fn assert_true(&self, err_msg: Option<&str>) -> Column {
757 let msg = err_msg.map(String::from);
758 let expr = self.expr().clone().map(
759 move |c| expect_col(crate::udfs::apply_assert_true(c, msg.as_deref())),
760 |_schema, field| Ok(field.clone()),
761 );
762 Self::from_expr(expr, None)
763 }
764
765 pub fn bitwise_not(&self) -> Column {
767 let expr = (lit(-1i64) - self.expr().clone().cast(DataType::Int64)).cast(DataType::Int64);
769 Self::from_expr(expr, None)
770 }
771
772 pub fn str_to_map(&self, pair_delim: &str, key_value_delim: &str) -> Column {
774 let pair_delim = pair_delim.to_string();
775 let key_value_delim = key_value_delim.to_string();
776 let expr = self.expr().clone().map(
777 move |s| {
778 expect_col(crate::udfs::apply_str_to_map(
779 s,
780 &pair_delim,
781 &key_value_delim,
782 ))
783 },
784 |_schema, field| Ok(field.clone()),
785 );
786 Self::from_expr(expr, None)
787 }
788
789 fn pattern_has_lookaround(pattern: &str) -> bool {
791 let p = pattern.as_bytes();
792 let n = p.len();
793 let mut i = 0;
794 while i + 2 < n {
795 if p[i] == b'(' && p[i + 1] == b'?' {
796 match p[i + 2] {
797 b'=' | b'!' => return true, b'<' if i + 4 <= n && (p[i + 3] == b'=' || p[i + 3] == b'!') => return true, _ => {}
800 }
801 }
802 i += 1;
803 }
804 false
805 }
806
807 pub fn regexp_extract(&self, pattern: &str, group_index: usize) -> Column {
810 use polars::prelude::*;
811 if Self::pattern_has_lookaround(pattern) {
812 let pat = pattern.to_string();
813 let group = group_index;
814 Self::from_expr(
815 self.expr().clone().map(
816 move |s| {
817 expect_col(crate::udfs::apply_regexp_extract_lookaround(s, &pat, group))
818 },
819 |_schema, field| Ok(Field::new(field.name().clone(), DataType::String)),
820 ),
821 None,
822 )
823 } else {
824 let pat = pattern.to_string();
825 Self::from_expr(
826 self.expr().clone().str().extract(lit(pat), group_index),
827 None,
828 )
829 }
830 }
831
832 pub fn regexp_replace(&self, pattern: &str, replacement: &str) -> Column {
834 use polars::prelude::*;
835 let pat = pattern.to_string();
836 let rep = replacement.to_string();
837 Self::from_expr(
838 self.expr().clone().str().replace(lit(pat), lit(rep), false),
839 None,
840 )
841 }
842
843 pub fn left(&self, n: i64) -> Column {
845 use polars::prelude::*;
846 let len = n.max(0) as u32;
847 Self::from_expr(
848 self.expr().clone().str().slice(lit(0i64), lit(len as i64)),
849 None,
850 )
851 }
852
853 pub fn right(&self, n: i64) -> Column {
855 use polars::prelude::*;
856 let n_val = n.max(0);
857 let n_expr = lit(n_val);
858 let len_chars = self.expr().clone().str().len_chars().cast(DataType::Int64);
859 let start = when((len_chars.clone() - n_expr.clone()).lt_eq(lit(0i64)))
860 .then(lit(0i64))
861 .otherwise(len_chars - n_expr.clone());
862 Self::from_expr(self.expr().clone().str().slice(start, n_expr), None)
863 }
864
865 pub fn replace(&self, search: &str, replacement: &str) -> Column {
867 use polars::prelude::*;
868 Self::from_expr(
869 self.expr().clone().str().replace_all(
870 lit(search.to_string()),
871 lit(replacement.to_string()),
872 true,
873 ),
874 None,
875 )
876 }
877
878 pub fn replace_many(&self, pairs: &[(String, String)]) -> Column {
880 let mut out = self.clone();
881 for (search, replacement) in pairs {
882 out = out.replace(search, replacement);
883 }
884 out
885 }
886
887 pub fn startswith(&self, prefix: &str) -> Column {
889 use polars::prelude::*;
890 Self::from_expr(
891 self.expr()
892 .clone()
893 .str()
894 .starts_with(lit(prefix.to_string())),
895 None,
896 )
897 }
898
899 pub fn endswith(&self, suffix: &str) -> Column {
901 use polars::prelude::*;
902 Self::from_expr(
903 self.expr().clone().str().ends_with(lit(suffix.to_string())),
904 None,
905 )
906 }
907
908 pub fn contains(&self, substring: &str) -> Column {
910 use polars::prelude::*;
911 Self::from_expr(
912 self.expr()
913 .clone()
914 .str()
915 .contains(lit(substring.to_string()), true),
916 None,
917 )
918 }
919
920 pub fn split(&self, delimiter: &str, limit: Option<i32>) -> Column {
924 use polars::prelude::*;
925 let use_limit = limit.is_some_and(|l| l > 0);
926 if use_limit {
927 let delim = delimiter.to_string();
928 let lim = limit.unwrap_or(0);
929 let expr = self.expr().clone().map(
930 move |col| expect_col(crate::udfs::apply_split_with_limit(col, &delim, lim)),
931 |_schema, field| {
932 Ok(Field::new(
933 field.name().clone(),
934 DataType::List(Box::new(DataType::String)),
935 ))
936 },
937 );
938 Self::from_expr(expr, None)
939 } else {
940 Self::from_expr(
941 self.expr().clone().str().split(lit(delimiter.to_string())),
942 None,
943 )
944 }
945 }
946
947 pub fn initcap(&self) -> Column {
950 Self::from_expr(self.expr().clone().str().to_lowercase(), None)
951 }
952
953 pub fn regexp_extract_all(&self, pattern: &str) -> Column {
955 use polars::prelude::*;
956 Self::from_expr(
957 self.expr()
958 .clone()
959 .str()
960 .extract_all(lit(pattern.to_string())),
961 None,
962 )
963 }
964
965 pub fn regexp_like(&self, pattern: &str) -> Column {
967 use polars::prelude::*;
968 Self::from_expr(
969 self.expr()
970 .clone()
971 .str()
972 .contains(lit(pattern.to_string()), false),
973 None,
974 )
975 }
976
977 pub fn regexp_count(&self, pattern: &str) -> Column {
979 use polars::prelude::*;
980 Self::from_expr(
981 self.expr()
982 .clone()
983 .str()
984 .count_matches(lit(pattern.to_string()), false)
985 .cast(DataType::Int64),
986 None,
987 )
988 }
989
    /// First match of `pattern`; equivalent to `regexp_extract(pattern, 0)`.
    pub fn regexp_substr(&self, pattern: &str) -> Column {
        self.regexp_extract(pattern, 0)
    }
994
995 pub fn regexp_instr(&self, pattern: &str, group_idx: Option<usize>) -> Column {
997 let idx = group_idx.unwrap_or(0);
998 let pattern = pattern.to_string();
999 let expr = self.expr().clone().map(
1000 move |s| expect_col(crate::udfs::apply_regexp_instr(s, pattern.clone(), idx)),
1001 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Int64)),
1002 );
1003 Self::from_expr(expr, None)
1004 }
1005
1006 pub fn find_in_set(&self, set_column: &Column) -> Column {
1008 let args = [set_column.expr().clone()];
1009 let expr = self.expr().clone().map_many(
1010 |cols| expect_col(crate::udfs::apply_find_in_set(cols)),
1011 &args,
1012 |_schema, fields| Ok(Field::new(fields[0].name().clone(), DataType::Int64)),
1013 );
1014 Self::from_expr(expr, None)
1015 }
1016
1017 pub fn repeat(&self, n: i32) -> Column {
1019 use polars::prelude::*;
1020 Self::from_expr(
1022 self.expr()
1023 .clone()
1024 .repeat_by(lit(n as u32))
1025 .list()
1026 .join(lit(""), false),
1027 None,
1028 )
1029 }
1030
1031 pub fn reverse(&self) -> Column {
1033 Self::from_expr(self.expr().clone().str().reverse(), None)
1034 }
1035
1036 pub fn instr(&self, substr: &str) -> Column {
1038 use polars::prelude::*;
1039 let found = self
1040 .expr()
1041 .clone()
1042 .str()
1043 .find_literal(lit(substr.to_string()));
1044 Self::from_expr(
1046 (found.cast(DataType::Int64) + lit(1i64)).fill_null(lit(0i64)),
1047 None,
1048 )
1049 }
1050
1051 pub fn lpad(&self, length: i32, pad: &str) -> Column {
1053 let pad_str = if pad.is_empty() { " " } else { pad };
1054 let fill = pad_str.chars().next().unwrap_or(' ');
1055 Self::from_expr(
1056 self.expr()
1057 .clone()
1058 .str()
1059 .pad_start(lit(length as i64), fill),
1060 None,
1061 )
1062 }
1063
1064 pub fn rpad(&self, length: i32, pad: &str) -> Column {
1066 let pad_str = if pad.is_empty() { " " } else { pad };
1067 let fill = pad_str.chars().next().unwrap_or(' ');
1068 Self::from_expr(
1069 self.expr().clone().str().pad_end(lit(length as i64), fill),
1070 None,
1071 )
1072 }
1073
1074 pub fn translate(&self, from_str: &str, to_str: &str) -> Column {
1076 use polars::prelude::*;
1077 let mut e = self.expr().clone();
1078 let from_chars: Vec<char> = from_str.chars().collect();
1079 let to_chars: Vec<char> = to_str.chars().collect();
1080 for (i, fc) in from_chars.iter().enumerate() {
1081 let f = fc.to_string();
1082 let t = to_chars
1083 .get(i)
1084 .map(|c| c.to_string())
1085 .unwrap_or_else(String::new); e = e.str().replace_all(lit(f), lit(t), true);
1087 }
1088 Self::from_expr(e, None)
1089 }
1090
1091 pub fn mask(
1094 &self,
1095 upper_char: Option<char>,
1096 lower_char: Option<char>,
1097 digit_char: Option<char>,
1098 other_char: Option<char>,
1099 ) -> Column {
1100 use polars::prelude::*;
1101 let upper = upper_char.unwrap_or('X').to_string();
1102 let lower = lower_char.unwrap_or('x').to_string();
1103 let digit = digit_char.unwrap_or('n').to_string();
1104 let other = other_char.map(|c| c.to_string());
1105 let mut e = self
1106 .expr()
1107 .clone()
1108 .str()
1109 .replace_all(lit("[A-Z]".to_string()), lit(upper), false)
1110 .str()
1111 .replace_all(lit("[a-z]".to_string()), lit(lower), false)
1112 .str()
1113 .replace_all(lit(r"\d".to_string()), lit(digit), false);
1114 if let Some(o) = other {
1115 e = e
1116 .str()
1117 .replace_all(lit("[^A-Za-z0-9]".to_string()), lit(o), false);
1118 }
1119 Self::from_expr(e, None)
1120 }
1121
1122 pub fn split_part(&self, delimiter: &str, part_num: i64) -> Column {
1125 use polars::prelude::*;
1126 if part_num == 0 {
1127 return Self::from_expr(lit(NULL), None);
1128 }
1129 let use_regex = delimiter == "|";
1130 if use_regex {
1131 let pattern = delimiter.to_string();
1132 let part = part_num;
1133 let get_expr = self.expr().clone().map(
1134 move |col| expect_col(crate::udfs::apply_split_part_regex(col, &pattern, part)),
1135 |_schema, field| Ok(Field::new(field.name().clone(), DataType::String)),
1136 );
1137 let expr = when(self.expr().clone().is_null())
1138 .then(lit(NULL))
1139 .otherwise(get_expr.fill_null(lit("")));
1140 return Self::from_expr(expr, None);
1141 }
1142 let delim = delimiter.to_string();
1143 let split_expr = self.expr().clone().str().split(lit(delim));
1144 let index = if part_num > 0 {
1145 lit(part_num - 1)
1146 } else {
1147 lit(part_num)
1148 };
1149 let get_expr = split_expr.list().get(index, true).fill_null(lit(""));
1150 let expr = when(self.expr().clone().is_null())
1151 .then(lit(NULL))
1152 .otherwise(get_expr);
1153 Self::from_expr(expr, None)
1154 }
1155
1156 pub fn substring_index(&self, delimiter: &str, count: i64) -> Column {
1158 use polars::prelude::*;
1159 let delim = delimiter.to_string();
1160 let split_expr = self.expr().clone().str().split(lit(delim.clone()));
1161 let n = count.unsigned_abs() as i64;
1162 let expr = if count > 0 {
1163 split_expr
1164 .clone()
1165 .list()
1166 .slice(lit(0i64), lit(n))
1167 .list()
1168 .join(lit(delim), false)
1169 } else {
1170 let len = split_expr.clone().list().len();
1171 let start = when(len.clone().gt(lit(n)))
1172 .then(len.clone() - lit(n))
1173 .otherwise(lit(0i64));
1174 let slice_len = when(len.clone().gt(lit(n))).then(lit(n)).otherwise(len);
1175 split_expr
1176 .list()
1177 .slice(start, slice_len)
1178 .list()
1179 .join(lit(delim), false)
1180 };
1181 Self::from_expr(expr, None)
1182 }
1183
1184 pub fn soundex(&self) -> Column {
1186 let expr = self.expr().clone().map(
1187 |s| expect_col(crate::udfs::apply_soundex(s)),
1188 |_schema, field| Ok(field.clone()),
1189 );
1190 Self::from_expr(expr, None)
1191 }
1192
1193 pub fn levenshtein(&self, other: &Column) -> Column {
1195 let args = [other.expr().clone()];
1196 let expr = self.expr().clone().map_many(
1197 |cols| expect_col(crate::udfs::apply_levenshtein(cols)),
1198 &args,
1199 |_schema, fields| Ok(Field::new(fields[0].name().clone(), DataType::Int64)),
1200 );
1201 Self::from_expr(expr, None)
1202 }
1203
1204 pub fn crc32(&self) -> Column {
1206 let expr = self.expr().clone().map(
1207 |s| expect_col(crate::udfs::apply_crc32(s)),
1208 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Int64)),
1209 );
1210 Self::from_expr(expr, None)
1211 }
1212
1213 pub fn xxhash64(&self) -> Column {
1215 let expr = self.expr().clone().map(
1216 |s| expect_col(crate::udfs::apply_xxhash64(s)),
1217 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Int64)),
1218 );
1219 Self::from_expr(expr, None)
1220 }
1221
1222 pub fn ascii(&self) -> Column {
1224 let expr = self.expr().clone().map(
1225 |s| expect_col(crate::udfs::apply_ascii(s)),
1226 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Int32)),
1227 );
1228 Self::from_expr(expr, None)
1229 }
1230
1231 pub fn format_number(&self, decimals: u32) -> Column {
1233 let expr = self.expr().clone().map(
1234 move |s| expect_col(crate::udfs::apply_format_number(s, decimals)),
1235 |_schema, field| Ok(Field::new(field.name().clone(), DataType::String)),
1236 );
1237 Self::from_expr(expr, None)
1238 }
1239
1240 pub fn char(&self) -> Column {
1242 let expr = self.expr().clone().map(
1243 |s| expect_col(crate::udfs::apply_char(s)),
1244 |_schema, field| Ok(Field::new(field.name().clone(), DataType::String)),
1245 );
1246 Self::from_expr(expr, None)
1247 }
1248
    /// Alias for [`Column::char`].
    pub fn chr(&self) -> Column {
        self.char()
    }
1253
1254 pub fn base64(&self) -> Column {
1256 let expr = self.expr().clone().map(
1257 |s| expect_col(crate::udfs::apply_base64(s)),
1258 |_schema, field| Ok(field.clone()),
1259 );
1260 Self::from_expr(expr, None)
1261 }
1262
1263 pub fn unbase64(&self) -> Column {
1265 let expr = self.expr().clone().map(
1266 |s| expect_col(crate::udfs::apply_unbase64(s)),
1267 |_schema, field| Ok(field.clone()),
1268 );
1269 Self::from_expr(expr, None)
1270 }
1271
1272 pub fn sha1(&self) -> Column {
1274 let expr = self.expr().clone().map(
1275 |s| expect_col(crate::udfs::apply_sha1(s)),
1276 |_schema, field| Ok(field.clone()),
1277 );
1278 Self::from_expr(expr, None)
1279 }
1280
1281 pub fn sha2(&self, bit_length: i32) -> Column {
1283 let expr = self.expr().clone().map(
1284 move |s| expect_col(crate::udfs::apply_sha2(s, bit_length)),
1285 |_schema, field| Ok(field.clone()),
1286 );
1287 Self::from_expr(expr, None)
1288 }
1289
1290 pub fn md5(&self) -> Column {
1292 let expr = self.expr().clone().map(
1293 |s| expect_col(crate::udfs::apply_md5(s)),
1294 |_schema, field| Ok(field.clone()),
1295 );
1296 Self::from_expr(expr, None)
1297 }
1298
1299 pub fn overlay(&self, replace: &str, pos: i64, length: i64) -> Column {
1301 use polars::prelude::*;
1302 let pos = pos.max(1);
1303 let replace_len = length.max(0);
1304 let start_left = 0i64;
1305 let len_left = (pos - 1).max(0);
1306 let start_right = (pos - 1 + replace_len).max(0);
1307 let len_right = 1_000_000i64; let left = self
1309 .expr()
1310 .clone()
1311 .str()
1312 .slice(lit(start_left), lit(len_left));
1313 let mid = lit(replace.to_string());
1314 let right = self
1315 .expr()
1316 .clone()
1317 .str()
1318 .slice(lit(start_right), lit(len_right));
1319 let exprs = [left, mid, right];
1320 let concat_expr = polars::prelude::concat_str(&exprs, "", false);
1321 Self::from_expr(concat_expr, None)
1322 }
1323
1324 pub fn abs(&self) -> Column {
1328 Self::from_expr(self.expr().clone().abs(), None)
1329 }
1330
1331 pub fn ceil(&self) -> Column {
1333 Self::from_expr(self.expr().clone().ceil(), None)
1334 }
1335
1336 pub fn ceiling(&self) -> Column {
1338 self.ceil()
1339 }
1340
1341 pub fn floor(&self) -> Column {
1343 Self::from_expr(self.expr().clone().floor(), None)
1344 }
1345
1346 pub fn round(&self, decimals: u32) -> Column {
1349 let expr = self.expr().clone().map(
1350 move |s| expect_col(crate::udfs::apply_round(s, decimals)),
1351 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Float64)),
1352 );
1353 Self::from_expr(expr, None)
1354 }
1355
1356 pub fn bround(&self, scale: i32) -> Column {
1358 let expr = self.expr().clone().map(
1359 move |s| expect_col(crate::udfs::apply_bround(s, scale)),
1360 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Float64)),
1361 );
1362 Self::from_expr(expr, None)
1363 }
1364
1365 pub fn negate(&self) -> Column {
1367 use polars::prelude::*;
1368 Self::from_expr(self.expr().clone() * lit(-1), None)
1369 }
1370
1371 pub fn multiply_pyspark(&self, other: &Column) -> Column {
1376 let args = [other.expr().clone()];
1377 let expr = self.expr().clone().map_many(
1378 |cols| expect_col(crate::udfs::apply_pyspark_multiply(cols)),
1379 &args,
1380 |_schema, fields| Ok(Field::new(fields[0].name().clone(), DataType::Float64)),
1381 );
1382 Self::from_expr(expr, None)
1383 }
1384
1385 pub fn add_pyspark(&self, other: &Column) -> Column {
1387 let args = [other.expr().clone()];
1388 let expr = self.expr().clone().map_many(
1389 |cols| expect_col(crate::udfs::apply_pyspark_add(cols)),
1390 &args,
1391 |_schema, fields| Ok(Field::new(fields[0].name().clone(), DataType::Float64)),
1392 );
1393 Self::from_expr(expr, None)
1394 }
1395
1396 pub fn subtract_pyspark(&self, other: &Column) -> Column {
1398 let args = [other.expr().clone()];
1399 let expr = self.expr().clone().map_many(
1400 |cols| expect_col(crate::udfs::apply_pyspark_subtract(cols)),
1401 &args,
1402 |_schema, fields| Ok(Field::new(fields[0].name().clone(), DataType::Float64)),
1403 );
1404 Self::from_expr(expr, None)
1405 }
1406
1407 pub fn divide_pyspark(&self, other: &Column) -> Column {
1409 let args = [other.expr().clone()];
1410 let expr = self.expr().clone().map_many(
1411 |cols| expect_col(crate::udfs::apply_pyspark_divide(cols)),
1412 &args,
1413 |_schema, fields| Ok(Field::new(fields[0].name().clone(), DataType::Float64)),
1414 );
1415 Self::from_expr(expr, None)
1416 }
1417
1418 pub fn mod_pyspark(&self, other: &Column) -> Column {
1420 let args = [other.expr().clone()];
1421 let expr = self.expr().clone().map_many(
1422 |cols| expect_col(crate::udfs::apply_pyspark_mod(cols)),
1423 &args,
1424 |_schema, fields| Ok(Field::new(fields[0].name().clone(), DataType::Float64)),
1425 );
1426 Self::from_expr(expr, None)
1427 }
1428
1429 pub fn multiply(&self, other: &Column) -> Column {
1431 Self::from_expr(self.expr().clone() * other.expr().clone(), None)
1432 }
1433
1434 pub fn add(&self, other: &Column) -> Column {
1436 Self::from_expr(self.expr().clone() + other.expr().clone(), None)
1437 }
1438
1439 pub fn subtract(&self, other: &Column) -> Column {
1441 Self::from_expr(self.expr().clone() - other.expr().clone(), None)
1442 }
1443
1444 pub fn divide(&self, other: &Column) -> Column {
1446 Self::from_expr(self.expr().clone() / other.expr().clone(), None)
1447 }
1448
1449 pub fn mod_(&self, other: &Column) -> Column {
1451 Self::from_expr(self.expr().clone() % other.expr().clone(), None)
1452 }
1453
1454 pub fn sqrt(&self) -> Column {
1456 Self::from_expr(self.expr().clone().sqrt(), None)
1457 }
1458
1459 pub fn pow(&self, exp: i64) -> Column {
1461 use polars::prelude::*;
1462 Self::from_expr(self.expr().clone().pow(lit(exp)), None)
1463 }
1464
1465 pub fn pow_with(&self, exponent: &Column) -> Column {
1467 Self::from_expr(self.expr().clone().pow(exponent.expr().clone()), None)
1468 }
1469
1470 pub fn power(&self, exp: i64) -> Column {
1472 self.pow(exp)
1473 }
1474
1475 pub fn exp(&self) -> Column {
1477 Self::from_expr(self.expr().clone().exp(), None)
1478 }
1479
1480 pub fn log(&self) -> Column {
1482 Self::from_expr(self.expr().clone().log(lit(std::f64::consts::E)), None)
1483 }
1484
1485 pub fn ln(&self) -> Column {
1487 self.log()
1488 }
1489
1490 pub fn sin(&self) -> Column {
1492 let expr = self.expr().clone().map(
1493 |s| expect_col(crate::udfs::apply_sin(s)),
1494 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Float64)),
1495 );
1496 Self::from_expr(expr, None)
1497 }
1498
1499 pub fn cos(&self) -> Column {
1501 let expr = self.expr().clone().map(
1502 |s| expect_col(crate::udfs::apply_cos(s)),
1503 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Float64)),
1504 );
1505 Self::from_expr(expr, None)
1506 }
1507
1508 pub fn tan(&self) -> Column {
1510 let expr = self.expr().clone().map(
1511 |s| expect_col(crate::udfs::apply_tan(s)),
1512 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Float64)),
1513 );
1514 Self::from_expr(expr, None)
1515 }
1516
1517 pub fn cot(&self) -> Column {
1519 let expr = self.expr().clone().map(
1520 |s| expect_col(crate::udfs::apply_cot(s)),
1521 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Float64)),
1522 );
1523 Self::from_expr(expr, None)
1524 }
1525
1526 pub fn csc(&self) -> Column {
1528 let expr = self.expr().clone().map(
1529 |s| expect_col(crate::udfs::apply_csc(s)),
1530 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Float64)),
1531 );
1532 Self::from_expr(expr, None)
1533 }
1534
1535 pub fn sec(&self) -> Column {
1537 let expr = self.expr().clone().map(
1538 |s| expect_col(crate::udfs::apply_sec(s)),
1539 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Float64)),
1540 );
1541 Self::from_expr(expr, None)
1542 }
1543
1544 pub fn asin(&self) -> Column {
1546 let expr = self.expr().clone().map(
1547 |s| expect_col(crate::udfs::apply_asin(s)),
1548 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Float64)),
1549 );
1550 Self::from_expr(expr, None)
1551 }
1552
1553 pub fn acos(&self) -> Column {
1555 let expr = self.expr().clone().map(
1556 |s| expect_col(crate::udfs::apply_acos(s)),
1557 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Float64)),
1558 );
1559 Self::from_expr(expr, None)
1560 }
1561
1562 pub fn atan(&self) -> Column {
1564 let expr = self.expr().clone().map(
1565 |s| expect_col(crate::udfs::apply_atan(s)),
1566 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Float64)),
1567 );
1568 Self::from_expr(expr, None)
1569 }
1570
1571 pub fn atan2(&self, x: &Column) -> Column {
1573 let args = [x.expr().clone()];
1574 let expr = self.expr().clone().map_many(
1575 |cols| expect_col(crate::udfs::apply_atan2(cols)),
1576 &args,
1577 |_schema, fields| Ok(Field::new(fields[0].name().clone(), DataType::Float64)),
1578 );
1579 Self::from_expr(expr, None)
1580 }
1581
1582 pub fn degrees(&self) -> Column {
1584 let expr = self.expr().clone().map(
1585 |s| expect_col(crate::udfs::apply_degrees(s)),
1586 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Float64)),
1587 );
1588 Self::from_expr(expr, None)
1589 }
1590
1591 pub fn to_degrees(&self) -> Column {
1593 self.degrees()
1594 }
1595
1596 pub fn radians(&self) -> Column {
1598 let expr = self.expr().clone().map(
1599 |s| expect_col(crate::udfs::apply_radians(s)),
1600 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Float64)),
1601 );
1602 Self::from_expr(expr, None)
1603 }
1604
1605 pub fn to_radians(&self) -> Column {
1607 self.radians()
1608 }
1609
1610 pub fn signum(&self) -> Column {
1612 let expr = self.expr().clone().map(
1613 |s| expect_col(crate::udfs::apply_signum(s)),
1614 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Float64)),
1615 );
1616 Self::from_expr(expr, None)
1617 }
1618
1619 pub fn cosh(&self) -> Column {
1621 let expr = self.expr().clone().map(
1622 |s| expect_col(crate::udfs::apply_cosh(s)),
1623 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Float64)),
1624 );
1625 Self::from_expr(expr, None)
1626 }
1627 pub fn sinh(&self) -> Column {
1629 let expr = self.expr().clone().map(
1630 |s| expect_col(crate::udfs::apply_sinh(s)),
1631 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Float64)),
1632 );
1633 Self::from_expr(expr, None)
1634 }
1635 pub fn tanh(&self) -> Column {
1637 let expr = self.expr().clone().map(
1638 |s| expect_col(crate::udfs::apply_tanh(s)),
1639 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Float64)),
1640 );
1641 Self::from_expr(expr, None)
1642 }
1643 pub fn acosh(&self) -> Column {
1645 let expr = self.expr().clone().map(
1646 |s| expect_col(crate::udfs::apply_acosh(s)),
1647 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Float64)),
1648 );
1649 Self::from_expr(expr, None)
1650 }
1651 pub fn asinh(&self) -> Column {
1653 let expr = self.expr().clone().map(
1654 |s| expect_col(crate::udfs::apply_asinh(s)),
1655 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Float64)),
1656 );
1657 Self::from_expr(expr, None)
1658 }
1659 pub fn atanh(&self) -> Column {
1661 let expr = self.expr().clone().map(
1662 |s| expect_col(crate::udfs::apply_atanh(s)),
1663 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Float64)),
1664 );
1665 Self::from_expr(expr, None)
1666 }
1667 pub fn cbrt(&self) -> Column {
1669 let expr = self.expr().clone().map(
1670 |s| expect_col(crate::udfs::apply_cbrt(s)),
1671 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Float64)),
1672 );
1673 Self::from_expr(expr, None)
1674 }
1675 pub fn expm1(&self) -> Column {
1677 let expr = self.expr().clone().map(
1678 |s| expect_col(crate::udfs::apply_expm1(s)),
1679 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Float64)),
1680 );
1681 Self::from_expr(expr, None)
1682 }
1683 pub fn log1p(&self) -> Column {
1685 let expr = self.expr().clone().map(
1686 |s| expect_col(crate::udfs::apply_log1p(s)),
1687 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Float64)),
1688 );
1689 Self::from_expr(expr, None)
1690 }
1691 pub fn log10(&self) -> Column {
1693 let expr = self.expr().clone().map(
1694 |s| expect_col(crate::udfs::apply_log10(s)),
1695 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Float64)),
1696 );
1697 Self::from_expr(expr, None)
1698 }
1699 pub fn log2(&self) -> Column {
1701 let expr = self.expr().clone().map(
1702 |s| expect_col(crate::udfs::apply_log2(s)),
1703 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Float64)),
1704 );
1705 Self::from_expr(expr, None)
1706 }
1707 pub fn rint(&self) -> Column {
1709 let expr = self.expr().clone().map(
1710 |s| expect_col(crate::udfs::apply_rint(s)),
1711 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Float64)),
1712 );
1713 Self::from_expr(expr, None)
1714 }
1715
1716 pub fn hypot(&self, other: &Column) -> Column {
1718 let xx = self.expr().clone() * self.expr().clone();
1719 let yy = other.expr().clone() * other.expr().clone();
1720 Self::from_expr((xx + yy).sqrt(), None)
1721 }
1722
1723 pub fn cast_to(&self, type_name: &str) -> Result<Column, String> {
1725 crate::functions::cast(self, type_name)
1726 }
1727
1728 pub fn try_cast_to(&self, type_name: &str) -> Result<Column, String> {
1730 crate::functions::try_cast(self, type_name)
1731 }
1732
1733 pub fn is_nan(&self) -> Column {
1735 Self::from_expr(self.expr().clone().is_nan(), None)
1736 }
1737
1738 pub fn year(&self) -> Column {
1742 Self::from_expr(self.expr().clone().dt().year(), None)
1743 }
1744
1745 pub fn month(&self) -> Column {
1747 Self::from_expr(self.expr().clone().dt().month(), None)
1748 }
1749
1750 pub fn day(&self) -> Column {
1752 Self::from_expr(self.expr().clone().dt().day(), None)
1753 }
1754
1755 pub fn dayofmonth(&self) -> Column {
1757 self.day()
1758 }
1759
1760 pub fn quarter(&self) -> Column {
1762 Self::from_expr(self.expr().clone().dt().quarter(), None)
1763 }
1764
1765 pub fn weekofyear(&self) -> Column {
1767 Self::from_expr(self.expr().clone().dt().week(), None)
1768 }
1769
1770 pub fn week(&self) -> Column {
1772 self.weekofyear()
1773 }
1774
1775 pub fn dayofweek(&self) -> Column {
1778 let w = self.expr().clone().dt().weekday();
1779 let dayofweek = (w % lit(7i32)) + lit(1i32); Self::from_expr(dayofweek, None)
1781 }
1782
1783 pub fn dayofyear(&self) -> Column {
1785 Self::from_expr(
1786 self.expr().clone().dt().ordinal_day().cast(DataType::Int32),
1787 None,
1788 )
1789 }
1790
1791 pub fn to_date(&self) -> Column {
1793 use polars::prelude::DataType;
1794 Self::from_expr(self.expr().clone().cast(DataType::Date), None)
1795 }
1796
1797 pub fn date_format(&self, format: &str) -> Column {
1799 Self::from_expr(self.expr().clone().dt().strftime(format), None)
1800 }
1801
1802 pub fn hour(&self) -> Column {
1804 let expr = self.expr().clone().map(
1805 |s| expect_col(crate::udfs::apply_hour(s)),
1806 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Int32)),
1807 );
1808 Self::from_expr(expr, None)
1809 }
1810
1811 pub fn minute(&self) -> Column {
1813 let expr = self.expr().clone().map(
1814 |s| expect_col(crate::udfs::apply_minute(s)),
1815 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Int32)),
1816 );
1817 Self::from_expr(expr, None)
1818 }
1819
1820 pub fn second(&self) -> Column {
1822 let expr = self.expr().clone().map(
1823 |s| expect_col(crate::udfs::apply_second(s)),
1824 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Int32)),
1825 );
1826 Self::from_expr(expr, None)
1827 }
1828
1829 pub fn extract(&self, field: &str) -> Column {
1831 use polars::prelude::*;
1832 let e = self.expr().clone();
1833 let expr = match field.trim().to_lowercase().as_str() {
1834 "year" => e.dt().year(),
1835 "month" => e.dt().month(),
1836 "day" => e.dt().day(),
1837 "hour" => e.dt().hour(),
1838 "minute" => e.dt().minute(),
1839 "second" => e.dt().second(),
1840 "quarter" => e.dt().quarter(),
1841 "week" | "weekofyear" => e.dt().week(),
1842 "dayofweek" | "dow" => {
1843 let w = e.dt().weekday();
1844 (w % lit(7i32)) + lit(1i32)
1845 }
1846 "dayofyear" | "doy" => e.dt().ordinal_day().cast(DataType::Int32),
1847 _ => e.dt().year(), };
1849 Self::from_expr(expr, None)
1850 }
1851
1852 pub fn unix_micros(&self) -> Column {
1854 use polars::prelude::*;
1855 Self::from_expr(self.expr().clone().cast(DataType::Int64), None)
1856 }
1857
1858 pub fn unix_millis(&self) -> Column {
1860 use polars::prelude::*;
1861 let micros = self.expr().clone().cast(DataType::Int64);
1862 Self::from_expr(micros / lit(1000i64), None)
1863 }
1864
1865 pub fn unix_seconds(&self) -> Column {
1867 use polars::prelude::*;
1868 let micros = self.expr().clone().cast(DataType::Int64);
1869 Self::from_expr(micros / lit(1_000_000i64), None)
1870 }
1871
1872 pub fn dayname(&self) -> Column {
1874 let expr = self.expr().clone().map(
1875 |s| expect_col(crate::udfs::apply_dayname(s)),
1876 |_schema, field| Ok(Field::new(field.name().clone(), DataType::String)),
1877 );
1878 Self::from_expr(expr, None)
1879 }
1880
1881 pub fn weekday(&self) -> Column {
1883 let expr = self.expr().clone().map(
1884 |s| expect_col(crate::udfs::apply_weekday(s)),
1885 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Int32)),
1886 );
1887 Self::from_expr(expr, None)
1888 }
1889
1890 pub fn date_add(&self, n: i32) -> Column {
1892 use polars::prelude::*;
1893 let date_expr = self.expr().clone().cast(DataType::Date);
1894 let dur = duration(DurationArgs::new().with_days(lit(n as i64)));
1895 Self::from_expr(date_expr + dur, None)
1896 }
1897
1898 pub fn date_sub(&self, n: i32) -> Column {
1900 use polars::prelude::*;
1901 let date_expr = self.expr().clone().cast(DataType::Date);
1902 let dur = duration(DurationArgs::new().with_days(lit(n as i64)));
1903 Self::from_expr(date_expr - dur, None)
1904 }
1905
1906 pub fn datediff(&self, other: &Column) -> Column {
1908 use polars::prelude::*;
1909 let start = self.expr().clone().cast(DataType::Date);
1910 let end = other.expr().clone().cast(DataType::Date);
1911 Self::from_expr((end - start).dt().total_days(false), None)
1912 }
1913
1914 pub fn last_day(&self) -> Column {
1916 Self::from_expr(self.expr().clone().dt().month_end(), None)
1917 }
1918
1919 pub fn timestampadd(&self, unit: &str, amount: &Column) -> Column {
1921 use polars::prelude::*;
1922 let ts = self.expr().clone();
1923 let amt = amount.expr().clone().cast(DataType::Int64);
1924 let dur = match unit.trim().to_uppercase().as_str() {
1925 "DAY" | "DAYS" => duration(DurationArgs::new().with_days(amt)),
1926 "HOUR" | "HOURS" => duration(DurationArgs::new().with_hours(amt)),
1927 "MINUTE" | "MINUTES" => duration(DurationArgs::new().with_minutes(amt)),
1928 "SECOND" | "SECONDS" => duration(DurationArgs::new().with_seconds(amt)),
1929 "WEEK" | "WEEKS" => duration(DurationArgs::new().with_weeks(amt)),
1930 _ => duration(DurationArgs::new().with_days(amt)),
1931 };
1932 Self::from_expr(ts + dur, None)
1933 }
1934
1935 pub fn timestampdiff(&self, unit: &str, other: &Column) -> Column {
1937 let start = self.expr().clone();
1938 let end = other.expr().clone();
1939 let diff = end - start;
1940 let expr = match unit.trim().to_uppercase().as_str() {
1941 "HOUR" | "HOURS" => diff.dt().total_hours(false),
1942 "MINUTE" | "MINUTES" => diff.dt().total_minutes(false),
1943 "SECOND" | "SECONDS" => diff.dt().total_seconds(false),
1944 "DAY" | "DAYS" => diff.dt().total_days(false),
1945 _ => diff.dt().total_days(false),
1946 };
1947 Self::from_expr(expr, None)
1948 }
1949
1950 pub fn from_utc_timestamp(&self, tz: &str) -> Column {
1952 let tz = tz.to_string();
1953 let expr = self.expr().clone().map(
1954 move |s| expect_col(crate::udfs::apply_from_utc_timestamp(s, &tz)),
1955 |_schema, field| Ok(field.clone()),
1956 );
1957 Self::from_expr(expr, None)
1958 }
1959
1960 pub fn to_utc_timestamp(&self, tz: &str) -> Column {
1962 let tz = tz.to_string();
1963 let expr = self.expr().clone().map(
1964 move |s| expect_col(crate::udfs::apply_to_utc_timestamp(s, &tz)),
1965 |_schema, field| Ok(field.clone()),
1966 );
1967 Self::from_expr(expr, None)
1968 }
1969
1970 pub fn trunc(&self, format: &str) -> Column {
1972 use polars::prelude::*;
1973 Self::from_expr(
1974 self.expr().clone().dt().truncate(lit(format.to_string())),
1975 None,
1976 )
1977 }
1978
1979 pub fn add_months(&self, n: i32) -> Column {
1981 let expr = self.expr().clone().map(
1982 move |col| expect_col(crate::udfs::apply_add_months(col, n)),
1983 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Date)),
1984 );
1985 Self::from_expr(expr, None)
1986 }
1987
1988 pub fn months_between(&self, start: &Column, round_off: bool) -> Column {
1991 let args = [start.expr().clone()];
1992 let expr = self.expr().clone().map_many(
1993 move |cols| expect_col(crate::udfs::apply_months_between(cols, round_off)),
1994 &args,
1995 |_schema, fields| Ok(Field::new(fields[0].name().clone(), DataType::Float64)),
1996 );
1997 Self::from_expr(expr, None)
1998 }
1999
2000 pub fn next_day(&self, day_of_week: &str) -> Column {
2002 let day = day_of_week.to_string();
2003 let expr = self.expr().clone().map(
2004 move |col| expect_col(crate::udfs::apply_next_day(col, &day)),
2005 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Date)),
2006 );
2007 Self::from_expr(expr, None)
2008 }
2009
2010 pub fn unix_timestamp(&self, format: Option<&str>) -> Column {
2012 let fmt = format.map(String::from);
2013 let expr = self.expr().clone().map(
2014 move |col| expect_col(crate::udfs::apply_unix_timestamp(col, fmt.as_deref())),
2015 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Int64)),
2016 );
2017 Self::from_expr(expr, None)
2018 }
2019
2020 pub fn from_unixtime(&self, format: Option<&str>) -> Column {
2022 let fmt = format.map(String::from);
2023 let expr = self.expr().clone().map(
2024 move |col| expect_col(crate::udfs::apply_from_unixtime(col, fmt.as_deref())),
2025 |_schema, field| Ok(Field::new(field.name().clone(), DataType::String)),
2026 );
2027 Self::from_expr(expr, None)
2028 }
2029
2030 pub fn timestamp_seconds(&self) -> Column {
2032 let expr = (self.expr().clone().cast(DataType::Int64) * lit(1_000_000i64))
2033 .cast(DataType::Datetime(TimeUnit::Microseconds, None));
2034 Self::from_expr(expr, None)
2035 }
2036
2037 pub fn timestamp_millis(&self) -> Column {
2039 let expr = (self.expr().clone().cast(DataType::Int64) * lit(1000i64))
2040 .cast(DataType::Datetime(TimeUnit::Microseconds, None));
2041 Self::from_expr(expr, None)
2042 }
2043
2044 pub fn timestamp_micros(&self) -> Column {
2046 let expr = self
2047 .expr()
2048 .clone()
2049 .cast(DataType::Int64)
2050 .cast(DataType::Datetime(TimeUnit::Microseconds, None));
2051 Self::from_expr(expr, None)
2052 }
2053
2054 pub fn unix_date(&self) -> Column {
2056 let expr = self.expr().clone().map(
2057 |s| expect_col(crate::udfs::apply_unix_date(s)),
2058 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Int32)),
2059 );
2060 Self::from_expr(expr, None)
2061 }
2062
2063 pub fn date_from_unix_date(&self) -> Column {
2065 let expr = self.expr().clone().map(
2066 |s| expect_col(crate::udfs::apply_date_from_unix_date(s)),
2067 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Date)),
2068 );
2069 Self::from_expr(expr, None)
2070 }
2071
2072 pub fn pmod(&self, divisor: &Column) -> Column {
2074 let args = [divisor.expr().clone()];
2075 let expr = self.expr().clone().map_many(
2076 |cols| expect_col(crate::udfs::apply_pmod(cols)),
2077 &args,
2078 |_schema, fields| Ok(Field::new(fields[0].name().clone(), DataType::Float64)),
2079 );
2080 Self::from_expr(expr, None)
2081 }
2082
2083 pub fn factorial(&self) -> Column {
2085 let expr = self.expr().clone().map(
2086 |s| expect_col(crate::udfs::apply_factorial(s)),
2087 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Int64)),
2088 );
2089 Self::from_expr(expr, None)
2090 }
2091
2092 pub fn over(&self, partition_by: &[&str]) -> Column {
2097 let partition_exprs: Vec<Expr> = partition_by.iter().map(|s| col(*s)).collect();
2098 Self::from_expr(self.expr().clone().over(partition_exprs), None)
2099 }
2100
2101 pub fn rank(&self, descending: bool) -> Column {
2103 let opts = RankOptions {
2104 method: RankMethod::Min,
2105 descending,
2106 };
2107 Self::from_expr(self.expr().clone().rank(opts, None), None)
2108 }
2109
2110 pub fn dense_rank(&self, descending: bool) -> Column {
2112 let opts = RankOptions {
2113 method: RankMethod::Dense,
2114 descending,
2115 };
2116 Self::from_expr(self.expr().clone().rank(opts, None), None)
2117 }
2118
2119 pub fn row_number(&self, descending: bool) -> Column {
2121 let opts = RankOptions {
2122 method: RankMethod::Ordinal,
2123 descending,
2124 };
2125 Self::from_expr(self.expr().clone().rank(opts, None), None)
2126 }
2127
2128 pub fn lag(&self, n: i64) -> Column {
2130 Self::from_expr(self.expr().clone().shift(polars::prelude::lit(n)), None)
2131 }
2132
2133 pub fn lead(&self, n: i64) -> Column {
2135 Self::from_expr(self.expr().clone().shift(polars::prelude::lit(-n)), None)
2136 }
2137
2138 pub fn first_value(&self) -> Column {
2140 Self::from_expr(self.expr().clone().first(), None)
2141 }
2142
2143 pub fn last_value(&self) -> Column {
2145 Self::from_expr(self.expr().clone().last(), None)
2146 }
2147
2148 pub fn percent_rank(&self, partition_by: &[&str], descending: bool) -> Column {
2150 use polars::prelude::*;
2151 let partition_exprs: Vec<Expr> = partition_by.iter().map(|s| col(*s)).collect();
2152 let opts = RankOptions {
2153 method: RankMethod::Min,
2154 descending,
2155 };
2156 let rank_expr = self
2157 .expr()
2158 .clone()
2159 .rank(opts, None)
2160 .over(partition_exprs.clone());
2161 let count_expr = self.expr().clone().count().over(partition_exprs.clone());
2162 let rank_f = (rank_expr - lit(1i64)).cast(DataType::Float64);
2163 let count_f = (count_expr - lit(1i64)).cast(DataType::Float64);
2164 let pct = rank_f / count_f;
2165 Self::from_expr(pct, None)
2166 }
2167
2168 pub fn cume_dist(&self, partition_by: &[&str], descending: bool) -> Column {
2170 use polars::prelude::*;
2171 let partition_exprs: Vec<Expr> = partition_by.iter().map(|s| col(*s)).collect();
2172 let opts = RankOptions {
2173 method: RankMethod::Ordinal,
2174 descending,
2175 };
2176 let row_num = self
2177 .expr()
2178 .clone()
2179 .rank(opts, None)
2180 .over(partition_exprs.clone());
2181 let count_expr = self.expr().clone().count().over(partition_exprs.clone());
2182 let cume = row_num / count_expr;
2183 Self::from_expr(cume.cast(DataType::Float64), None)
2184 }
2185
2186 pub fn ntile(&self, n: u32, partition_by: &[&str], descending: bool) -> Column {
2188 use polars::prelude::*;
2189 let partition_exprs: Vec<Expr> = partition_by.iter().map(|s| col(*s)).collect();
2190 let opts = RankOptions {
2191 method: RankMethod::Ordinal,
2192 descending,
2193 };
2194 let rank_expr = self
2195 .expr()
2196 .clone()
2197 .rank(opts, None)
2198 .over(partition_exprs.clone());
2199 let count_expr = self.expr().clone().count().over(partition_exprs.clone());
2200 let n_expr = lit(n as f64);
2201 let rank_f = rank_expr.cast(DataType::Float64);
2202 let count_f = count_expr.cast(DataType::Float64);
2203 let bucket = (rank_f * n_expr / count_f).ceil();
2204 let clamped = bucket.clip(lit(1.0), lit(n as f64));
2205 Self::from_expr(clamped.cast(DataType::Int32), None)
2206 }
2207
2208 pub fn nth_value(&self, n: i64, partition_by: &[&str], descending: bool) -> Column {
2210 use polars::prelude::*;
2211 let partition_exprs: Vec<Expr> = partition_by.iter().map(|s| col(*s)).collect();
2212 let opts = RankOptions {
2213 method: RankMethod::Ordinal,
2214 descending,
2215 };
2216 let rank_expr = self
2217 .expr()
2218 .clone()
2219 .rank(opts, None)
2220 .over(partition_exprs.clone());
2221 let cond_col = Self::from_expr(rank_expr.eq(lit(n)), None);
2222 let null_col = Self::from_expr(lit(NULL), None);
2223 let value_col = Self::from_expr(self.expr().clone(), None);
2224 let when_expr = crate::functions::when(&cond_col)
2225 .then(&value_col)
2226 .otherwise(&null_col)
2227 .into_expr();
2228 let windowed = when_expr.max().over(partition_exprs);
2229 Self::from_expr(windowed, None)
2230 }
2231
2232 pub fn array_size(&self) -> Column {
2234 use polars::prelude::*;
2235 Self::from_expr(
2236 self.expr().clone().list().len().cast(DataType::Int32),
2237 Some("size".to_string()),
2238 )
2239 }
2240
2241 pub fn cardinality(&self) -> Column {
2243 self.array_size()
2244 }
2245
2246 pub fn array_contains(&self, value: Expr) -> Column {
2248 Self::from_expr(self.expr().clone().list().contains(value, false), None)
2249 }
2250
2251 pub fn array_join(&self, separator: &str) -> Column {
2253 use polars::prelude::*;
2254 Self::from_expr(
2255 self.expr()
2256 .clone()
2257 .list()
2258 .join(lit(separator.to_string()), false),
2259 None,
2260 )
2261 }
2262
2263 pub fn array_max(&self) -> Column {
2265 Self::from_expr(self.expr().clone().list().max(), None)
2266 }
2267
2268 pub fn array_min(&self) -> Column {
2270 Self::from_expr(self.expr().clone().list().min(), None)
2271 }
2272
2273 pub fn element_at(&self, index: i64) -> Column {
2275 use polars::prelude::*;
2276 let idx = if index >= 1 { index - 1 } else { index };
2278 Self::from_expr(self.expr().clone().list().get(lit(idx), true), None)
2279 }
2280
2281 pub fn get_item(&self, index: i64) -> Column {
2283 use polars::prelude::*;
2284 Self::from_expr(self.expr().clone().list().get(lit(index), true), None)
2285 }
2286
2287 pub fn get_field(&self, name: &str) -> Column {
2289 Self::from_expr(
2290 self.expr().clone().struct_().field_by_name(name),
2291 Some(name.to_string()),
2292 )
2293 }
2294
2295 pub fn with_field(&self, name: &str, value: &Column) -> Column {
2300 self.try_with_field(name, value)
2301 .expect("with_field: column must be struct type")
2302 }
2303
2304 pub fn try_with_field(
2308 &self,
2309 name: &str,
2310 value: &Column,
2311 ) -> Result<Column, polars::error::PolarsError> {
2312 let name = name.to_string();
2313 let args = [value.expr().clone()];
2314 let expr = self.expr().clone().map_many(
2315 move |cols| {
2316 expect_col(crate::udfs::apply_struct_with_field(
2318 cols[0].clone(),
2319 cols[1].clone(),
2320 &name,
2321 ))
2322 },
2323 &args,
2324 |_schema, fields| Ok(fields[0].clone()),
2325 );
2326 Ok(Self::from_expr(expr, None))
2327 }
2328
2329 pub fn array_sort(&self) -> Column {
2331 use polars::prelude::SortOptions;
2332 let opts = SortOptions {
2333 descending: false,
2334 nulls_last: true,
2335 ..Default::default()
2336 };
2337 Self::from_expr(self.expr().clone().list().sort(opts), None)
2338 }
2339
2340 pub fn array_distinct(&self) -> Column {
2342 let expr = self.expr().clone().map(
2343 |s| expect_col(crate::udfs::apply_array_distinct_first_order(s)),
2344 |_schema, field| Ok(field.clone()),
2345 );
2346 Self::from_expr(expr, None)
2347 }
2348
2349 pub fn mode(&self) -> Column {
2352 let vc = self
2356 .expr()
2357 .clone()
2358 .value_counts(true, false, "count", false);
2359 let first_struct = vc.first();
2360 let val_expr = first_struct.struct_().field_by_index(0);
2361 Self::from_expr(val_expr, Some("mode".to_string()))
2362 }
2363
2364 pub fn array_slice(&self, start: i64, length: Option<i64>) -> Column {
2366 use polars::prelude::*;
2367 let start_expr = lit((start - 1).max(0)); let length_expr = length.map(lit).unwrap_or_else(|| lit(i64::MAX));
2369 Self::from_expr(
2370 self.expr().clone().list().slice(start_expr, length_expr),
2371 None,
2372 )
2373 }
2374
2375 pub fn explode(&self) -> Column {
2377 use polars::prelude::ExplodeOptions;
2378 Self::from_expr(
2379 self.expr().clone().explode(ExplodeOptions {
2380 empty_as_null: false,
2381 keep_nulls: false,
2382 }),
2383 None,
2384 )
2385 }
2386
2387 pub fn explode_outer(&self) -> Column {
2389 use polars::prelude::ExplodeOptions;
2390 Self::from_expr(
2391 self.expr().clone().explode(ExplodeOptions {
2392 empty_as_null: true,
2393 keep_nulls: true,
2394 }),
2395 None,
2396 )
2397 }
2398
2399 pub fn posexplode_outer(&self) -> (Column, Column) {
2401 self.posexplode()
2402 }
2403
2404 pub fn arrays_zip(&self, other: &Column) -> Column {
2406 let args = [other.expr().clone()];
2407 let expr = self.expr().clone().map_many(
2408 |cols| expect_col(crate::udfs::apply_arrays_zip(cols)),
2409 &args,
2410 |_schema, fields| Ok(fields[0].clone()),
2411 );
2412 Self::from_expr(expr, None)
2413 }
2414
2415 pub fn arrays_overlap(&self, other: &Column) -> Column {
2417 let args = [other.expr().clone()];
2418 let expr = self.expr().clone().map_many(
2419 |cols| expect_col(crate::udfs::apply_arrays_overlap(cols)),
2420 &args,
2421 |_schema, fields| Ok(Field::new(fields[0].name().clone(), DataType::Boolean)),
2422 );
2423 Self::from_expr(expr, None)
2424 }
2425
2426 pub fn array_agg(&self) -> Column {
2428 Self::from_expr(self.expr().clone().implode(), None)
2429 }
2430
2431 pub fn array_position(&self, value: Expr) -> Column {
2434 use polars::prelude::{DataType, NULL};
2435 let cond = Self::from_expr(col("").eq(value), None);
2437 let then_val = Self::from_expr(col("").cum_count(false), None);
2438 let else_val = Self::from_expr(lit(NULL), None);
2439 let idx_expr = crate::functions::when(&cond)
2440 .then(&then_val)
2441 .otherwise(&else_val)
2442 .into_expr();
2443 let list_expr = self
2444 .expr()
2445 .clone()
2446 .list()
2447 .eval(idx_expr)
2448 .list()
2449 .min()
2450 .fill_null(lit(0i64))
2451 .cast(DataType::Int64);
2452 Self::from_expr(list_expr, Some("array_position".to_string()))
2453 }
2454
2455 pub fn array_compact(&self) -> Column {
2457 let list_expr = self.expr().clone().list().drop_nulls();
2458 Self::from_expr(list_expr, None)
2459 }
2460
2461 pub fn array_remove(&self, value: Expr) -> Column {
2464 use polars::prelude::NULL;
2465 let cond = Self::from_expr(col("").neq(value), None);
2467 let then_val = Self::from_expr(col(""), None);
2468 let else_val = Self::from_expr(lit(NULL), None);
2469 let elem_neq = crate::functions::when(&cond)
2470 .then(&then_val)
2471 .otherwise(&else_val)
2472 .into_expr();
2473 let list_expr = self
2474 .expr()
2475 .clone()
2476 .list()
2477 .eval(elem_neq)
2478 .list()
2479 .drop_nulls();
2480 Self::from_expr(list_expr, None)
2481 }
2482
2483 pub fn array_repeat(&self, n: i64) -> Column {
2485 let expr = self.expr().clone().map(
2486 move |c| expect_col(crate::udfs::apply_array_repeat(c, n)),
2487 |_schema, field| Ok(field.clone()),
2488 );
2489 Self::from_expr(expr, None)
2490 }
2491
2492 pub fn array_flatten(&self) -> Column {
2494 let expr = self.expr().clone().map(
2495 |s| expect_col(crate::udfs::apply_array_flatten(s)),
2496 |_schema, field| Ok(field.clone()),
2497 );
2498 Self::from_expr(expr, None)
2499 }
2500
2501 pub fn array_append(&self, elem: &Column) -> Column {
2503 let args = [elem.expr().clone()];
2504 let expr = self.expr().clone().map_many(
2505 |cols| expect_col(crate::udfs::apply_array_append(cols)),
2506 &args,
2507 |_schema, fields| Ok(fields[0].clone()),
2508 );
2509 Self::from_expr(expr, None)
2510 }
2511
2512 pub fn array_prepend(&self, elem: &Column) -> Column {
2514 let args = [elem.expr().clone()];
2515 let expr = self.expr().clone().map_many(
2516 |cols| expect_col(crate::udfs::apply_array_prepend(cols)),
2517 &args,
2518 |_schema, fields| Ok(fields[0].clone()),
2519 );
2520 Self::from_expr(expr, None)
2521 }
2522
2523 pub fn array_insert(&self, pos: &Column, elem: &Column) -> Column {
2525 let args = [pos.expr().clone(), elem.expr().clone()];
2526 let expr = self.expr().clone().map_many(
2527 |cols| expect_col(crate::udfs::apply_array_insert(cols)),
2528 &args,
2529 |_schema, fields| Ok(fields[0].clone()),
2530 );
2531 Self::from_expr(expr, None)
2532 }
2533
2534 pub fn array_except(&self, other: &Column) -> Column {
2536 let args = [other.expr().clone()];
2537 let expr = self.expr().clone().map_many(
2538 |cols| expect_col(crate::udfs::apply_array_except(cols)),
2539 &args,
2540 |_schema, fields| Ok(fields[0].clone()),
2541 );
2542 Self::from_expr(expr, None)
2543 }
2544
2545 pub fn array_intersect(&self, other: &Column) -> Column {
2547 let args = [other.expr().clone()];
2548 let expr = self.expr().clone().map_many(
2549 |cols| expect_col(crate::udfs::apply_array_intersect(cols)),
2550 &args,
2551 |_schema, fields| Ok(fields[0].clone()),
2552 );
2553 Self::from_expr(expr, None)
2554 }
2555
2556 pub fn array_union(&self, other: &Column) -> Column {
2558 let args = [other.expr().clone()];
2559 let expr = self.expr().clone().map_many(
2560 |cols| expect_col(crate::udfs::apply_array_union(cols)),
2561 &args,
2562 |_schema, fields| Ok(fields[0].clone()),
2563 );
2564 Self::from_expr(expr, None)
2565 }
2566
2567 pub fn zip_with(&self, other: &Column, merge: Expr) -> Column {
2570 let args = [other.expr().clone()];
2571 let zip_expr = self.expr().clone().map_many(
2572 |cols| expect_col(crate::udfs::apply_zip_arrays_to_struct(cols)),
2573 &args,
2574 |_schema, fields| {
2575 let left_inner = match &fields[0].dtype {
2576 DataType::List(inner) => *inner.clone(),
2577 _ => DataType::Unknown(Default::default()),
2578 };
2579 let right_inner = match fields.get(1).map(|f| &f.dtype) {
2580 Some(DataType::List(inner)) => *inner.clone(),
2581 _ => DataType::Unknown(Default::default()),
2582 };
2583 let struct_dtype = DataType::Struct(vec![
2584 Field::new("left".into(), left_inner),
2585 Field::new("right".into(), right_inner),
2586 ]);
2587 Ok(Field::new(
2588 fields[0].name().clone(),
2589 DataType::List(Box::new(struct_dtype)),
2590 ))
2591 },
2592 );
2593 let list_expr = zip_expr.list().eval(merge);
2594 Self::from_expr(list_expr, None)
2595 }
2596
2597 pub fn array_exists(&self, predicate: Expr) -> Column {
2599 let pred_expr = self.expr().clone().list().eval(predicate).list().any();
2600 Self::from_expr(pred_expr, Some("exists".to_string()))
2601 }
2602
2603 pub fn array_forall(&self, predicate: Expr) -> Column {
2605 let pred_expr = self.expr().clone().list().eval(predicate).list().all();
2606 Self::from_expr(pred_expr, Some("forall".to_string()))
2607 }
2608
2609 pub fn array_filter(&self, predicate: Expr) -> Column {
2611 use polars::prelude::NULL;
2612 let then_val = Self::from_expr(col(""), None);
2613 let else_val = Self::from_expr(lit(NULL), None);
2614 let elem_expr = crate::functions::when(&Self::from_expr(predicate, None))
2615 .then(&then_val)
2616 .otherwise(&else_val)
2617 .into_expr();
2618 let list_expr = self
2619 .expr()
2620 .clone()
2621 .list()
2622 .eval(elem_expr)
2623 .list()
2624 .drop_nulls();
2625 Self::from_expr(list_expr, None)
2626 }
2627
2628 pub fn array_transform(&self, f: Expr) -> Column {
2630 let list_expr = self.expr().clone().list().eval(f);
2631 Self::from_expr(list_expr, None)
2632 }
2633
2634 pub fn array_sum(&self) -> Column {
2636 Self::from_expr(self.expr().clone().list().sum(), None)
2637 }
2638
2639 pub fn array_aggregate(&self, zero: &Column) -> Column {
2641 let sum_expr = self.expr().clone().list().sum();
2642 Self::from_expr(sum_expr + zero.expr().clone(), None)
2643 }
2644
2645 pub fn array_mean(&self) -> Column {
2647 Self::from_expr(self.expr().clone().list().mean(), None)
2648 }
2649
2650 pub fn posexplode(&self) -> (Column, Column) {
2653 use polars::prelude::ExplodeOptions;
2654 let opts = ExplodeOptions {
2655 empty_as_null: false,
2656 keep_nulls: false,
2657 };
2658 let pos_expr = self
2659 .expr()
2660 .clone()
2661 .list()
2662 .eval(col("").cum_count(false))
2663 .explode(opts);
2664 let val_expr = self.expr().clone().explode(opts);
2665 (
2666 Self::from_expr(pos_expr, Some("pos".to_string())),
2667 Self::from_expr(val_expr, Some("col".to_string())),
2668 )
2669 }
2670
2671 pub fn map_keys(&self) -> Column {
2673 let elem_key = col("").struct_().field_by_name("key");
2674 let list_expr = self.expr().clone().list().eval(elem_key);
2675 Self::from_expr(list_expr, None)
2676 }
2677
2678 pub fn map_values(&self) -> Column {
2680 let elem_val = col("").struct_().field_by_name("value");
2681 let list_expr = self.expr().clone().list().eval(elem_val);
2682 Self::from_expr(list_expr, None)
2683 }
2684
2685 pub fn map_entries(&self) -> Column {
2687 Self::from_expr(self.expr().clone(), None)
2688 }
2689
2690 pub fn map_from_arrays(&self, values: &Column) -> Column {
2692 let args = [values.expr().clone()];
2693 let expr = self.expr().clone().map_many(
2694 |cols| expect_col(crate::udfs::apply_map_from_arrays(cols)),
2695 &args,
2696 |_schema, fields| Ok(fields[0].clone()),
2697 );
2698 Self::from_expr(expr, None)
2699 }
2700
2701 pub fn map_concat(&self, other: &Column) -> Column {
2703 let args = [other.expr().clone()];
2704 let expr = self.expr().clone().map_many(
2705 |cols| expect_col(crate::udfs::apply_map_concat(cols)),
2706 &args,
2707 |_schema, fields| Ok(fields[0].clone()),
2708 );
2709 Self::from_expr(expr, None)
2710 }
2711
2712 pub fn transform_keys(&self, key_expr: Expr) -> Column {
2714 use polars::prelude::as_struct;
2715 let value = col("").struct_().field_by_name("value");
2716 let new_struct = as_struct(vec![key_expr.alias("key"), value.alias("value")]);
2717 let list_expr = self.expr().clone().list().eval(new_struct);
2718 Self::from_expr(list_expr, None)
2719 }
2720
2721 pub fn transform_values(&self, value_expr: Expr) -> Column {
2723 use polars::prelude::as_struct;
2724 let key = col("").struct_().field_by_name("key");
2725 let new_struct = as_struct(vec![key.alias("key"), value_expr.alias("value")]);
2726 let list_expr = self.expr().clone().list().eval(new_struct);
2727 Self::from_expr(list_expr, None)
2728 }
2729
2730 pub fn map_zip_with(&self, other: &Column, merge: Expr) -> Column {
2733 use polars::prelude::as_struct;
2734 let args = [other.expr().clone()];
2735 let zip_expr = self.expr().clone().map_many(
2736 |cols| expect_col(crate::udfs::apply_map_zip_to_struct(cols)),
2737 &args,
2738 |_schema, fields| {
2739 let list_inner = match &fields[0].dtype {
2740 DataType::List(inner) => *inner.clone(),
2741 _ => return Ok(fields[0].clone()),
2742 };
2743 let (key_dtype, value_dtype) = match &list_inner {
2744 DataType::Struct(struct_fields) => {
2745 let k = struct_fields
2746 .iter()
2747 .find(|f| f.name.as_str() == "key")
2748 .map(|f| f.dtype.clone())
2749 .unwrap_or(DataType::String);
2750 let v = struct_fields
2751 .iter()
2752 .find(|f| f.name.as_str() == "value")
2753 .map(|f| f.dtype.clone())
2754 .unwrap_or(DataType::String);
2755 (k, v)
2756 }
2757 _ => (DataType::String, DataType::String),
2758 };
2759 let out_struct = DataType::Struct(vec![
2760 Field::new("key".into(), key_dtype),
2761 Field::new("value1".into(), value_dtype.clone()),
2762 Field::new("value2".into(), value_dtype),
2763 ]);
2764 Ok(Field::new(
2765 fields[0].name().clone(),
2766 DataType::List(Box::new(out_struct)),
2767 ))
2768 },
2769 );
2770 let key_field = col("").struct_().field_by_name("key").alias("key");
2771 let value_field = merge.alias("value");
2772 let merge_expr = as_struct(vec![key_field, value_field]);
2773 let list_expr = zip_expr.list().eval(merge_expr);
2774 Self::from_expr(list_expr, None)
2775 }
2776
2777 pub fn map_filter(&self, predicate: Expr) -> Column {
2780 use polars::prelude::NULL;
2781 let then_val = Self::from_expr(col(""), None);
2782 let else_val = Self::from_expr(lit(NULL), None);
2783 let elem_expr = crate::functions::when(&Self::from_expr(predicate, None))
2784 .then(&then_val)
2785 .otherwise(&else_val)
2786 .into_expr();
2787 let list_expr = self
2788 .expr()
2789 .clone()
2790 .list()
2791 .eval(elem_expr)
2792 .list()
2793 .drop_nulls();
2794 Self::from_expr(list_expr, None)
2795 }
2796
2797 pub fn map_from_entries(&self) -> Column {
2799 Self::from_expr(self.expr().clone(), None)
2800 }
2801
2802 pub fn map_contains_key(&self, key: &Column) -> Column {
2804 let args = [key.expr().clone()];
2805 let expr = self.expr().clone().map_many(
2806 |cols| expect_col(crate::udfs::apply_map_contains_key(cols)),
2807 &args,
2808 |_schema, fields| Ok(Field::new(fields[0].name().clone(), DataType::Boolean)),
2809 );
2810 Self::from_expr(expr, None)
2811 }
2812
2813 pub fn get(&self, key: &Column) -> Column {
2815 let args = [key.expr().clone()];
2816 let expr = self.expr().clone().map_many(
2817 |cols| expect_col(crate::udfs::apply_get(cols)),
2818 &args,
2819 |_schema, fields| Ok(fields[0].clone()),
2820 );
2821 Self::from_expr(expr, None)
2822 }
2823
2824 pub fn get_json_object(&self, path: &str) -> Column {
2826 let path_expr = polars::prelude::lit(path.to_string());
2827 let out = self.expr().clone().str().json_path_match(path_expr);
2828 Self::from_expr(out, None)
2829 }
2830
2831 pub fn from_json(&self, schema: Option<polars::datatypes::DataType>) -> Column {
2833 use polars::prelude::DataType;
2834 let dtype = schema.unwrap_or(DataType::String);
2835 let out = self.expr().clone().str().json_decode(dtype);
2836 Self::from_expr(out, None)
2837 }
2838
2839 pub fn to_json(&self) -> Column {
2841 let out = self.expr().clone().struct_().json_encode();
2842 Self::from_expr(out, None)
2843 }
2844
2845 pub fn json_array_length(&self, path: &str) -> Column {
2847 let path = path.to_string();
2848 let expr = self.expr().clone().map(
2849 move |s| expect_col(crate::udfs::apply_json_array_length(s, &path)),
2850 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Int64)),
2851 );
2852 Self::from_expr(expr, None)
2853 }
2854
2855 pub fn json_object_keys(&self) -> Column {
2857 let expr = self.expr().clone().map(
2858 |s| expect_col(crate::udfs::apply_json_object_keys(s)),
2859 |_schema, field| {
2860 Ok(Field::new(
2861 field.name().clone(),
2862 DataType::List(Box::new(DataType::String)),
2863 ))
2864 },
2865 );
2866 Self::from_expr(expr, None)
2867 }
2868
2869 pub fn json_tuple(&self, keys: &[&str]) -> Column {
2871 let keys_vec: Vec<String> = keys.iter().map(|s| (*s).to_string()).collect();
2872 let struct_fields: Vec<polars::datatypes::Field> = keys_vec
2873 .iter()
2874 .map(|k| polars::datatypes::Field::new(k.as_str().into(), DataType::String))
2875 .collect();
2876 let expr = self.expr().clone().map(
2877 move |s| expect_col(crate::udfs::apply_json_tuple(s, &keys_vec)),
2878 move |_schema, field| {
2879 Ok(Field::new(
2880 field.name().clone(),
2881 DataType::Struct(struct_fields.clone()),
2882 ))
2883 },
2884 );
2885 Self::from_expr(expr, None)
2886 }
2887
2888 pub fn from_csv(&self) -> Column {
2890 let expr = self.expr().clone().map(
2891 |s| expect_col(crate::udfs::apply_from_csv(s)),
2892 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Struct(vec![]))),
2893 );
2894 Self::from_expr(expr, None)
2895 }
2896
2897 pub fn to_csv(&self) -> Column {
2899 let expr = self.expr().clone().map(
2900 |s| expect_col(crate::udfs::apply_to_csv(s)),
2901 |_schema, field| Ok(Field::new(field.name().clone(), DataType::String)),
2902 );
2903 Self::from_expr(expr, None)
2904 }
2905
2906 pub fn parse_url(&self, part: &str, key: Option<&str>) -> Column {
2909 let part = part.to_string();
2910 let key_owned = key.map(String::from);
2911 let expr = self.expr().clone().map(
2912 move |s| expect_col(crate::udfs::apply_parse_url(s, &part, key_owned.as_deref())),
2913 |_schema, field| Ok(Field::new(field.name().clone(), DataType::String)),
2914 );
2915 Self::from_expr(expr, None)
2916 }
2917
2918 pub fn hash(&self) -> Column {
2920 let expr = self.expr().clone().map(
2921 |s| expect_col(crate::udfs::apply_hash_one(s)),
2922 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Int64)),
2923 );
2924 Self::from_expr(expr, None)
2925 }
2926
2927 pub fn isin(&self, other: &Column) -> Column {
2929 let out = self.expr().clone().is_in(other.expr().clone(), false);
2930 Self::from_expr(out, None)
2931 }
2932
2933 pub fn url_decode(&self) -> Column {
2935 let expr = self.expr().clone().map(
2936 |s| expect_col(crate::udfs::apply_url_decode(s)),
2937 |_schema, field| Ok(Field::new(field.name().clone(), DataType::String)),
2938 );
2939 Self::from_expr(expr, None)
2940 }
2941
2942 pub fn url_encode(&self) -> Column {
2944 let expr = self.expr().clone().map(
2945 |s| expect_col(crate::udfs::apply_url_encode(s)),
2946 |_schema, field| Ok(Field::new(field.name().clone(), DataType::String)),
2947 );
2948 Self::from_expr(expr, None)
2949 }
2950
2951 pub fn shift_left(&self, n: i32) -> Column {
2953 use polars::prelude::*;
2954 let pow = lit(2i64).pow(lit(n as i64));
2955 Self::from_expr(
2956 (self.expr().clone().cast(DataType::Int64) * pow).cast(DataType::Int64),
2957 None,
2958 )
2959 }
2960
2961 pub fn shift_right(&self, n: i32) -> Column {
2963 use polars::prelude::*;
2964 let pow = lit(2i64).pow(lit(n as i64));
2965 Self::from_expr(
2966 (self.expr().clone().cast(DataType::Int64) / pow).cast(DataType::Int64),
2967 None,
2968 )
2969 }
2970
2971 pub fn shift_right_unsigned(&self, n: i32) -> Column {
2973 let expr = self.expr().clone().map(
2974 move |s| expect_col(crate::udfs::apply_shift_right_unsigned(s, n)),
2975 |_schema, field| Ok(Field::new(field.name().clone(), DataType::Int64)),
2976 );
2977 Self::from_expr(expr, None)
2978 }
2979}
2980
#[cfg(test)]
mod tests {
    use super::Column;
    use polars::prelude::{IntoLazy, col, df, lit};

    /// Two integer columns without nulls: `a` = 1..=5 and `b` = 10..=50.
    fn test_df() -> polars::prelude::DataFrame {
        df!(
            "a" => &[1, 2, 3, 4, 5],
            "b" => &[10, 20, 30, 40, 50]
        )
        .unwrap()
    }

    /// Two integer columns with nulls in different positions.
    fn test_df_with_nulls() -> polars::prelude::DataFrame {
        df!(
            "a" => &[Some(1), Some(2), None, Some(4), None],
            "b" => &[Some(10), None, Some(30), None, None]
        )
        .unwrap()
    }

    /// Number of rows of `frame` that satisfy `predicate`.
    fn rows_matching(frame: polars::prelude::DataFrame, predicate: polars::prelude::Expr) -> usize {
        frame.lazy().filter(predicate).collect().unwrap().height()
    }

    /// Evaluates `a <=> b` (null-safe equality) on `frame` and returns the flags per row.
    fn eq_null_safe_flags(frame: polars::prelude::DataFrame) -> Vec<Option<bool>> {
        let lhs = Column::new("a".to_string());
        let rhs = Column::new("b".to_string());
        let compared = lhs.eq_null_safe(&rhs);
        let evaluated = frame
            .lazy()
            .with_column(compared.into_expr().alias("eq_null_safe"))
            .collect()
            .unwrap();
        let flags: Vec<Option<bool>> = evaluated
            .column("eq_null_safe")
            .unwrap()
            .bool()
            .unwrap()
            .into_iter()
            .collect();
        flags
    }

    #[test]
    fn test_column_new() {
        assert_eq!(Column::new("age".to_string()).name(), "age");
    }

    #[test]
    fn test_column_from_expr() {
        let named = Column::from_expr(col("test"), Some("test".to_string()));
        assert_eq!(named.name(), "test");
    }

    #[test]
    fn test_column_from_expr_default_name() {
        let unnamed = Column::from_expr(col("test").gt(lit(5)), None);
        assert_eq!(unnamed.name(), "<expr>");
    }

    #[test]
    fn test_column_alias() {
        let aliased = Column::new("original".to_string()).alias("new_name");
        assert_eq!(aliased.name(), "new_name");
    }

    #[test]
    fn test_column_gt() {
        // a > 3 matches 4 and 5.
        let predicate = Column::new("a".to_string()).gt(lit(3));
        assert_eq!(rows_matching(test_df(), predicate.into_expr()), 2);
    }

    #[test]
    fn test_column_lt() {
        // a < 3 matches 1 and 2.
        let predicate = Column::new("a".to_string()).lt(lit(3));
        assert_eq!(rows_matching(test_df(), predicate.into_expr()), 2);
    }

    #[test]
    fn test_column_eq() {
        // a == 3 matches a single row.
        let predicate = Column::new("a".to_string()).eq(lit(3));
        assert_eq!(rows_matching(test_df(), predicate.into_expr()), 1);
    }

    #[test]
    fn test_column_neq() {
        // a != 3 matches 1, 2, 4 and 5.
        let predicate = Column::new("a".to_string()).neq(lit(3));
        assert_eq!(rows_matching(test_df(), predicate.into_expr()), 4);
    }

    #[test]
    fn test_column_gt_eq() {
        // a >= 3 matches 3, 4 and 5.
        let predicate = Column::new("a".to_string()).gt_eq(lit(3));
        assert_eq!(rows_matching(test_df(), predicate.into_expr()), 3);
    }

    #[test]
    fn test_column_lt_eq() {
        // a <= 3 matches 1, 2 and 3.
        let predicate = Column::new("a".to_string()).lt_eq(lit(3));
        assert_eq!(rows_matching(test_df(), predicate.into_expr()), 3);
    }

    #[test]
    fn test_column_is_null() {
        // Column `a` holds two nulls.
        let predicate = Column::new("a".to_string()).is_null();
        assert_eq!(rows_matching(test_df_with_nulls(), predicate.into_expr()), 2);
    }

    #[test]
    fn test_column_is_not_null() {
        // Column `a` holds three non-null values.
        let predicate = Column::new("a".to_string()).is_not_null();
        assert_eq!(rows_matching(test_df_with_nulls(), predicate.into_expr()), 3);
    }

    #[test]
    fn test_null_boolean_column_produces_null_bool_series() {
        let null_bool = Column::null_boolean().into_expr().alias("null_bool");
        let out = test_df().lazy().select([null_bool]).collect().unwrap();
        let series = out.column("null_bool").unwrap();
        assert_eq!(series.dtype(), &polars::prelude::DataType::Boolean);
        assert_eq!(series.null_count(), series.len());
    }

    #[test]
    fn test_eq_null_safe_both_null() {
        let frame = df!(
            "a" => &[Some(1), None, Some(3)],
            "b" => &[Some(1), None, Some(4)]
        )
        .unwrap();

        // Equal values -> true; null vs null -> true; different values -> false.
        let flags = eq_null_safe_flags(frame);
        assert_eq!(flags[0], Some(true));
        assert_eq!(flags[1], Some(true));
        assert_eq!(flags[2], Some(false));
    }

    #[test]
    fn test_eq_null_safe_one_null() {
        let frame = df!(
            "a" => &[Some(1), None, Some(3)],
            "b" => &[Some(1), Some(2), None]
        )
        .unwrap();

        // Equal values -> true; a null on exactly one side -> false.
        let flags = eq_null_safe_flags(frame);
        assert_eq!(flags[0], Some(true));
        assert_eq!(flags[1], Some(false));
        assert_eq!(flags[2], Some(false));
    }
}