1use std::collections::HashMap;
2
3use polars::prelude::*;
4use polars_ops::prelude::UnpivotDF;
5
6use crate::error::{Error, Result};
7use crate::Value;
8
/// Logical column types that callers can name when requesting a cast.
///
/// Each variant is translated to a Polars `DataType` by
/// `ColumnDataType::to_polars_dtype`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ColumnDataType {
    /// 32-bit signed integer.
    Int32,
    /// 64-bit signed integer.
    Int64,
    /// 32-bit floating point number.
    Float32,
    /// 64-bit floating point number.
    Float64,
    /// UTF-8 string.
    String,
    /// Boolean.
    Boolean,
    /// Calendar date.
    Date,
    /// Date and time; mapped to a millisecond-unit Polars datetime with no
    /// time zone.
    DateTime,
}
29
30impl ColumnDataType {
31 #[allow(clippy::should_implement_trait)]
33 pub fn from_str(s: &str) -> Result<Self> {
34 match s.to_lowercase().as_str() {
35 "int32" | "i32" => Ok(ColumnDataType::Int32),
36 "int64" | "i64" => Ok(ColumnDataType::Int64),
37 "float32" | "f32" => Ok(ColumnDataType::Float32),
38 "float64" | "f64" => Ok(ColumnDataType::Float64),
39 "string" | "str" | "utf8" => Ok(ColumnDataType::String),
40 "bool" | "boolean" => Ok(ColumnDataType::Boolean),
41 "date" => Ok(ColumnDataType::Date),
42 "datetime" => Ok(ColumnDataType::DateTime),
43 _ => Err(Error::operation(format!("Unknown data type: {s}"))),
44 }
45 }
46
47 #[must_use]
49 pub fn to_polars_dtype(&self) -> DataType {
50 match self {
51 ColumnDataType::Int32 => DataType::Int32,
52 ColumnDataType::Int64 => DataType::Int64,
53 ColumnDataType::Float32 => DataType::Float32,
54 ColumnDataType::Float64 => DataType::Float64,
55 ColumnDataType::String => DataType::String,
56 ColumnDataType::Boolean => DataType::Boolean,
57 ColumnDataType::Date => DataType::Date,
58 ColumnDataType::DateTime => {
59 DataType::Datetime(polars::prelude::TimeUnit::Milliseconds, None)
60 }
61 }
62 }
63}
64
/// Namespace type for the DataFrame / LazyFrame transformation helpers.
///
/// It carries no state; every operation is an associated function.
#[derive(Debug, Clone, Copy, Default)]
pub struct Transform;
67
68impl Transform {
69 pub fn select(df: &DataFrame, columns: &[String]) -> Result<DataFrame> {
71 df.select(columns)
72 .map_err(|e| Error::operation(format!("Failed to select columns: {e}")))
73 }
74
75 pub fn select_lazy(lf: LazyFrame, columns: &[String]) -> Result<LazyFrame> {
77 let cols: Vec<Expr> = columns.iter().map(col).collect();
78 Ok(lf.select(&cols))
79 }
80
81 pub fn filter(df: &DataFrame, mask: &Series) -> Result<DataFrame> {
83 if mask.dtype() != &DataType::Boolean {
84 return Err(Error::operation("Filter mask must be boolean".to_string()));
85 }
86
87 let mask = mask
88 .bool()
89 .map_err(|e| Error::operation(format!("Failed to cast mask to boolean: {e}")))?;
90
91 df.filter(mask)
92 .map_err(|e| Error::operation(format!("Failed to filter DataFrame: {e}")))
93 }
94
95 pub fn filter_lazy(lf: LazyFrame, predicate: Expr) -> Result<LazyFrame> {
97 Ok(lf.filter(predicate))
98 }
99
100 pub fn sort(df: &DataFrame, by_columns: &[String], descending: Vec<bool>) -> Result<DataFrame> {
102 df.sort(
103 by_columns,
104 SortMultipleOptions::default().with_order_descending_multi(descending),
105 )
106 .map_err(|e| Error::operation(format!("Failed to sort DataFrame: {e}")))
107 }
108
109 pub fn sort_lazy(
111 lf: LazyFrame,
112 by_columns: &[String],
113 descending: &[bool],
114 ) -> Result<LazyFrame> {
115 let exprs: Vec<Expr> = by_columns.iter().map(col).collect();
116 let options =
117 SortMultipleOptions::default().with_order_descending_multi(descending.to_vec());
118 Ok(lf.sort_by_exprs(&exprs, options))
119 }
120
121 pub fn rename(df: &DataFrame, mapping: &HashMap<String, String>) -> Result<DataFrame> {
123 let mut result = df.clone();
124
125 for (old_name, new_name) in mapping {
126 result
127 .rename(old_name.as_str(), new_name.as_str().into())
128 .map_err(|e| {
129 Error::operation(format!("Failed to rename column '{old_name}': {e}"))
130 })?;
131 }
132
133 Ok(result)
134 }
135
136 pub fn rename_lazy(lf: LazyFrame, mapping: &HashMap<String, String>) -> Result<LazyFrame> {
138 let mut result = lf;
139
140 for (old_name, new_name) in mapping {
141 result = result.rename([old_name.as_str()], [new_name.as_str()], true);
142 }
143
144 Ok(result)
145 }
146
147 pub fn with_column(df: &DataFrame, name: &str, series: Series) -> Result<DataFrame> {
149 let mut result = df.clone();
150 result
151 .with_column(series.with_name(name.into()))
152 .map_err(|e| Error::operation(format!("Failed to add column '{name}': {e}")))?;
153 Ok(result)
154 }
155
156 pub fn with_column_lazy(lf: LazyFrame, expr: Expr) -> Result<LazyFrame> {
158 Ok(lf.with_column(expr))
159 }
160
161 pub fn drop(df: &DataFrame, columns: &[String]) -> Result<DataFrame> {
163 let mut result = df.clone();
164 for column in columns {
165 result = result
166 .drop(column)
167 .map_err(|e| Error::operation(format!("Failed to drop column '{column}': {e}")))?;
168 }
169 Ok(result)
170 }
171
172 pub fn drop_lazy(lf: LazyFrame, columns: &[String]) -> Result<LazyFrame> {
174 let df = lf
176 .collect()
177 .map_err(|e| Error::operation(format!("Failed to collect LazyFrame: {e}")))?;
178 let mut result = df;
179 for column in columns {
180 result = result
181 .drop(column)
182 .map_err(|e| Error::operation(format!("Failed to drop column '{column}': {e}")))?;
183 }
184 Ok(result.lazy())
185 }
186
187 pub fn unique(
189 df: &DataFrame,
190 subset: Option<&[String]>,
191 keep: UniqueKeepStrategy,
192 ) -> Result<DataFrame> {
193 let result = df
194 .unique::<String, String>(subset, keep, None)
195 .map_err(|e| Error::operation(format!("Failed to get unique values: {e}")))?;
196 Ok(result)
197 }
198
199 pub fn unique_lazy(
201 lf: LazyFrame,
202 subset: Option<&[String]>,
203 keep: UniqueKeepStrategy,
204 ) -> Result<LazyFrame> {
205 let df = lf
207 .collect()
208 .map_err(|e| Error::operation(format!("Failed to collect LazyFrame: {e}")))?;
209 let result = df
210 .unique::<String, String>(subset, keep, None)
211 .map_err(|e| Error::operation(format!("Failed to get unique values: {e}")))?;
212 Ok(result.lazy())
213 }
214
215 pub fn limit(df: &DataFrame, n: usize) -> Result<DataFrame> {
217 Ok(df.head(Some(n)))
218 }
219
220 pub fn limit_lazy(lf: LazyFrame, n: u32) -> Result<LazyFrame> {
222 Ok(lf.limit(n))
223 }
224
225 pub fn skip(df: &DataFrame, n: usize) -> Result<DataFrame> {
227 #[allow(clippy::cast_possible_wrap)]
228 {
229 Ok(df.slice(n as i64, df.height().saturating_sub(n)))
230 }
231 }
232
233 pub fn skip_lazy(lf: LazyFrame, n: u32) -> Result<LazyFrame> {
235 Ok(lf.slice(i64::from(n), u32::MAX))
236 }
237
238 pub fn slice(df: &DataFrame, offset: i64, length: usize) -> Result<DataFrame> {
240 Ok(df.slice(offset, length))
241 }
242
243 pub fn slice_lazy(lf: LazyFrame, offset: i64, length: u32) -> Result<LazyFrame> {
245 Ok(lf.slice(offset, length))
246 }
247
248 pub fn reverse(df: &DataFrame) -> Result<DataFrame> {
250 #[allow(clippy::cast_possible_truncation)]
251 let indices: Vec<IdxSize> = (0..df.height() as IdxSize).rev().collect();
252 let ca = IdxCa::from_vec("".into(), indices);
253
254 df.take(&ca)
255 .map_err(|e| Error::operation(format!("Failed to reverse DataFrame: {e}")))
256 }
257
258 pub fn reverse_lazy(lf: LazyFrame) -> Result<LazyFrame> {
260 Ok(lf.reverse())
261 }
262
263 pub fn sample(
265 df: &DataFrame,
266 n: usize,
267 with_replacement: bool,
268 seed: Option<u64>,
269 ) -> Result<DataFrame> {
270 #[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
271 let n_values = vec![n as u32];
272 let n_series = Series::new("n".into(), n_values);
273 df.sample_n(&n_series, with_replacement, true, seed)
274 .map_err(|e| Error::operation(format!("Failed to sample DataFrame: {e}")))
275 }
276
277 pub fn fill_null(df: &DataFrame, value: FillNullStrategy) -> Result<DataFrame> {
279 let columns = df
280 .get_columns()
281 .iter()
282 .map(|s| {
283 s.fill_null(value)
284 .map_err(|e| Error::operation(format!("Failed to fill null values: {e}")))
285 })
286 .collect::<Result<Vec<_>>>()?;
287
288 let cols: Vec<_> = columns.into_iter().collect();
289 DataFrame::new(cols).map_err(|e| {
290 Error::operation(format!("Failed to create DataFrame after fill_null: {e}"))
291 })
292 }
293
294 #[allow(clippy::needless_pass_by_value)]
296 pub fn fill_null_lazy(mut lf: LazyFrame, value: Expr) -> Result<LazyFrame> {
297 let schema = lf
298 .collect_schema()
299 .map_err(|e| Error::operation(format!("Failed to collect schema: {e}")))?;
300 let columns = schema
301 .iter()
302 .map(|(name, _)| col(name.as_str()).fill_null(value.clone()))
303 .collect::<Vec<_>>();
304
305 Ok(lf.with_columns(&columns))
306 }
307
308 pub fn drop_nulls(df: &DataFrame, subset: Option<&[String]>) -> Result<DataFrame> {
310 df.drop_nulls(subset)
311 .map_err(|e| Error::operation(format!("Failed to drop null values: {e}")))
312 }
313
314 pub fn drop_nulls_lazy(lf: LazyFrame, _subset: Option<Vec<Expr>>) -> Result<LazyFrame> {
316 Ok(lf.drop_nulls(None))
318 }
319
320 pub fn cast(df: &DataFrame, column: &str, dtype: &DataType) -> Result<DataFrame> {
322 let mut result = df.clone();
323 let series = result
324 .column(column)
325 .map_err(|e| Error::operation(format!("Column '{column}' not found: {e}")))?
326 .cast(dtype)
327 .map_err(|e| Error::operation(format!("Failed to cast column '{column}': {e}")))?;
328
329 result
330 .with_column(series)
331 .map_err(|e| Error::operation(format!("Failed to update column: {e}")))?;
332
333 Ok(result)
334 }
335
336 pub fn cast_lazy(lf: LazyFrame, column: &str, dtype: DataType) -> Result<LazyFrame> {
338 Ok(lf.with_column(col(column).cast(dtype)))
339 }
340
341 pub fn explode(df: &DataFrame, columns: &[String]) -> Result<DataFrame> {
343 df.explode(columns)
344 .map_err(|e| Error::operation(format!("Failed to explode columns: {e}")))
345 }
346
347 pub fn explode_lazy(lf: LazyFrame, columns: &[String]) -> Result<LazyFrame> {
349 let df = lf
351 .collect()
352 .map_err(|e| Error::operation(format!("Failed to collect LazyFrame: {e}")))?;
353 let result = df
354 .explode(columns)
355 .map_err(|e| Error::operation(format!("Failed to explode columns: {e}")))?;
356 Ok(result.lazy())
357 }
358
359 pub fn melt(
361 df: &DataFrame,
362 id_vars: &[String],
363 value_vars: &[String],
364 _variable_name: Option<&str>,
365 _value_name: Option<&str>,
366 ) -> Result<DataFrame> {
367 if id_vars.is_empty() {
368 df.unpivot([] as [&str; 0], value_vars)
369 .map_err(|e| Error::operation(format!("Failed to melt DataFrame: {e}")))
370 } else {
371 df.unpivot(id_vars, value_vars)
372 .map_err(|e| Error::operation(format!("Failed to melt DataFrame: {e}")))
373 }
374 }
375
376 pub fn melt_lazy(
378 lf: LazyFrame,
379 id_vars: &[String],
380 value_vars: &[String],
381 _variable_name: Option<&str>,
382 _value_name: Option<&str>,
383 ) -> Result<LazyFrame> {
384 let df = lf
386 .collect()
387 .map_err(|e| Error::operation(format!("Failed to collect LazyFrame: {e}")))?;
388 let result = if id_vars.is_empty() {
389 df.unpivot([] as [&str; 0], value_vars)
390 .map_err(|e| Error::operation(format!("Failed to melt LazyFrame: {e}")))?
391 } else {
392 df.unpivot(id_vars, value_vars)
393 .map_err(|e| Error::operation(format!("Failed to melt LazyFrame: {e}")))?
394 };
395 Ok(result.lazy())
396 }
397
398 pub fn pivot(
400 df: &DataFrame,
401 values: &[String],
402 index: &[String],
403 columns: &[String],
404 aggregate_fn: Option<&str>,
405 ) -> Result<DataFrame> {
406 let values_expr: Vec<Expr> = values.iter().map(col).collect();
407 let index_expr: Vec<Expr> = index.iter().map(col).collect();
408 let _columns_expr = col(columns[0].as_str()); let agg_expr = match aggregate_fn {
411 Some("sum") => values_expr[0].clone().sum(),
412 Some("mean") => values_expr[0].clone().mean(),
413 Some("count") => values_expr[0].clone().count(),
414 Some("min") => values_expr[0].clone().min(),
415 Some("max") => values_expr[0].clone().max(),
416 _ => values_expr[0].clone().first(), };
418
419 df.clone()
421 .lazy()
422 .group_by(index_expr)
423 .agg([agg_expr])
424 .collect()
425 .map_err(|e| Error::operation(format!("Failed to pivot DataFrame: {e}")))
426 }
427
428 pub fn map_rows<F, T>(_df: &DataFrame, _f: F) -> Result<DataFrame>
430 where
431 F: Fn(usize) -> Result<T>,
432 T: Into<Series>,
433 {
434 Err(Error::operation("Row-wise map operations are not supported. Use vectorized operations or process data differently.".to_string()))
437 }
438
439 #[allow(clippy::needless_pass_by_value)]
441 pub fn map_columns(df: &DataFrame, expr: Expr) -> Result<DataFrame> {
442 let columns = df
443 .get_columns()
444 .iter()
445 .map(|s| {
446 let lazy_df = DataFrame::new(vec![s.clone()])
447 .map_err(|e| {
448 Error::operation(format!("Failed to create temporary DataFrame: {e}"))
449 })?
450 .lazy();
451
452 let result = lazy_df
453 .select(&[expr.clone().alias(s.name().as_str())])
454 .collect()
455 .map_err(|e| Error::operation(format!("Failed to apply expression: {e}")))?;
456
457 result
458 .column(s.name())
459 .map_err(|e| Error::operation(format!("Failed to get result column: {e}")))
460 .cloned()
461 })
462 .collect::<Result<Vec<_>>>()?;
463
464 let cols: Vec<_> = columns.into_iter().collect();
465 DataFrame::new(cols)
466 .map_err(|e| Error::operation(format!("Failed to create result DataFrame: {e}")))
467 }
468
469 pub fn transpose(df: &DataFrame, keep_names_as: Option<&str>) -> Result<DataFrame> {
471 let mut df_mut = df.clone();
472 let transposed = df_mut
473 .transpose(keep_names_as, None)
474 .map_err(|e| Error::operation(format!("Failed to transpose: {e}")))?;
475 Ok(transposed)
476 }
477}
478
479#[allow(clippy::needless_pass_by_value)]
481pub fn cast_column(value: &Value, column: &str, target_type: ColumnDataType) -> Result<Value> {
482 match value {
483 Value::DataFrame(df) => {
484 let dtype = target_type.to_polars_dtype();
485 let mut result = df.clone();
486 let series = result
487 .column(column)
488 .map_err(|e| Error::operation(format!("Column '{column}' not found: {e}")))?
489 .cast(&dtype)
490 .map_err(|e| Error::operation(format!("Failed to cast column '{column}': {e}")))?;
491
492 result
493 .with_column(series)
494 .map_err(|e| Error::operation(format!("Failed to update column: {e}")))?;
495
496 Ok(Value::DataFrame(result))
497 }
498 _ => Err(Error::operation(
499 "cast_column can only be applied to DataFrames".to_string(),
500 )),
501 }
502}
503
504#[cfg(test)]
505mod tests {
506 use polars::prelude::{
507 col, lit, DataFrame, DataType, FillNullStrategy, Series, UniqueKeepStrategy,
508 };
509
510 use super::*;
511
/// `select` keeps exactly the requested columns.
#[test]
fn test_select() {
    let columns: Vec<Column> = vec![
        Series::new("a".into(), &[1i32, 2, 3]).into(),
        Series::new("b".into(), &[4i32, 5, 6]).into(),
        Series::new("c".into(), &[7i32, 8, 9]).into(),
    ];
    let frame = DataFrame::new(columns).unwrap();
    let wanted = ["a".to_string(), "c".to_string()];

    let selected = Transform::select(&frame, &wanted).unwrap();

    assert_eq!(selected.width(), 2);
    for kept in ["a", "c"] {
        assert!(selected.column(kept).is_ok());
    }
    assert!(selected.column("b").is_err());
}
527
/// `filter` keeps the rows where the mask is true.
#[test]
fn test_filter() {
    let frame = DataFrame::new(vec![
        Series::new("a".into(), &[1i32, 2, 3, 4, 5]).into(),
        Series::new("b".into(), &[10i32, 20, 30, 40, 50]).into(),
    ])
    .unwrap();
    let keep = Series::new("mask".into(), &[true, false, true, false, true]);

    let filtered = Transform::filter(&frame, &keep).unwrap();

    assert_eq!(filtered.height(), 3);
    let kept_a = filtered.column("a").unwrap().i32().unwrap();
    for (row, &expected) in [1, 3, 5].iter().enumerate() {
        assert_eq!(kept_a.get(row), Some(expected));
    }
}
544
/// `sort` orders rows ascending when the descending flag is false.
#[test]
fn test_sort() {
    let frame = DataFrame::new(vec![
        Series::new("a".into(), &[3, 1, 4, 1, 5]).into(),
        Series::new("b".into(), &[30, 10, 40, 15, 50]).into(),
    ])
    .unwrap();
    let keys = ["a".to_string()];

    let sorted = Transform::sort(&frame, &keys, vec![false]).unwrap();

    let sorted_a = sorted.column("a").unwrap().i32().unwrap();
    for (row, &expected) in [1, 1, 3, 4, 5].iter().enumerate() {
        assert_eq!(sorted_a.get(row), Some(expected));
    }
}
562
/// `rename` replaces the old column name with the new one.
#[test]
fn test_rename() {
    let only_column = Series::new(PlSmallStr::from("old_name"), &[1i32, 2, 3]);
    let frame = DataFrame::new(vec![only_column.into()]).unwrap();

    let mapping = HashMap::from([("old_name".to_string(), "new_name".to_string())]);

    let renamed = Transform::rename(&frame, &mapping).unwrap();
    assert!(renamed.column("old_name").is_err());
    assert!(renamed.column("new_name").is_ok());
}
579
/// `unique` collapses duplicate rows.
#[test]
fn test_unique() {
    let frame = DataFrame::new(vec![
        Series::new("a".into(), &[1, 2, 2, 3, 3, 3]).into(),
        Series::new("b".into(), &[10, 20, 20, 30, 30, 30]).into(),
    ])
    .unwrap();

    let deduped = Transform::unique(&frame, None, UniqueKeepStrategy::First).unwrap();

    assert_eq!(deduped.height(), 3);
}
591
/// `limit` keeps the leading rows and `skip` drops them.
#[test]
fn test_limit_and_skip() {
    let values = Series::new("a".into(), &[1, 2, 3, 4, 5]);
    let frame = DataFrame::new(vec![values.into()]).unwrap();

    let head = Transform::limit(&frame, 3).unwrap();
    assert_eq!(head.height(), 3);

    let tail = Transform::skip(&frame, 2).unwrap();
    assert_eq!(tail.height(), 3);
    let tail_a = tail.column("a").unwrap().i32().unwrap();
    assert_eq!(tail_a.get(0), Some(3));
}
606
/// `drop_nulls` without a subset removes rows with a null in any column.
#[test]
fn test_drop_nulls() {
    let col_a = Series::new("a".into(), &[Some(1), None, Some(3), None, Some(5)]);
    let col_b = Series::new("b".into(), &[Some(10), Some(20), None, Some(40), Some(50)]);
    let frame = DataFrame::new(vec![col_a.into(), col_b.into()]).unwrap();

    let cleaned = Transform::drop_nulls(&frame, None).unwrap();

    // Only the first and last rows are free of nulls.
    assert_eq!(cleaned.height(), 2);
}
626
/// `from_str` accepts every documented spelling regardless of case and
/// rejects unknown or empty names.
#[test]
fn test_column_datatype_from_str() {
    let accepted = [
        ("int32", ColumnDataType::Int32),
        ("i32", ColumnDataType::Int32),
        ("int64", ColumnDataType::Int64),
        ("i64", ColumnDataType::Int64),
        ("float32", ColumnDataType::Float32),
        ("f32", ColumnDataType::Float32),
        ("float64", ColumnDataType::Float64),
        ("f64", ColumnDataType::Float64),
        ("string", ColumnDataType::String),
        ("str", ColumnDataType::String),
        ("utf8", ColumnDataType::String),
        ("bool", ColumnDataType::Boolean),
        ("boolean", ColumnDataType::Boolean),
        ("date", ColumnDataType::Date),
        ("datetime", ColumnDataType::DateTime),
        // Mixed and upper case spellings.
        ("INT32", ColumnDataType::Int32),
        ("Float64", ColumnDataType::Float64),
    ];
    for (input, expected) in accepted {
        assert_eq!(ColumnDataType::from_str(input).unwrap(), expected);
    }

    for rejected in ["invalid", ""] {
        assert!(ColumnDataType::from_str(rejected).is_err());
    }
}
704
/// Every variant maps to the matching Polars dtype.
#[test]
fn test_column_datatype_to_polars_dtype() {
    let expectations = [
        (ColumnDataType::Int32, DataType::Int32),
        (ColumnDataType::Int64, DataType::Int64),
        (ColumnDataType::Float32, DataType::Float32),
        (ColumnDataType::Float64, DataType::Float64),
        (ColumnDataType::String, DataType::String),
        (ColumnDataType::Boolean, DataType::Boolean),
        (ColumnDataType::Date, DataType::Date),
        (
            ColumnDataType::DateTime,
            DataType::Datetime(polars::prelude::TimeUnit::Milliseconds, None),
        ),
    ];
    for (variant, expected) in expectations {
        assert_eq!(variant.to_polars_dtype(), expected);
    }
}
719
/// `select_lazy` keeps exactly the requested columns once collected.
#[test]
fn test_select_lazy() {
    let columns: Vec<Column> = vec![
        Series::new("a".into(), &[1, 2, 3]).into(),
        Series::new("b".into(), &[4, 5, 6]).into(),
        Series::new("c".into(), &[7, 8, 9]).into(),
    ];
    let plan = DataFrame::new(columns).unwrap().lazy();
    let wanted = ["a".to_string(), "c".to_string()];

    let selected = Transform::select_lazy(plan, &wanted)
        .unwrap()
        .collect()
        .unwrap();

    assert_eq!(selected.width(), 2);
    for kept in ["a", "c"] {
        assert!(selected.column(kept).is_ok());
    }
    assert!(selected.column("b").is_err());
}
737
/// `filter_lazy` keeps the rows matching the predicate.
#[test]
fn test_filter_lazy() {
    let plan = DataFrame::new(vec![
        Series::new("a".into(), &[1, 2, 3, 4, 5]).into(),
        Series::new("b".into(), &[10, 20, 30, 40, 50]).into(),
    ])
    .unwrap()
    .lazy();

    let greater_than_three = col("a").gt(lit(3));
    let filtered = Transform::filter_lazy(plan, greater_than_three)
        .unwrap()
        .collect()
        .unwrap();

    assert_eq!(filtered.height(), 2);
    let kept_a = filtered.column("a").unwrap().i32().unwrap();
    for (row, &expected) in [4, 5].iter().enumerate() {
        assert_eq!(kept_a.get(row), Some(expected));
    }
}
756
/// `sort_lazy` orders rows ascending when the descending flag is false.
#[test]
fn test_sort_lazy() {
    let plan = DataFrame::new(vec![
        Series::new("a".into(), &[3i32, 1, 4, 1, 5]).into(),
        Series::new("b".into(), &[30i32, 10, 40, 15, 50]).into(),
    ])
    .unwrap()
    .lazy();
    let keys = ["a".to_string()];

    let sorted = Transform::sort_lazy(plan, &keys, &[false])
        .unwrap()
        .collect()
        .unwrap();

    let sorted_a = sorted.column("a").unwrap().i32().unwrap();
    for (row, &expected) in [1, 1, 3, 4, 5].iter().enumerate() {
        assert_eq!(sorted_a.get(row), Some(expected));
    }
}
776
/// `rename_lazy` replaces the old column name with the new one.
#[test]
fn test_rename_lazy() {
    let only_column = Series::new("old_name".into(), &[1, 2, 3]);
    let plan = DataFrame::new(vec![only_column.into()]).unwrap().lazy();

    let mapping = HashMap::from([("old_name".to_string(), "new_name".to_string())]);

    let renamed = Transform::rename_lazy(plan, &mapping)
        .unwrap()
        .collect()
        .unwrap();
    assert!(renamed.column("old_name").is_err());
    assert!(renamed.column("new_name").is_ok());
}
793
/// `with_column` appends the series under the given name.
#[test]
fn test_with_column() {
    let frame = DataFrame::new(vec![
        Series::new(PlSmallStr::from("a"), &[1, 2, 3]).into(),
        Series::new(PlSmallStr::from("b"), &[4, 5, 6]).into(),
    ])
    .unwrap();
    let extra = Series::new(PlSmallStr::from("c"), &[7, 8, 9]);

    let widened = Transform::with_column(&frame, "c", extra).unwrap();

    assert_eq!(widened.width(), 3);
    for name in ["a", "b", "c"] {
        assert!(widened.column(name).is_ok());
    }
    let added = widened.column("c").unwrap().i32().unwrap();
    assert_eq!(added.get(0), Some(7));
}
811
/// `with_column_lazy` appends the column produced by the expression.
#[test]
fn test_with_column_lazy() {
    let plan = DataFrame::new(vec![
        Series::new(PlSmallStr::from("a"), &[1, 2, 3]).into(),
        Series::new(PlSmallStr::from("b"), &[4, 5, 6]).into(),
    ])
    .unwrap()
    .lazy();

    let constant_c = lit(10).alias("c");
    let widened = Transform::with_column_lazy(plan, constant_c)
        .unwrap()
        .collect()
        .unwrap();

    assert_eq!(widened.width(), 3);
    assert!(widened.column("c").is_ok());
    let added = widened.column("c").unwrap().i32().unwrap();
    assert_eq!(added.get(0), Some(10));
}
832
/// `drop` removes only the named column.
#[test]
fn test_drop() {
    let columns: Vec<Column> = vec![
        Series::new("a".into(), &[1, 2, 3]).into(),
        Series::new("b".into(), &[4, 5, 6]).into(),
        Series::new("c".into(), &[7, 8, 9]).into(),
    ];
    let frame = DataFrame::new(columns).unwrap();
    let unwanted = ["b".to_string()];

    let trimmed = Transform::drop(&frame, &unwanted).unwrap();

    assert_eq!(trimmed.width(), 2);
    assert!(trimmed.column("b").is_err());
    for kept in ["a", "c"] {
        assert!(trimmed.column(kept).is_ok());
    }
}
848
/// `drop_lazy` removes only the named column.
#[test]
fn test_drop_lazy() {
    let columns: Vec<Column> = vec![
        Series::new("a".into(), &[1, 2, 3]).into(),
        Series::new("b".into(), &[4, 5, 6]).into(),
        Series::new("c".into(), &[7, 8, 9]).into(),
    ];
    let plan = DataFrame::new(columns).unwrap().lazy();
    let unwanted = ["b".to_string()];

    let trimmed = Transform::drop_lazy(plan, &unwanted)
        .unwrap()
        .collect()
        .unwrap();

    assert_eq!(trimmed.width(), 2);
    assert!(trimmed.column("b").is_err());
    for kept in ["a", "c"] {
        assert!(trimmed.column(kept).is_ok());
    }
}
866
/// `unique_lazy` collapses duplicate rows.
#[test]
fn test_unique_lazy() {
    let plan = DataFrame::new(vec![
        Series::new("a".into(), &[1, 2, 2, 3, 3, 3]).into(),
        Series::new("b".into(), &[10, 20, 20, 30, 30, 30]).into(),
    ])
    .unwrap()
    .lazy();

    let deduped = Transform::unique_lazy(plan, None, UniqueKeepStrategy::First)
        .unwrap()
        .collect()
        .unwrap();

    assert_eq!(deduped.height(), 3);
}
880
/// `limit_lazy` keeps only the first rows.
#[test]
fn test_limit_lazy() {
    let values = Series::new("a".into(), &[1, 2, 3, 4, 5]);
    let plan = DataFrame::new(vec![values.into()]).unwrap().lazy();

    let head = Transform::limit_lazy(plan, 3).unwrap().collect().unwrap();

    assert_eq!(head.height(), 3);
}
893
/// `skip_lazy` drops the leading rows.
#[test]
fn test_skip_lazy() {
    let values = Series::new("a".into(), &[1, 2, 3, 4, 5]);
    let plan = DataFrame::new(vec![values.into()]).unwrap().lazy();

    let tail = Transform::skip_lazy(plan, 2).unwrap().collect().unwrap();

    assert_eq!(tail.height(), 3);
    let tail_a = tail.column("a").unwrap().i32().unwrap();
    assert_eq!(tail_a.get(0), Some(3));
}
910
/// `slice` returns the requested window of rows.
#[test]
fn test_slice() {
    let values = Series::new("a".into(), &[1, 2, 3, 4, 5]);
    let frame = DataFrame::new(vec![values.into()]).unwrap();

    let window = Transform::slice(&frame, 1, 3).unwrap();

    assert_eq!(window.height(), 3);
    let window_a = window.column("a").unwrap().i32().unwrap();
    for (row, &expected) in [2, 3, 4].iter().enumerate() {
        assert_eq!(window_a.get(row), Some(expected));
    }
}
924
/// `slice_lazy` returns the requested window of rows.
#[test]
fn test_slice_lazy() {
    let values = Series::new("a".into(), &[1, 2, 3, 4, 5]);
    let plan = DataFrame::new(vec![values.into()]).unwrap().lazy();

    let window = Transform::slice_lazy(plan, 1, 3).unwrap().collect().unwrap();

    assert_eq!(window.height(), 3);
    let window_a = window.column("a").unwrap().i32().unwrap();
    assert_eq!(window_a.get(0), Some(2));
}
941
/// `reverse` flips the row order of every column.
#[test]
fn test_reverse() {
    let frame = DataFrame::new(vec![
        Series::new("a".into(), &[1, 2, 3, 4, 5]).into(),
        Series::new("b".into(), &[10, 20, 30, 40, 50]).into(),
    ])
    .unwrap();

    let flipped = Transform::reverse(&frame).unwrap();

    assert_eq!(flipped.height(), 5);
    let flipped_a = flipped.column("a").unwrap().i32().unwrap();
    assert_eq!(flipped_a.get(0), Some(5));
    assert_eq!(flipped_a.get(4), Some(1));
    let flipped_b = flipped.column("b").unwrap().i32().unwrap();
    assert_eq!(flipped_b.get(0), Some(50));
    assert_eq!(flipped_b.get(4), Some(10));
}
957
/// `reverse_lazy` flips the row order.
#[test]
fn test_reverse_lazy() {
    let plan = DataFrame::new(vec![
        Series::new("a".into(), &[1, 2, 3, 4, 5]).into(),
        Series::new("b".into(), &[10, 20, 30, 40, 50]).into(),
    ])
    .unwrap()
    .lazy();

    let flipped = Transform::reverse_lazy(plan).unwrap().collect().unwrap();

    assert_eq!(flipped.height(), 5);
    let flipped_a = flipped.column("a").unwrap().i32().unwrap();
    assert_eq!(flipped_a.get(0), Some(5));
    assert_eq!(flipped_a.get(4), Some(1));
}
979
/// `sample` returns the requested number of rows.
#[test]
fn test_sample() {
    let population = Column::new("a".into(), &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
    let frame = DataFrame::new(vec![population]).unwrap();

    let drawn = Transform::sample(&frame, 3, false, Some(42)).unwrap();

    assert!(drawn.column("a").is_ok());
    assert_eq!(drawn.height(), 3);
}
992
/// Forward fill leaves no nulls in a column whose first value is present.
#[test]
fn test_fill_null() {
    let ints = Series::new("a".into(), &[Some(1), None, Some(3)]);
    let floats = Series::new("b".into(), &[Some(1.0), Some(2.0), None]);
    let frame = DataFrame::new(vec![ints.into(), floats.into()]).unwrap();

    let filled = Transform::fill_null(&frame, FillNullStrategy::Forward(None)).unwrap();

    assert_eq!(filled.height(), 3);
    let filled_a = filled.column("a").unwrap();
    assert_eq!(filled_a.null_count(), 0);
}
1007
/// `fill_null_lazy` replaces nulls with the literal value.
///
/// The input contains a null so the test fails if the fill is not applied;
/// the previous input had no nulls and could not detect that.
#[test]
fn test_fill_null_lazy() {
    let df = DataFrame::new(vec![
        Series::new(PlSmallStr::from("a"), &[Some(1), None, Some(3)]).into(),
    ])
    .unwrap();
    let lf = df.lazy();

    let value = lit(0);
    let result = Transform::fill_null_lazy(lf, value).unwrap();
    let collected = result.collect().unwrap();

    assert_eq!(collected.height(), 3);
    let col_a = collected.column("a").unwrap();
    assert_eq!(col_a.null_count(), 0);
    // The null in the middle row is replaced by the literal.
    assert_eq!(col_a.i32().unwrap().get(1), Some(0));
}
1023
/// `drop_nulls_lazy` removes rows with a null in any column.
#[test]
fn test_drop_nulls_lazy() {
    let col_a = Series::new("a".into(), &[Some(1), None, Some(3), None, Some(5)]);
    let col_b = Series::new("b".into(), &[Some(10), Some(20), None, Some(40), Some(50)]);
    let plan = DataFrame::new(vec![col_a.into(), col_b.into()])
        .unwrap()
        .lazy();

    let cleaned = Transform::drop_nulls_lazy(plan, None)
        .unwrap()
        .collect()
        .unwrap();

    // Only the first and last rows are free of nulls.
    assert_eq!(cleaned.height(), 2);
}
1045
/// `cast` converts a float column to `Int32`.
#[test]
fn test_cast() {
    let floats = Series::new("a".into(), &[1.0, 2.0, 3.0]);
    let frame = DataFrame::new(vec![floats.into()]).unwrap();

    let converted = Transform::cast(&frame, "a", &DataType::Int32).unwrap();

    assert_eq!(converted.height(), 3);
    let converted_a = converted.column("a").unwrap();
    assert_eq!(converted_a.dtype(), &DataType::Int32);
    assert_eq!(converted_a.i32().unwrap().get(0), Some(1));
}
1056
/// `cast_lazy` converts a float column to `Int32`.
#[test]
fn test_cast_lazy() {
    let floats = Series::new("a".into(), &[1.0, 2.0, 3.0]);
    let plan = DataFrame::new(vec![floats.into()]).unwrap().lazy();

    let converted = Transform::cast_lazy(plan, "a", DataType::Int32)
        .unwrap()
        .collect()
        .unwrap();

    assert_eq!(converted.height(), 3);
    let converted_a = converted.column("a").unwrap();
    assert_eq!(converted_a.dtype(), &DataType::Int32);
    assert_eq!(converted_a.i32().unwrap().get(0), Some(1));
}
1069
/// `explode` yields one row per list element (currently ignored).
#[test]
#[ignore = "explode operation not supported for binary dtype in this Polars version"]
fn test_explode() {
    let nested = vec![vec![1, 2], vec![3], vec![4, 5, 6]];
    let lists = Series::new("list_col".into(), nested);
    let other = Series::new("other".into(), &[10, 20, 30]);
    let frame = DataFrame::new(vec![lists.into(), other.into()]).unwrap();
    let targets = ["list_col".to_string()];

    let exploded = Transform::explode(&frame, &targets).unwrap();

    // 2 + 1 + 3 list elements.
    assert_eq!(exploded.height(), 6);
}
1084
/// `explode_lazy` yields one row per list element (currently ignored).
#[test]
#[ignore = "explode operation not supported for binary dtype in this Polars version"]
fn test_explode_lazy() {
    let nested = vec![vec![1, 2], vec![3], vec![4, 5, 6]];
    let lists = Series::new("list_col".into(), nested);
    let other = Series::new("other".into(), &[10, 20, 30]);
    let plan = DataFrame::new(vec![lists.into(), other.into()])
        .unwrap()
        .lazy();
    let targets = ["list_col".to_string()];

    let exploded = Transform::explode_lazy(plan, &targets)
        .unwrap()
        .collect()
        .unwrap();

    // 2 + 1 + 3 list elements.
    assert_eq!(exploded.height(), 6);
}
1101
/// `melt` produces `variable` and `value` columns; the expected height pins
/// the function's current behaviour.
#[test]
fn test_melt() {
    let frame = DataFrame::new(vec![
        Series::new(PlSmallStr::from("id"), &[1, 2, 3]).into(),
        Series::new(PlSmallStr::from("a"), &[10, 20, 30]).into(),
        Series::new(PlSmallStr::from("b"), &[100, 200, 300]).into(),
    ])
    .unwrap();
    let id_vars = ["id".to_string()];
    let value_vars = ["a".to_string(), "b".to_string()];

    let melted =
        Transform::melt(&frame, &id_vars, &value_vars, Some("variable"), Some("value")).unwrap();

    assert_eq!(melted.height(), 3);
    for name in ["variable", "value"] {
        assert!(melted.column(name).is_ok());
    }
}
1124
/// `melt_lazy` produces `variable` and `value` columns; the expected height
/// pins the function's current behaviour.
#[test]
fn test_melt_lazy() {
    let plan = DataFrame::new(vec![
        Series::new(PlSmallStr::from("id"), &[1, 2, 3]).into(),
        Series::new(PlSmallStr::from("a"), &[10, 20, 30]).into(),
        Series::new(PlSmallStr::from("b"), &[100, 200, 300]).into(),
    ])
    .unwrap()
    .lazy();
    let id_vars = ["id".to_string()];
    let value_vars = ["a".to_string(), "b".to_string()];

    let melted =
        Transform::melt_lazy(plan, &id_vars, &value_vars, Some("variable"), Some("value"))
            .unwrap()
            .collect()
            .unwrap();

    assert_eq!(melted.height(), 3);
    for name in ["variable", "value"] {
        assert!(melted.column(name).is_ok());
    }
}
1149
/// `map_columns` applies the expression to every column.
#[test]
fn test_map_columns() {
    let frame = DataFrame::new(vec![
        Series::new(PlSmallStr::from("a"), &[1, 2, 3]).into(),
        Series::new(PlSmallStr::from("b"), &[4, 5, 6]).into(),
    ])
    .unwrap();

    let add_ten = col("*") + lit(10);
    let mapped = Transform::map_columns(&frame, add_ten).unwrap();

    assert_eq!(mapped.height(), 3);
    assert_eq!(mapped.width(), 2);
    let mapped_a = mapped.column("a").unwrap().i32().unwrap();
    for (row, &expected) in [11, 12, 13].iter().enumerate() {
        assert_eq!(mapped_a.get(row), Some(expected));
    }
}
1170
/// `cast_column` casts the named column inside a DataFrame value.
#[test]
fn test_cast_column() {
    let floats = Series::new("a".into(), &[1.0, 2.0, 3.0]);
    let input = Value::DataFrame(DataFrame::new(vec![floats.into()]).unwrap());

    let output = cast_column(&input, "a", ColumnDataType::Int32).unwrap();

    if let Value::DataFrame(converted) = output {
        let converted_a = converted.column("a").unwrap();
        assert_eq!(converted_a.dtype(), &DataType::Int32);
        assert_eq!(converted_a.i32().unwrap().get(0), Some(1));
    } else {
        panic!("Expected DataFrame");
    }
}
1186
/// `cast_column` rejects values that are not DataFrames.
#[test]
fn test_cast_column_invalid_type() {
    let not_a_frame = Value::Int(42);

    let outcome = cast_column(&not_a_frame, "a", ColumnDataType::Int32);

    assert!(outcome.is_err());
}
1193}