dsq_core/ops/
transform.rs

1use std::collections::HashMap;
2
3use polars::prelude::*;
4use polars_ops::prelude::UnpivotDF;
5
6use crate::error::{Error, Result};
7use crate::Value;
8
/// Data type for columns in `DataFrames`.
///
/// Every variant is a plain tag without payload, so the type is cheap to
/// clone and can be compared and hashed (for example to use it as a map key).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ColumnDataType {
    /// 32-bit signed integer
    Int32,
    /// 64-bit signed integer
    Int64,
    /// 32-bit floating point
    Float32,
    /// 64-bit floating point
    Float64,
    /// UTF-8 string
    String,
    /// Boolean value
    Boolean,
    /// Date (without time)
    Date,
    /// Date and time
    DateTime,
}
29
30impl ColumnDataType {
31    /// Create a `ColumnDataType` from a string representation
32    #[allow(clippy::should_implement_trait)]
33    pub fn from_str(s: &str) -> Result<Self> {
34        match s.to_lowercase().as_str() {
35            "int32" | "i32" => Ok(ColumnDataType::Int32),
36            "int64" | "i64" => Ok(ColumnDataType::Int64),
37            "float32" | "f32" => Ok(ColumnDataType::Float32),
38            "float64" | "f64" => Ok(ColumnDataType::Float64),
39            "string" | "str" | "utf8" => Ok(ColumnDataType::String),
40            "bool" | "boolean" => Ok(ColumnDataType::Boolean),
41            "date" => Ok(ColumnDataType::Date),
42            "datetime" => Ok(ColumnDataType::DateTime),
43            _ => Err(Error::operation(format!("Unknown data type: {s}"))),
44        }
45    }
46
47    /// Convert to Polars `DataType`
48    #[must_use]
49    pub fn to_polars_dtype(&self) -> DataType {
50        match self {
51            ColumnDataType::Int32 => DataType::Int32,
52            ColumnDataType::Int64 => DataType::Int64,
53            ColumnDataType::Float32 => DataType::Float32,
54            ColumnDataType::Float64 => DataType::Float64,
55            ColumnDataType::String => DataType::String,
56            ColumnDataType::Boolean => DataType::Boolean,
57            ColumnDataType::Date => DataType::Date,
58            ColumnDataType::DateTime => {
59                DataType::Datetime(polars::prelude::TimeUnit::Milliseconds, None)
60            }
61        }
62    }
63}
64
/// Transform operations for `DataFrames`.
///
/// This is a stateless namespace type: every operation is exposed as an
/// associated function and no instance data is stored.
#[derive(Debug, Clone, Copy, Default)]
pub struct Transform;
67
68impl Transform {
69    /// Select specific columns from a `DataFrame`
70    pub fn select(df: &DataFrame, columns: &[String]) -> Result<DataFrame> {
71        df.select(columns)
72            .map_err(|e| Error::operation(format!("Failed to select columns: {e}")))
73    }
74
75    /// Select specific columns from a `LazyFrame`
76    pub fn select_lazy(lf: LazyFrame, columns: &[String]) -> Result<LazyFrame> {
77        let cols: Vec<Expr> = columns.iter().map(col).collect();
78        Ok(lf.select(&cols))
79    }
80
81    /// Filter `DataFrame` based on a condition
82    pub fn filter(df: &DataFrame, mask: &Series) -> Result<DataFrame> {
83        if mask.dtype() != &DataType::Boolean {
84            return Err(Error::operation("Filter mask must be boolean".to_string()));
85        }
86
87        let mask = mask
88            .bool()
89            .map_err(|e| Error::operation(format!("Failed to cast mask to boolean: {e}")))?;
90
91        df.filter(mask)
92            .map_err(|e| Error::operation(format!("Failed to filter DataFrame: {e}")))
93    }
94
95    /// Filter `LazyFrame` based on an expression
96    pub fn filter_lazy(lf: LazyFrame, predicate: Expr) -> Result<LazyFrame> {
97        Ok(lf.filter(predicate))
98    }
99
100    /// Sort `DataFrame` by columns
101    pub fn sort(df: &DataFrame, by_columns: &[String], descending: Vec<bool>) -> Result<DataFrame> {
102        df.sort(
103            by_columns,
104            SortMultipleOptions::default().with_order_descending_multi(descending),
105        )
106        .map_err(|e| Error::operation(format!("Failed to sort DataFrame: {e}")))
107    }
108
109    /// Sort `LazyFrame` by columns
110    pub fn sort_lazy(
111        lf: LazyFrame,
112        by_columns: &[String],
113        descending: &[bool],
114    ) -> Result<LazyFrame> {
115        let exprs: Vec<Expr> = by_columns.iter().map(col).collect();
116        let options =
117            SortMultipleOptions::default().with_order_descending_multi(descending.to_vec());
118        Ok(lf.sort_by_exprs(&exprs, options))
119    }
120
121    /// Rename columns in a `DataFrame`
122    pub fn rename(df: &DataFrame, mapping: &HashMap<String, String>) -> Result<DataFrame> {
123        let mut result = df.clone();
124
125        for (old_name, new_name) in mapping {
126            result
127                .rename(old_name.as_str(), new_name.as_str().into())
128                .map_err(|e| {
129                    Error::operation(format!("Failed to rename column '{old_name}': {e}"))
130                })?;
131        }
132
133        Ok(result)
134    }
135
136    /// Rename columns in a `LazyFrame`
137    pub fn rename_lazy(lf: LazyFrame, mapping: &HashMap<String, String>) -> Result<LazyFrame> {
138        let mut result = lf;
139
140        for (old_name, new_name) in mapping {
141            result = result.rename([old_name.as_str()], [new_name.as_str()], true);
142        }
143
144        Ok(result)
145    }
146
147    /// Add a new column to a `DataFrame`
148    pub fn with_column(df: &DataFrame, name: &str, series: Series) -> Result<DataFrame> {
149        let mut result = df.clone();
150        result
151            .with_column(series.with_name(name.into()))
152            .map_err(|e| Error::operation(format!("Failed to add column '{name}': {e}")))?;
153        Ok(result)
154    }
155
156    /// Add a new column expression to a `LazyFrame`
157    pub fn with_column_lazy(lf: LazyFrame, expr: Expr) -> Result<LazyFrame> {
158        Ok(lf.with_column(expr))
159    }
160
161    /// Drop columns from a `DataFrame`
162    pub fn drop(df: &DataFrame, columns: &[String]) -> Result<DataFrame> {
163        let mut result = df.clone();
164        for column in columns {
165            result = result
166                .drop(column)
167                .map_err(|e| Error::operation(format!("Failed to drop column '{column}': {e}")))?;
168        }
169        Ok(result)
170    }
171
172    /// Drop columns from a `LazyFrame`
173    pub fn drop_lazy(lf: LazyFrame, columns: &[String]) -> Result<LazyFrame> {
174        // Collect to DataFrame, drop columns, then convert back to LazyFrame
175        let df = lf
176            .collect()
177            .map_err(|e| Error::operation(format!("Failed to collect LazyFrame: {e}")))?;
178        let mut result = df;
179        for column in columns {
180            result = result
181                .drop(column)
182                .map_err(|e| Error::operation(format!("Failed to drop column '{column}': {e}")))?;
183        }
184        Ok(result.lazy())
185    }
186
187    /// Get unique values in a `DataFrame`
188    pub fn unique(
189        df: &DataFrame,
190        subset: Option<&[String]>,
191        keep: UniqueKeepStrategy,
192    ) -> Result<DataFrame> {
193        let result = df
194            .unique::<String, String>(subset, keep, None)
195            .map_err(|e| Error::operation(format!("Failed to get unique values: {e}")))?;
196        Ok(result)
197    }
198
199    /// Get unique values in a `LazyFrame`
200    pub fn unique_lazy(
201        lf: LazyFrame,
202        subset: Option<&[String]>,
203        keep: UniqueKeepStrategy,
204    ) -> Result<LazyFrame> {
205        // Collect to DataFrame, get unique, then convert back to LazyFrame
206        let df = lf
207            .collect()
208            .map_err(|e| Error::operation(format!("Failed to collect LazyFrame: {e}")))?;
209        let result = df
210            .unique::<String, String>(subset, keep, None)
211            .map_err(|e| Error::operation(format!("Failed to get unique values: {e}")))?;
212        Ok(result.lazy())
213    }
214
215    /// Limit the number of rows
216    pub fn limit(df: &DataFrame, n: usize) -> Result<DataFrame> {
217        Ok(df.head(Some(n)))
218    }
219
220    /// Limit the number of rows in a `LazyFrame`
221    pub fn limit_lazy(lf: LazyFrame, n: u32) -> Result<LazyFrame> {
222        Ok(lf.limit(n))
223    }
224
225    /// Skip the first n rows
226    pub fn skip(df: &DataFrame, n: usize) -> Result<DataFrame> {
227        #[allow(clippy::cast_possible_wrap)]
228        {
229            Ok(df.slice(n as i64, df.height().saturating_sub(n)))
230        }
231    }
232
233    /// Skip the first n rows in a `LazyFrame`
234    pub fn skip_lazy(lf: LazyFrame, n: u32) -> Result<LazyFrame> {
235        Ok(lf.slice(i64::from(n), u32::MAX))
236    }
237
238    /// Slice a `DataFrame`
239    pub fn slice(df: &DataFrame, offset: i64, length: usize) -> Result<DataFrame> {
240        Ok(df.slice(offset, length))
241    }
242
243    /// Slice a `LazyFrame`
244    pub fn slice_lazy(lf: LazyFrame, offset: i64, length: u32) -> Result<LazyFrame> {
245        Ok(lf.slice(offset, length))
246    }
247
248    /// Reverse the order of rows
249    pub fn reverse(df: &DataFrame) -> Result<DataFrame> {
250        #[allow(clippy::cast_possible_truncation)]
251        let indices: Vec<IdxSize> = (0..df.height() as IdxSize).rev().collect();
252        let ca = IdxCa::from_vec("".into(), indices);
253
254        df.take(&ca)
255            .map_err(|e| Error::operation(format!("Failed to reverse DataFrame: {e}")))
256    }
257
258    /// Reverse the order of rows in a `LazyFrame`
259    pub fn reverse_lazy(lf: LazyFrame) -> Result<LazyFrame> {
260        Ok(lf.reverse())
261    }
262
263    /// Sample rows from a `DataFrame`
264    pub fn sample(
265        df: &DataFrame,
266        n: usize,
267        with_replacement: bool,
268        seed: Option<u64>,
269    ) -> Result<DataFrame> {
270        #[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
271        let n_values = vec![n as u32];
272        let n_series = Series::new("n".into(), n_values);
273        df.sample_n(&n_series, with_replacement, true, seed)
274            .map_err(|e| Error::operation(format!("Failed to sample DataFrame: {e}")))
275    }
276
277    /// Fill null values
278    pub fn fill_null(df: &DataFrame, value: FillNullStrategy) -> Result<DataFrame> {
279        let columns = df
280            .get_columns()
281            .iter()
282            .map(|s| {
283                s.fill_null(value)
284                    .map_err(|e| Error::operation(format!("Failed to fill null values: {e}")))
285            })
286            .collect::<Result<Vec<_>>>()?;
287
288        let cols: Vec<_> = columns.into_iter().collect();
289        DataFrame::new(cols).map_err(|e| {
290            Error::operation(format!("Failed to create DataFrame after fill_null: {e}"))
291        })
292    }
293
294    /// Fill null values in a `LazyFrame`
295    #[allow(clippy::needless_pass_by_value)]
296    pub fn fill_null_lazy(mut lf: LazyFrame, value: Expr) -> Result<LazyFrame> {
297        let schema = lf
298            .collect_schema()
299            .map_err(|e| Error::operation(format!("Failed to collect schema: {e}")))?;
300        let columns = schema
301            .iter()
302            .map(|(name, _)| col(name.as_str()).fill_null(value.clone()))
303            .collect::<Vec<_>>();
304
305        Ok(lf.with_columns(&columns))
306    }
307
308    /// Drop rows with null values
309    pub fn drop_nulls(df: &DataFrame, subset: Option<&[String]>) -> Result<DataFrame> {
310        df.drop_nulls(subset)
311            .map_err(|e| Error::operation(format!("Failed to drop null values: {e}")))
312    }
313
314    /// Drop rows with null values in a `LazyFrame`
315    pub fn drop_nulls_lazy(lf: LazyFrame, _subset: Option<Vec<Expr>>) -> Result<LazyFrame> {
316        // drop_nulls expects Option<Selector> in 0.51
317        Ok(lf.drop_nulls(None))
318    }
319
320    /// Cast column types
321    pub fn cast(df: &DataFrame, column: &str, dtype: &DataType) -> Result<DataFrame> {
322        let mut result = df.clone();
323        let series = result
324            .column(column)
325            .map_err(|e| Error::operation(format!("Column '{column}' not found: {e}")))?
326            .cast(dtype)
327            .map_err(|e| Error::operation(format!("Failed to cast column '{column}': {e}")))?;
328
329        result
330            .with_column(series)
331            .map_err(|e| Error::operation(format!("Failed to update column: {e}")))?;
332
333        Ok(result)
334    }
335
336    /// Cast column types in a `LazyFrame`
337    pub fn cast_lazy(lf: LazyFrame, column: &str, dtype: DataType) -> Result<LazyFrame> {
338        Ok(lf.with_column(col(column).cast(dtype)))
339    }
340
341    /// Explode list columns
342    pub fn explode(df: &DataFrame, columns: &[String]) -> Result<DataFrame> {
343        df.explode(columns)
344            .map_err(|e| Error::operation(format!("Failed to explode columns: {e}")))
345    }
346
347    /// Explode list columns in a `LazyFrame`
348    pub fn explode_lazy(lf: LazyFrame, columns: &[String]) -> Result<LazyFrame> {
349        // Collect to DataFrame, explode columns, then convert back to LazyFrame
350        let df = lf
351            .collect()
352            .map_err(|e| Error::operation(format!("Failed to collect LazyFrame: {e}")))?;
353        let result = df
354            .explode(columns)
355            .map_err(|e| Error::operation(format!("Failed to explode columns: {e}")))?;
356        Ok(result.lazy())
357    }
358
359    /// Melt `DataFrame` from wide to long format
360    pub fn melt(
361        df: &DataFrame,
362        id_vars: &[String],
363        value_vars: &[String],
364        _variable_name: Option<&str>,
365        _value_name: Option<&str>,
366    ) -> Result<DataFrame> {
367        if id_vars.is_empty() {
368            df.unpivot([] as [&str; 0], value_vars)
369                .map_err(|e| Error::operation(format!("Failed to melt DataFrame: {e}")))
370        } else {
371            df.unpivot(id_vars, value_vars)
372                .map_err(|e| Error::operation(format!("Failed to melt DataFrame: {e}")))
373        }
374    }
375
376    /// Melt `LazyFrame` from wide to long format
377    pub fn melt_lazy(
378        lf: LazyFrame,
379        id_vars: &[String],
380        value_vars: &[String],
381        _variable_name: Option<&str>,
382        _value_name: Option<&str>,
383    ) -> Result<LazyFrame> {
384        // Collect to DataFrame, melt, then convert back to LazyFrame
385        let df = lf
386            .collect()
387            .map_err(|e| Error::operation(format!("Failed to collect LazyFrame: {e}")))?;
388        let result = if id_vars.is_empty() {
389            df.unpivot([] as [&str; 0], value_vars)
390                .map_err(|e| Error::operation(format!("Failed to melt LazyFrame: {e}")))?
391        } else {
392            df.unpivot(id_vars, value_vars)
393                .map_err(|e| Error::operation(format!("Failed to melt LazyFrame: {e}")))?
394        };
395        Ok(result.lazy())
396    }
397
398    /// Pivot `DataFrame` from long to wide format
399    pub fn pivot(
400        df: &DataFrame,
401        values: &[String],
402        index: &[String],
403        columns: &[String],
404        aggregate_fn: Option<&str>,
405    ) -> Result<DataFrame> {
406        let values_expr: Vec<Expr> = values.iter().map(col).collect();
407        let index_expr: Vec<Expr> = index.iter().map(col).collect();
408        let _columns_expr = col(columns[0].as_str()); // Simplified to single column
409
410        let agg_expr = match aggregate_fn {
411            Some("sum") => values_expr[0].clone().sum(),
412            Some("mean") => values_expr[0].clone().mean(),
413            Some("count") => values_expr[0].clone().count(),
414            Some("min") => values_expr[0].clone().min(),
415            Some("max") => values_expr[0].clone().max(),
416            _ => values_expr[0].clone().first(), // Default to first
417        };
418
419        // Pivot not available in Polars 0.35 LazyFrame, use group_by instead
420        df.clone()
421            .lazy()
422            .group_by(index_expr)
423            .agg([agg_expr])
424            .collect()
425            .map_err(|e| Error::operation(format!("Failed to pivot DataFrame: {e}")))
426    }
427
428    /// Apply a function to each row
429    pub fn map_rows<F, T>(_df: &DataFrame, _f: F) -> Result<DataFrame>
430    where
431        F: Fn(usize) -> Result<T>,
432        T: Into<Series>,
433    {
434        // Row-wise operations on DataFrames are complex and not efficiently supported in Polars
435        // Consider using vectorized operations instead
436        Err(Error::operation("Row-wise map operations are not supported. Use vectorized operations or process data differently.".to_string()))
437    }
438
439    /// Apply a transformation expression to all columns
440    #[allow(clippy::needless_pass_by_value)]
441    pub fn map_columns(df: &DataFrame, expr: Expr) -> Result<DataFrame> {
442        let columns = df
443            .get_columns()
444            .iter()
445            .map(|s| {
446                let lazy_df = DataFrame::new(vec![s.clone()])
447                    .map_err(|e| {
448                        Error::operation(format!("Failed to create temporary DataFrame: {e}"))
449                    })?
450                    .lazy();
451
452                let result = lazy_df
453                    .select(&[expr.clone().alias(s.name().as_str())])
454                    .collect()
455                    .map_err(|e| Error::operation(format!("Failed to apply expression: {e}")))?;
456
457                result
458                    .column(s.name())
459                    .map_err(|e| Error::operation(format!("Failed to get result column: {e}")))
460                    .cloned()
461            })
462            .collect::<Result<Vec<_>>>()?;
463
464        let cols: Vec<_> = columns.into_iter().collect();
465        DataFrame::new(cols)
466            .map_err(|e| Error::operation(format!("Failed to create result DataFrame: {e}")))
467    }
468
469    /// Transpose a `DataFrame`
470    pub fn transpose(df: &DataFrame, keep_names_as: Option<&str>) -> Result<DataFrame> {
471        let mut df_mut = df.clone();
472        let transposed = df_mut
473            .transpose(keep_names_as, None)
474            .map_err(|e| Error::operation(format!("Failed to transpose: {e}")))?;
475        Ok(transposed)
476    }
477}
478
479/// Cast a column to a specific data type
480#[allow(clippy::needless_pass_by_value)]
481pub fn cast_column(value: &Value, column: &str, target_type: ColumnDataType) -> Result<Value> {
482    match value {
483        Value::DataFrame(df) => {
484            let dtype = target_type.to_polars_dtype();
485            let mut result = df.clone();
486            let series = result
487                .column(column)
488                .map_err(|e| Error::operation(format!("Column '{column}' not found: {e}")))?
489                .cast(&dtype)
490                .map_err(|e| Error::operation(format!("Failed to cast column '{column}': {e}")))?;
491
492            result
493                .with_column(series)
494                .map_err(|e| Error::operation(format!("Failed to update column: {e}")))?;
495
496            Ok(Value::DataFrame(result))
497        }
498        _ => Err(Error::operation(
499            "cast_column can only be applied to DataFrames".to_string(),
500        )),
501    }
502}
503
504#[cfg(test)]
505mod tests {
506    use polars::prelude::{
507        col, lit, DataFrame, DataType, FillNullStrategy, Series, UniqueKeepStrategy,
508    };
509
510    use super::*;
511
512    #[test]
513    fn test_select() {
514        let df = DataFrame::new(vec![
515            Series::new(PlSmallStr::from("a"), &[1i32, 2, 3]).into(),
516            Series::new(PlSmallStr::from("b"), &[4i32, 5, 6]).into(),
517            Series::new(PlSmallStr::from("c"), &[7i32, 8, 9]).into(),
518        ])
519        .unwrap();
520
521        let result = Transform::select(&df, &["a".to_string(), "c".to_string()]).unwrap();
522        assert_eq!(result.width(), 2);
523        assert!(result.column("a").is_ok());
524        assert!(result.column("c").is_ok());
525        assert!(result.column("b").is_err());
526    }
527
528    #[test]
529    fn test_filter() {
530        let df = DataFrame::new(vec![
531            Series::new(PlSmallStr::from("a"), &[1i32, 2, 3, 4, 5]).into(),
532            Series::new(PlSmallStr::from("b"), &[10i32, 20, 30, 40, 50]).into(),
533        ])
534        .unwrap();
535
536        let mask = Series::new(PlSmallStr::from("mask"), &[true, false, true, false, true]);
537        let result = Transform::filter(&df, &mask).unwrap();
538
539        assert_eq!(result.height(), 3);
540        assert_eq!(result.column("a").unwrap().i32().unwrap().get(0), Some(1));
541        assert_eq!(result.column("a").unwrap().i32().unwrap().get(1), Some(3));
542        assert_eq!(result.column("a").unwrap().i32().unwrap().get(2), Some(5));
543    }
544
545    #[test]
546    fn test_sort() {
547        let df = DataFrame::new(vec![
548            Series::new(PlSmallStr::from("a"), &[3, 1, 4, 1, 5]).into(),
549            Series::new(PlSmallStr::from("b"), &[30, 10, 40, 15, 50]).into(),
550        ])
551        .unwrap();
552
553        let result = Transform::sort(&df, &["a".to_string()], vec![false]).unwrap();
554
555        let col_a = result.column("a").unwrap().i32().unwrap();
556        assert_eq!(col_a.get(0), Some(1));
557        assert_eq!(col_a.get(1), Some(1));
558        assert_eq!(col_a.get(2), Some(3));
559        assert_eq!(col_a.get(3), Some(4));
560        assert_eq!(col_a.get(4), Some(5));
561    }
562
563    #[test]
564    fn test_rename() {
565        let df = DataFrame::new(vec![Series::new(
566            <&str as Into<String>>::into("old_name").into(),
567            &[1i32, 2, 3],
568        )
569        .into()])
570        .unwrap();
571
572        let mut mapping = HashMap::new();
573        mapping.insert("old_name".to_string(), "new_name".to_string());
574
575        let result = Transform::rename(&df, &mapping).unwrap();
576        assert!(result.column("new_name").is_ok());
577        assert!(result.column("old_name").is_err());
578    }
579
580    #[test]
581    fn test_unique() {
582        let df = DataFrame::new(vec![
583            Series::new(PlSmallStr::from("a"), &[1, 2, 2, 3, 3, 3]).into(),
584            Series::new(PlSmallStr::from("b"), &[10, 20, 20, 30, 30, 30]).into(),
585        ])
586        .unwrap();
587
588        let result = Transform::unique(&df, None, UniqueKeepStrategy::First).unwrap();
589        assert_eq!(result.height(), 3);
590    }
591
592    #[test]
593    fn test_limit_and_skip() {
594        let df = DataFrame::new(vec![
595            Series::new(PlSmallStr::from("a"), &[1, 2, 3, 4, 5]).into()
596        ])
597        .unwrap();
598
599        let limited = Transform::limit(&df, 3).unwrap();
600        assert_eq!(limited.height(), 3);
601
602        let skipped = Transform::skip(&df, 2).unwrap();
603        assert_eq!(skipped.height(), 3);
604        assert_eq!(skipped.column("a").unwrap().i32().unwrap().get(0), Some(3));
605    }
606
607    #[test]
608    fn test_drop_nulls() {
609        let df = DataFrame::new(vec![
610            Series::new(
611                PlSmallStr::from("a"),
612                &[Some(1), None, Some(3), None, Some(5)],
613            )
614            .into(),
615            Series::new(
616                PlSmallStr::from("b"),
617                &[Some(10), Some(20), None, Some(40), Some(50)],
618            )
619            .into(),
620        ])
621        .unwrap();
622
623        let result = Transform::drop_nulls(&df, None).unwrap();
624        assert_eq!(result.height(), 2); // Only rows without any nulls
625    }
626
627    #[test]
628    fn test_column_datatype_from_str() {
629        assert_eq!(
630            ColumnDataType::from_str("int32").unwrap(),
631            ColumnDataType::Int32
632        );
633        assert_eq!(
634            ColumnDataType::from_str("i32").unwrap(),
635            ColumnDataType::Int32
636        );
637        assert_eq!(
638            ColumnDataType::from_str("int64").unwrap(),
639            ColumnDataType::Int64
640        );
641        assert_eq!(
642            ColumnDataType::from_str("i64").unwrap(),
643            ColumnDataType::Int64
644        );
645        assert_eq!(
646            ColumnDataType::from_str("float32").unwrap(),
647            ColumnDataType::Float32
648        );
649        assert_eq!(
650            ColumnDataType::from_str("f32").unwrap(),
651            ColumnDataType::Float32
652        );
653        assert_eq!(
654            ColumnDataType::from_str("float64").unwrap(),
655            ColumnDataType::Float64
656        );
657        assert_eq!(
658            ColumnDataType::from_str("f64").unwrap(),
659            ColumnDataType::Float64
660        );
661        assert_eq!(
662            ColumnDataType::from_str("string").unwrap(),
663            ColumnDataType::String
664        );
665        assert_eq!(
666            ColumnDataType::from_str("str").unwrap(),
667            ColumnDataType::String
668        );
669        assert_eq!(
670            ColumnDataType::from_str("utf8").unwrap(),
671            ColumnDataType::String
672        );
673        assert_eq!(
674            ColumnDataType::from_str("bool").unwrap(),
675            ColumnDataType::Boolean
676        );
677        assert_eq!(
678            ColumnDataType::from_str("boolean").unwrap(),
679            ColumnDataType::Boolean
680        );
681        assert_eq!(
682            ColumnDataType::from_str("date").unwrap(),
683            ColumnDataType::Date
684        );
685        assert_eq!(
686            ColumnDataType::from_str("datetime").unwrap(),
687            ColumnDataType::DateTime
688        );
689
690        // Test case insensitive
691        assert_eq!(
692            ColumnDataType::from_str("INT32").unwrap(),
693            ColumnDataType::Int32
694        );
695        assert_eq!(
696            ColumnDataType::from_str("Float64").unwrap(),
697            ColumnDataType::Float64
698        );
699
700        // Test invalid
701        assert!(ColumnDataType::from_str("invalid").is_err());
702        assert!(ColumnDataType::from_str("").is_err());
703    }
704
705    #[test]
706    fn test_column_datatype_to_polars_dtype() {
707        assert_eq!(ColumnDataType::Int32.to_polars_dtype(), DataType::Int32);
708        assert_eq!(ColumnDataType::Int64.to_polars_dtype(), DataType::Int64);
709        assert_eq!(ColumnDataType::Float32.to_polars_dtype(), DataType::Float32);
710        assert_eq!(ColumnDataType::Float64.to_polars_dtype(), DataType::Float64);
711        assert_eq!(ColumnDataType::String.to_polars_dtype(), DataType::String);
712        assert_eq!(ColumnDataType::Boolean.to_polars_dtype(), DataType::Boolean);
713        assert_eq!(ColumnDataType::Date.to_polars_dtype(), DataType::Date);
714        assert_eq!(
715            ColumnDataType::DateTime.to_polars_dtype(),
716            DataType::Datetime(polars::prelude::TimeUnit::Milliseconds, None)
717        );
718    }
719
720    #[test]
721    fn test_select_lazy() {
722        let df = DataFrame::new(vec![
723            Series::new(PlSmallStr::from("a"), &[1, 2, 3]).into(),
724            Series::new(PlSmallStr::from("b"), &[4, 5, 6]).into(),
725            Series::new(PlSmallStr::from("c"), &[7, 8, 9]).into(),
726        ])
727        .unwrap();
728        let lf = df.lazy();
729
730        let result = Transform::select_lazy(lf, &["a".to_string(), "c".to_string()]).unwrap();
731        let collected = result.collect().unwrap();
732        assert_eq!(collected.width(), 2);
733        assert!(collected.column("a").is_ok());
734        assert!(collected.column("c").is_ok());
735        assert!(collected.column("b").is_err());
736    }
737
738    #[test]
739    fn test_filter_lazy() {
740        let df = DataFrame::new(vec![
741            Series::new(PlSmallStr::from("a"), &[1, 2, 3, 4, 5]).into(),
742            Series::new(PlSmallStr::from("b"), &[10, 20, 30, 40, 50]).into(),
743        ])
744        .unwrap();
745        let lf = df.lazy();
746
747        let predicate = col("a").gt(lit(3));
748        let result = Transform::filter_lazy(lf, predicate).unwrap();
749        let collected = result.collect().unwrap();
750
751        assert_eq!(collected.height(), 2);
752        let col_a = collected.column("a").unwrap().i32().unwrap();
753        assert_eq!(col_a.get(0), Some(4));
754        assert_eq!(col_a.get(1), Some(5));
755    }
756
757    #[test]
758    fn test_sort_lazy() {
759        let df = DataFrame::new(vec![
760            Series::new(PlSmallStr::from("a"), &[3i32, 1, 4, 1, 5]).into(),
761            Series::new(PlSmallStr::from("b"), &[30i32, 10, 40, 15, 50]).into(),
762        ])
763        .unwrap();
764        let lf = df.lazy();
765
766        let result = Transform::sort_lazy(lf, &["a".to_string()], &[false]).unwrap();
767        let collected = result.collect().unwrap();
768
769        let col_a = collected.column("a").unwrap().i32().unwrap();
770        assert_eq!(col_a.get(0), Some(1));
771        assert_eq!(col_a.get(1), Some(1));
772        assert_eq!(col_a.get(2), Some(3));
773        assert_eq!(col_a.get(3), Some(4));
774        assert_eq!(col_a.get(4), Some(5));
775    }
776
777    #[test]
778    fn test_rename_lazy() {
779        let df = DataFrame::new(vec![
780            Series::new(PlSmallStr::from("old_name"), &[1, 2, 3]).into()
781        ])
782        .unwrap();
783        let lf = df.lazy();
784
785        let mut mapping = HashMap::new();
786        mapping.insert("old_name".to_string(), "new_name".to_string());
787
788        let result = Transform::rename_lazy(lf, &mapping).unwrap();
789        let collected = result.collect().unwrap();
790        assert!(collected.column("new_name").is_ok());
791        assert!(collected.column("old_name").is_err());
792    }
793
794    #[test]
795    fn test_with_column() {
796        let df = DataFrame::new(vec![
797            Series::new("a".into(), &[1, 2, 3]).into(),
798            Series::new("b".into(), &[4, 5, 6]).into(),
799        ])
800        .unwrap();
801
802        let new_series = Series::new("c".into(), &[7, 8, 9]);
803        let result = Transform::with_column(&df, "c", new_series).unwrap();
804
805        assert_eq!(result.width(), 3);
806        assert!(result.column("a").is_ok());
807        assert!(result.column("b").is_ok());
808        assert!(result.column("c").is_ok());
809        assert_eq!(result.column("c").unwrap().i32().unwrap().get(0), Some(7));
810    }
811
812    #[test]
813    fn test_with_column_lazy() {
814        let df = DataFrame::new(vec![
815            Series::new("a".into(), &[1, 2, 3]).into(),
816            Series::new("b".into(), &[4, 5, 6]).into(),
817        ])
818        .unwrap();
819        let lf = df.lazy();
820
821        let expr = lit(10).alias("c");
822        let result = Transform::with_column_lazy(lf, expr).unwrap();
823        let collected = result.collect().unwrap();
824
825        assert_eq!(collected.width(), 3);
826        assert!(collected.column("c").is_ok());
827        assert_eq!(
828            collected.column("c").unwrap().i32().unwrap().get(0),
829            Some(10)
830        );
831    }
832
833    #[test]
834    fn test_drop() {
835        let df = DataFrame::new(vec![
836            Series::new(PlSmallStr::from("a"), &[1, 2, 3]).into(),
837            Series::new(PlSmallStr::from("b"), &[4, 5, 6]).into(),
838            Series::new(PlSmallStr::from("c"), &[7, 8, 9]).into(),
839        ])
840        .unwrap();
841
842        let result = Transform::drop(&df, &["b".to_string()]).unwrap();
843        assert_eq!(result.width(), 2);
844        assert!(result.column("a").is_ok());
845        assert!(result.column("b").is_err());
846        assert!(result.column("c").is_ok());
847    }
848
849    #[test]
850    fn test_drop_lazy() {
851        let df = DataFrame::new(vec![
852            Series::new(PlSmallStr::from("a"), &[1, 2, 3]).into(),
853            Series::new(PlSmallStr::from("b"), &[4, 5, 6]).into(),
854            Series::new(PlSmallStr::from("c"), &[7, 8, 9]).into(),
855        ])
856        .unwrap();
857        let lf = df.lazy();
858
859        let result = Transform::drop_lazy(lf, &["b".to_string()]).unwrap();
860        let collected = result.collect().unwrap();
861        assert_eq!(collected.width(), 2);
862        assert!(collected.column("a").is_ok());
863        assert!(collected.column("b").is_err());
864        assert!(collected.column("c").is_ok());
865    }
866
867    #[test]
868    fn test_unique_lazy() {
869        let df = DataFrame::new(vec![
870            Series::new(PlSmallStr::from("a"), &[1, 2, 2, 3, 3, 3]).into(),
871            Series::new(PlSmallStr::from("b"), &[10, 20, 20, 30, 30, 30]).into(),
872        ])
873        .unwrap();
874        let lf = df.lazy();
875
876        let result = Transform::unique_lazy(lf, None, UniqueKeepStrategy::First).unwrap();
877        let collected = result.collect().unwrap();
878        assert_eq!(collected.height(), 3);
879    }
880
881    #[test]
882    fn test_limit_lazy() {
883        let df = DataFrame::new(vec![
884            Series::new(PlSmallStr::from("a"), &[1, 2, 3, 4, 5]).into()
885        ])
886        .unwrap();
887        let lf = df.lazy();
888
889        let result = Transform::limit_lazy(lf, 3).unwrap();
890        let collected = result.collect().unwrap();
891        assert_eq!(collected.height(), 3);
892    }
893
894    #[test]
895    fn test_skip_lazy() {
896        let df = DataFrame::new(vec![
897            Series::new(PlSmallStr::from("a"), &[1, 2, 3, 4, 5]).into()
898        ])
899        .unwrap();
900        let lf = df.lazy();
901
902        let result = Transform::skip_lazy(lf, 2).unwrap();
903        let collected = result.collect().unwrap();
904        assert_eq!(collected.height(), 3);
905        assert_eq!(
906            collected.column("a").unwrap().i32().unwrap().get(0),
907            Some(3)
908        );
909    }
910
911    #[test]
912    fn test_slice() {
913        let df = DataFrame::new(vec![
914            Series::new(PlSmallStr::from("a"), &[1, 2, 3, 4, 5]).into()
915        ])
916        .unwrap();
917
918        let result = Transform::slice(&df, 1, 3).unwrap();
919        assert_eq!(result.height(), 3);
920        assert_eq!(result.column("a").unwrap().i32().unwrap().get(0), Some(2));
921        assert_eq!(result.column("a").unwrap().i32().unwrap().get(1), Some(3));
922        assert_eq!(result.column("a").unwrap().i32().unwrap().get(2), Some(4));
923    }
924
925    #[test]
926    fn test_slice_lazy() {
927        let df = DataFrame::new(vec![
928            Series::new(PlSmallStr::from("a"), &[1, 2, 3, 4, 5]).into()
929        ])
930        .unwrap();
931        let lf = df.lazy();
932
933        let result = Transform::slice_lazy(lf, 1, 3).unwrap();
934        let collected = result.collect().unwrap();
935        assert_eq!(collected.height(), 3);
936        assert_eq!(
937            collected.column("a").unwrap().i32().unwrap().get(0),
938            Some(2)
939        );
940    }
941
942    #[test]
943    fn test_reverse() {
944        let df = DataFrame::new(vec![
945            Series::new(PlSmallStr::from("a"), &[1, 2, 3, 4, 5]).into(),
946            Series::new(PlSmallStr::from("b"), &[10, 20, 30, 40, 50]).into(),
947        ])
948        .unwrap();
949
950        let result = Transform::reverse(&df).unwrap();
951        assert_eq!(result.height(), 5);
952        assert_eq!(result.column("a").unwrap().i32().unwrap().get(0), Some(5));
953        assert_eq!(result.column("a").unwrap().i32().unwrap().get(4), Some(1));
954        assert_eq!(result.column("b").unwrap().i32().unwrap().get(0), Some(50));
955        assert_eq!(result.column("b").unwrap().i32().unwrap().get(4), Some(10));
956    }
957
958    #[test]
959    fn test_reverse_lazy() {
960        let df = DataFrame::new(vec![
961            Series::new(PlSmallStr::from("a"), &[1, 2, 3, 4, 5]).into(),
962            Series::new(PlSmallStr::from("b"), &[10, 20, 30, 40, 50]).into(),
963        ])
964        .unwrap();
965        let lf = df.lazy();
966
967        let result = Transform::reverse_lazy(lf).unwrap();
968        let collected = result.collect().unwrap();
969        assert_eq!(collected.height(), 5);
970        assert_eq!(
971            collected.column("a").unwrap().i32().unwrap().get(0),
972            Some(5)
973        );
974        assert_eq!(
975            collected.column("a").unwrap().i32().unwrap().get(4),
976            Some(1)
977        );
978    }
979
980    #[test]
981    fn test_sample() {
982        let df = DataFrame::new(vec![Column::new(
983            "a".into(),
984            &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
985        )])
986        .unwrap();
987
988        let result = Transform::sample(&df, 3, false, Some(42)).unwrap();
989        assert_eq!(result.height(), 3);
990        assert!(result.column("a").is_ok());
991    }
992
993    #[test]
994    fn test_fill_null() {
995        let df = DataFrame::new(vec![
996            Series::new("a".into(), &[Some(1), None, Some(3)]).into(),
997            Series::new("b".into(), &[Some(1.0), Some(2.0), None]).into(),
998        ])
999        .unwrap();
1000
1001        let result = Transform::fill_null(&df, FillNullStrategy::Forward(None)).unwrap();
1002        assert_eq!(result.height(), 3);
1003        // Check that nulls are filled
1004        let col_a = result.column("a").unwrap();
1005        assert!(col_a.null_count() == 0);
1006    }
1007
1008    #[test]
1009    fn test_fill_null_lazy() {
1010        let df = DataFrame::new(vec![
1011            Series::new(PlSmallStr::from("old_name"), &[1, 2, 3]).into()
1012        ])
1013        .unwrap();
1014        let lf = df.lazy();
1015
1016        let value = lit(0);
1017        let result = Transform::fill_null_lazy(lf, value).unwrap();
1018        let collected = result.collect().unwrap();
1019        assert_eq!(collected.height(), 3);
1020        let col_a = collected.column("old_name").unwrap();
1021        assert!(col_a.null_count() == 0);
1022    }
1023
1024    #[test]
1025    fn test_drop_nulls_lazy() {
1026        let df = DataFrame::new(vec![
1027            Series::new(
1028                PlSmallStr::from("a"),
1029                &[Some(1), None, Some(3), None, Some(5)],
1030            )
1031            .into(),
1032            Series::new(
1033                PlSmallStr::from("b"),
1034                &[Some(10), Some(20), None, Some(40), Some(50)],
1035            )
1036            .into(),
1037        ])
1038        .unwrap();
1039        let lf = df.lazy();
1040
1041        let result = Transform::drop_nulls_lazy(lf, None).unwrap();
1042        let collected = result.collect().unwrap();
1043        assert_eq!(collected.height(), 2); // Only rows without any nulls
1044    }
1045
1046    #[test]
1047    fn test_cast() {
1048        let df = DataFrame::new(vec![Series::new("a".into(), &[1.0, 2.0, 3.0]).into()]).unwrap();
1049
1050        let result = Transform::cast(&df, "a", &DataType::Int32).unwrap();
1051        assert_eq!(result.height(), 3);
1052        let col_a = result.column("a").unwrap();
1053        assert_eq!(col_a.dtype(), &DataType::Int32);
1054        assert_eq!(col_a.i32().unwrap().get(0), Some(1));
1055    }
1056
1057    #[test]
1058    fn test_cast_lazy() {
1059        let df = DataFrame::new(vec![Series::new("a".into(), &[1.0, 2.0, 3.0]).into()]).unwrap();
1060        let lf = df.lazy();
1061
1062        let result = Transform::cast_lazy(lf, "a", DataType::Int32).unwrap();
1063        let collected = result.collect().unwrap();
1064        assert_eq!(collected.height(), 3);
1065        let col_a = collected.column("a").unwrap();
1066        assert_eq!(col_a.dtype(), &DataType::Int32);
1067        assert_eq!(col_a.i32().unwrap().get(0), Some(1));
1068    }
1069
1070    #[test]
1071    #[ignore = "explode operation not supported for binary dtype in this Polars version"]
1072    fn test_explode() {
1073        let values = vec![vec![1, 2], vec![3], vec![4, 5, 6]];
1074        let list_series = Series::new("list_col".into(), values);
1075        let df = DataFrame::new(vec![
1076            list_series.into(),
1077            Series::new("other".into(), &[10, 20, 30]).into(),
1078        ])
1079        .unwrap();
1080
1081        let result = Transform::explode(&df, &["list_col".to_string()]).unwrap();
1082        assert_eq!(result.height(), 6); // 2 + 1 + 3 = 6
1083    }
1084
1085    #[test]
1086    #[ignore = "explode operation not supported for binary dtype in this Polars version"]
1087    fn test_explode_lazy() {
1088        let values = vec![vec![1, 2], vec![3], vec![4, 5, 6]];
1089        let list_series = Series::new("list_col".into(), values);
1090        let df = DataFrame::new(vec![
1091            list_series.into(),
1092            Series::new("other".into(), &[10, 20, 30]).into(),
1093        ])
1094        .unwrap();
1095        let lf = df.lazy();
1096
1097        let result = Transform::explode_lazy(lf, &["list_col".to_string()]).unwrap();
1098        let collected = result.collect().unwrap();
1099        assert_eq!(collected.height(), 6); // 2 + 1 + 3 = 6
1100    }
1101
1102    #[test]
1103    fn test_melt() {
1104        let df = DataFrame::new(vec![
1105            Series::new("id".into(), &[1, 2, 3]).into(),
1106            Series::new("a".into(), &[10, 20, 30]).into(),
1107            Series::new("b".into(), &[100, 200, 300]).into(),
1108        ])
1109        .unwrap();
1110
1111        let result = Transform::melt(
1112            &df,
1113            &["id".to_string()],
1114            &["a".to_string(), "b".to_string()],
1115            Some("variable"),
1116            Some("value"),
1117        )
1118        .unwrap();
1119
1120        assert_eq!(result.height(), 3); // Current unpivot behavior
1121        assert!(result.column("variable").is_ok());
1122        assert!(result.column("value").is_ok());
1123    }
1124
1125    #[test]
1126    fn test_melt_lazy() {
1127        let df = DataFrame::new(vec![
1128            Series::new("id".into(), &[1, 2, 3]).into(),
1129            Series::new("a".into(), &[10, 20, 30]).into(),
1130            Series::new("b".into(), &[100, 200, 300]).into(),
1131        ])
1132        .unwrap();
1133        let lf = df.lazy();
1134
1135        let result = Transform::melt_lazy(
1136            lf,
1137            &["id".to_string()],
1138            &["a".to_string(), "b".to_string()],
1139            Some("variable"),
1140            Some("value"),
1141        )
1142        .unwrap();
1143        let collected = result.collect().unwrap();
1144
1145        assert_eq!(collected.height(), 3); // Current unpivot behavior
1146        assert!(collected.column("variable").is_ok());
1147        assert!(collected.column("value").is_ok());
1148    }
1149
1150    #[test]
1151    fn test_map_columns() {
1152        let df = DataFrame::new(vec![
1153            Series::new("a".into(), &[1, 2, 3]).into(),
1154            Series::new("b".into(), &[4, 5, 6]).into(),
1155        ])
1156        .unwrap();
1157
1158        // Use col("*") to reference all columns in the temporary per-column DataFrame
1159        let expr = col("*") + lit(10);
1160        let result = Transform::map_columns(&df, expr).unwrap();
1161
1162        assert_eq!(result.height(), 3);
1163        assert_eq!(result.width(), 2);
1164        // Check that values are increased by 10
1165        let col_a = result.column("a").unwrap().i32().unwrap();
1166        assert_eq!(col_a.get(0), Some(11));
1167        assert_eq!(col_a.get(1), Some(12));
1168        assert_eq!(col_a.get(2), Some(13));
1169    }
1170
1171    #[test]
1172    fn test_cast_column() {
1173        let df = DataFrame::new(vec![Series::new("a".into(), &[1.0, 2.0, 3.0]).into()]).unwrap();
1174        let value = Value::DataFrame(df);
1175
1176        let result = cast_column(&value, "a", ColumnDataType::Int32).unwrap();
1177        match result {
1178            Value::DataFrame(result_df) => {
1179                let col_a = result_df.column("a").unwrap();
1180                assert_eq!(col_a.dtype(), &DataType::Int32);
1181                assert_eq!(col_a.i32().unwrap().get(0), Some(1));
1182            }
1183            _ => panic!("Expected DataFrame"),
1184        }
1185    }
1186
1187    #[test]
1188    fn test_cast_column_invalid_type() {
1189        let value = Value::Int(42);
1190        let result = cast_column(&value, "a", ColumnDataType::Int32);
1191        assert!(result.is_err());
1192    }
1193}