Struct DataFrame

Source
pub struct DataFrame { /* private fields */ }
Expand description

`DataFrame` is a column-oriented, two-dimensional data structure.

Implementations

Source§

impl DataFrame

Source

pub fn new() -> Self

Create a new empty DataFrame

Examples found in repository?
examples/dataframe_window_example.rs (line 82)
81fn create_sample_dataframe() -> Result<DataFrame> {
82    let mut df = DataFrame::new();
83
84    // Add date column
85    let dates = vec![
86        "2023-01-01",
87        "2023-01-02",
88        "2023-01-03",
89        "2023-01-04",
90        "2023-01-05",
91        "2023-01-06",
92        "2023-01-07",
93        "2023-01-08",
94        "2023-01-09",
95        "2023-01-10",
96    ];
97    let date_series = Series::new(dates, Some("Date".to_string()))?;
98    df.add_column("Date".to_string(), date_series)?;
99
100    // Add product column
101    let products = vec![
102        "ProductA", "ProductB", "ProductA", "ProductC", "ProductB", "ProductA", "ProductC",
103        "ProductA", "ProductB", "ProductC",
104    ];
105    let product_series = Series::new(products, Some("Product".to_string()))?;
106    df.add_column("Product".to_string(), product_series)?;
107
108    // Add price column
109    let prices = vec![
110        "100", "150", "110", "200", "160", "120", "210", "115", "165", "220",
111    ];
112    let price_series = Series::new(prices, Some("Price".to_string()))?;
113    df.add_column("Price".to_string(), price_series)?;
114
115    // Add quantity column
116    let quantities = vec!["5", "3", "6", "2", "4", "7", "3", "8", "5", "4"];
117    let quantity_series = Series::new(quantities, Some("Quantity".to_string()))?;
118    df.add_column("Quantity".to_string(), quantity_series)?;
119
120    Ok(df)
121}
More examples
Hide additional examples
examples/gpu_dataframe_api_example.rs (line 124)
96fn create_sample_dataframe(size: usize) -> Result<DataFrame> {
97    // Create data for columns
98    let mut x1 = Vec::with_capacity(size);
99    let mut x2 = Vec::with_capacity(size);
100    let mut x3 = Vec::with_capacity(size);
101    let mut x4 = Vec::with_capacity(size);
102    let mut y = Vec::with_capacity(size);
103
104    for i in 0..size {
105        // Create features with some correlation to the target
106        let x1_val = (i % 100) as f64 / 100.0;
107        let x2_val = ((i * 2) % 100) as f64 / 100.0;
108        let x3_val = ((i * 3) % 100) as f64 / 100.0;
109        let x4_val = ((i * 5) % 100) as f64 / 100.0;
110
111        // Create a target variable that depends on the features
112        let y_val = 2.0 * x1_val + 1.5 * x2_val - 0.5 * x3_val
113            + 3.0 * x4_val
114            + 0.1 * (rand::random::<f64>() - 0.5); // Add some noise
115
116        x1.push(x1_val);
117        x2.push(x2_val);
118        x3.push(x3_val);
119        x4.push(x4_val);
120        y.push(y_val);
121    }
122
123    // Create DataFrame
124    let mut df = DataFrame::new();
125    df.add_column("x1".to_string(), Series::new(x1, Some("x1".to_string()))?)?;
126    df.add_column("x2".to_string(), Series::new(x2, Some("x2".to_string()))?)?;
127    df.add_column("x3".to_string(), Series::new(x3, Some("x3".to_string()))?)?;
128    df.add_column("x4".to_string(), Series::new(x4, Some("x4".to_string()))?)?;
129    df.add_column("y".to_string(), Series::new(y, Some("y".to_string()))?)?;
130
131    Ok(df)
132}
examples/stats_example.rs (line 27)
22fn descriptive_stats_example() -> Result<()> {
23    println!("1. Descriptive Statistics Sample");
24    println!("-----------------");
25
26    // Create dataset
27    let mut df = DataFrame::new();
28    let values = Series::new(
29        vec![10.5, 12.3, 15.2, 9.8, 11.5, 13.7, 14.3, 12.9, 8.5, 10.2],
30        Some("Values".to_string()),
31    )?;
32
33    df.add_column("Values".to_string(), values)?;
34
35    // Descriptive statistics
36    let stats = pandrs::stats::describe(
37        df.get_column("Values")
38            .unwrap()
39            .values()
40            .iter()
41            .map(|v: &String| v.parse::<f64>().unwrap_or(0.0))
42            .collect::<Vec<f64>>(),
43    )?;
44
45    // Display results
46    println!("Count: {}", stats.count);
47    println!("Mean: {:.2}", stats.mean);
48    println!("Standard Deviation: {:.2}", stats.std);
49    println!("Min: {:.2}", stats.min);
50    println!("First Quartile: {:.2}", stats.q1);
51    println!("Median: {:.2}", stats.median);
52    println!("Third Quartile: {:.2}", stats.q3);
53    println!("Max: {:.2}", stats.max);
54
55    // Covariance and correlation coefficient
56    let data1 = vec![1.0, 2.0, 3.0, 4.0, 5.0];
57    let data2 = vec![1.5, 3.1, 4.2, 5.8, 7.1];
58
59    let cov = pandrs::stats::covariance(&data1, &data2)?;
60    let corr = pandrs::stats::correlation(&data1, &data2)?;
61
62    println!("\nCovariance and Correlation Coefficient:");
63    println!("Covariance: {:.4}", cov);
64    println!("Correlation Coefficient: {:.4}", corr);
65
66    println!();
67    Ok(())
68}
69
70fn ttest_example() -> Result<()> {
71    println!("2. t-test Sample");
72    println!("--------------");
73
74    // Create sample data
75    let group1 = vec![5.2, 5.8, 6.1, 5.5, 5.9, 6.2, 5.7, 6.0, 5.6, 5.8];
76    let group2 = vec![4.8, 5.1, 5.3, 4.9, 5.0, 5.2, 4.7, 5.1, 4.9, 5.0];
77
78    // Perform t-test with significance level 0.05 (5%)
79    let alpha = 0.05;
80
81    // t-test assuming equal variances
82    let result_equal = pandrs::stats::ttest(&group1, &group2, alpha, true)?;
83
84    println!("t-test result assuming equal variances:");
85    print_ttest_result(&result_equal);
86
87    // Welch's t-test (not assuming equal variances)
88    let result_welch = pandrs::stats::ttest(&group1, &group2, alpha, false)?;
89
90    println!("\nWelch's t-test result (not assuming equal variances):");
91    print_ttest_result(&result_welch);
92
93    println!();
94    Ok(())
95}
96
97fn print_ttest_result(result: &TTestResult) {
98    println!("t-statistic: {:.4}", result.statistic);
99    println!("p-value: {:.4}", result.pvalue);
100    println!("Degrees of Freedom: {}", result.df);
101    println!(
102        "Significant: {}",
103        if result.significant { "Yes" } else { "No" }
104    );
105}
106
107fn regression_example() -> Result<()> {
108    println!("3. Regression Analysis Sample");
109    println!("-----------------");
110
111    // Create dataset
112    let mut df = DataFrame::new();
113
114    // Explanatory variables
115    let x1 = Series::new(
116        vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0],
117        Some("x1".to_string()),
118    )?;
119    let x2 = Series::new(
120        vec![5.0, 8.0, 11.0, 14.0, 17.0, 20.0, 23.0, 26.0, 29.0, 32.0],
121        Some("x2".to_string()),
122    )?;
123
124    // Dependent variable (y = 2*x1 + 1.5*x2 + 3 + noise)
125    let mut y_values = Vec::with_capacity(10);
126    let mut rng = rand::rng();
127
128    for i in 0..10 {
129        let noise = rng.random_range(-1.0..1.0);
130        let y_val = 2.0 * (i as f64 + 1.0) + 1.5 * (5.0 + 3.0 * i as f64) + 3.0 + noise;
131        y_values.push(y_val);
132    }
133
134    let y = Series::new(y_values, Some("y".to_string()))?;
135
136    // Add to DataFrame
137    df.add_column("x1".to_string(), x1)?;
138    df.add_column("x2".to_string(), x2)?;
139    df.add_column("y".to_string(), y)?;
140
141    // Perform regression analysis
142    let model = pandrs::stats::linear_regression(&df, "y", &["x1", "x2"])?;
143
144    // Display results
145    println!(
146        "Linear Regression Model: y = {:.4} + {:.4} × x1 + {:.4} × x2",
147        model.intercept, model.coefficients[0], model.coefficients[1]
148    );
149    println!("R²: {:.4}", model.r_squared);
150    println!("Adjusted R²: {:.4}", model.adj_r_squared);
151    println!("p-values of regression coefficients: {:?}", model.p_values);
152
153    // Simple regression example
154    println!("\nSimple Regression Model (x1 only):");
155    let model_simple = pandrs::stats::linear_regression(&df, "y", &["x1"])?;
156    println!(
157        "Linear Regression Model: y = {:.4} + {:.4} × x1",
158        model_simple.intercept, model_simple.coefficients[0]
159    );
160    println!("R²: {:.4}", model_simple.r_squared);
161
162    Ok(())
163}
examples/basic_usage.rs (line 32)
4fn main() -> Result<()> {
5    println!("=== PandRS Basic Usage Example ===");
6
7    // Creating Series
8    let ages = Series::new(vec![30, 25, 40], Some("age".to_string()))?;
9    let heights = Series::new(vec![180, 175, 182], Some("height".to_string()))?;
10    let names = Series::new(
11        vec![
12            "Alice".to_string(),
13            "Bob".to_string(),
14            "Charlie".to_string(),
15        ],
16        Some("name".to_string()),
17    )?;
18
19    println!("Age Series: {:?}", ages);
20    println!("Height Series: {:?}", heights);
21    println!("Name Series: {:?}", names);
22
23    // Statistics for numeric series
24    println!("\n=== Statistics for Age Series ===");
25    println!("Sum: {}", ages.sum());
26    println!("Mean: {}", ages.mean()?);
27    println!("Min: {}", ages.min()?);
28    println!("Max: {}", ages.max()?);
29
30    // Creating a DataFrame
31    println!("\n=== Creating a DataFrame ===");
32    let mut df = DataFrame::new();
33    df.add_column("name".to_string(), names)?;
34    df.add_column("age".to_string(), ages)?;
35    df.add_column("height".to_string(), heights)?;
36
37    println!("DataFrame: {:?}", df);
38    println!("Number of Columns: {}", df.column_count());
39    println!("Number of Rows: {}", df.row_count());
40    println!("Column Names: {:?}", df.column_names());
41
42    // Testing saving to and loading from CSV
43    let file_path = "example_data.csv";
44    df.to_csv(file_path)?;
45    println!("\nSaved to CSV file: {}", file_path);
46
47    // Testing loading from CSV (may not be fully implemented yet)
48    match DataFrame::from_csv(file_path, true) {
49        Ok(loaded_df) => {
50            println!("DataFrame loaded from CSV: {:?}", loaded_df);
51            println!("Number of Columns: {}", loaded_df.column_count());
52            println!("Number of Rows: {}", loaded_df.row_count());
53            println!("Column Names: {:?}", loaded_df.column_names());
54        }
55        Err(e) => {
56            println!("Failed to load CSV: {:?}", e);
57        }
58    }
59
60    println!("\n=== Sample Complete ===");
61    Ok(())
62}
examples/parquet_example.rs (line 6)
4fn main() -> Result<(), Box<dyn Error>> {
5    // Create a sample DataFrame
6    let mut df = DataFrame::new();
7
8    // Add an integer column
9    let int_data = Series::new(vec![1, 2, 3, 4, 5], Some("id".to_string()))?;
10    df.add_column("id".to_string(), int_data)?;
11
12    // Add a floating-point column
13    let float_data = Series::new(vec![1.1, 2.2, 3.3, 4.4, 5.5], Some("value".to_string()))?;
14    df.add_column("value".to_string(), float_data)?;
15
16    // Add a string column
17    let string_data = Series::new(
18        vec![
19            "A".to_string(),
20            "B".to_string(),
21            "C".to_string(),
22            "D".to_string(),
23            "E".to_string(),
24        ],
25        Some("category".to_string()),
26    )?;
27    df.add_column("category".to_string(), string_data)?;
28
29    println!("Original DataFrame:");
30    println!("{:?}", df);
31
32    // Parquet support is still under development
33    println!("\nNote: Parquet support is currently under development.");
34    println!("It is planned to be available in a future release.");
35
36    /*
37    // Although Parquet functionality is not yet implemented, dependencies have been introduced.
38    // The following code is expected to work in a future version.
39
40    // Write the DataFrame to a Parquet file
41    let path = "example.parquet";
42    match write_parquet(&df, path, Some(ParquetCompression::Snappy)) {
43        Ok(_) => {
44            println!("DataFrame written to {}", path);
45
46            // Read the DataFrame from the Parquet file
47            match read_parquet(path) {
48                Ok(df_read) => {
49                    println!("\nDataFrame read from Parquet file:");
50                    println!("{:?}", df_read);
51
52                    // Verify the results
53                    assert_eq!(df.row_count(), df_read.row_count());
54                    assert_eq!(df.column_count(), df_read.column_count());
55
56                    println!("\nVerification successful: Data matches");
57                },
58                Err(e) => println!("Error reading Parquet file: {}", e),
59            }
60        },
61        Err(e) => println!("Error writing Parquet file: {}", e),
62    }
63    */
64
65    Ok(())
66}
examples/benchmark_million.rs (line 56)
6fn main() -> Result<()> {
7    println!("=== Benchmark with One Million Rows ===\n");
8
9    // Benchmark function
10    fn bench<F>(name: &str, f: F) -> Duration
11    where
12        F: FnOnce() -> (),
13    {
14        println!("Running: {}", name);
15        let start = Instant::now();
16        f();
17        let duration = start.elapsed();
18        println!("  Completed: {:?}\n", duration);
19        duration
20    }
21
22    // Benchmark for creating a DataFrame with one million rows
23    println!("--- DataFrame with One Million Rows ---");
24
25    bench("Creating Series x3 (One Million Rows)", || {
26        let _ = Series::new((0..1_000_000).collect::<Vec<_>>(), Some("A".to_string())).unwrap();
27        let _ = Series::new(
28            (0..1_000_000).map(|i| i as f64 * 0.5).collect::<Vec<_>>(),
29            Some("B".to_string()),
30        )
31        .unwrap();
32        let _ = Series::new(
33            (0..1_000_000)
34                .map(|i| format!("val_{}", i))
35                .collect::<Vec<_>>(),
36            Some("C".to_string()),
37        )
38        .unwrap();
39    });
40
41    let large_duration = bench("Creating DataFrame (3 Columns x One Million Rows)", || {
42        let col_a = Series::new((0..1_000_000).collect::<Vec<_>>(), Some("A".to_string())).unwrap();
43        let col_b = Series::new(
44            (0..1_000_000).map(|i| i as f64 * 0.5).collect::<Vec<_>>(),
45            Some("B".to_string()),
46        )
47        .unwrap();
48        let col_c = Series::new(
49            (0..1_000_000)
50                .map(|i| format!("val_{}", i))
51                .collect::<Vec<_>>(),
52            Some("C".to_string()),
53        )
54        .unwrap();
55
56        let mut df = DataFrame::new();
57        df.add_column("A".to_string(), col_a).unwrap();
58        df.add_column("B".to_string(), col_b).unwrap();
59        df.add_column("C".to_string(), col_c).unwrap();
60    });
61
62    bench("DataFrame from_map (3 Columns x One Million Rows)", || {
63        let mut data = HashMap::new();
64        data.insert(
65            "A".to_string(),
66            (0..1_000_000).map(|n| n.to_string()).collect(),
67        );
68        data.insert(
69            "B".to_string(),
70            (0..1_000_000)
71                .map(|n| format!("{:.1}", n as f64 * 0.5))
72                .collect(),
73        );
74        data.insert(
75            "C".to_string(),
76            (0..1_000_000).map(|i| format!("val_{}", i)).collect(),
77        );
78
79        let _ = DataFrame::from_map(data, None).unwrap();
80    });
81
82    println!(
83        "Time to create DataFrame with one million rows in pure Rust: {:?}",
84        large_duration
85    );
86
87    Ok(())
88}
Source

pub fn with_index(index: Index<String>) -> Self

Create a new DataFrame that uses the given simple `Index<String>` as its index

Examples found in repository?
examples/multi_index_example.rs (line 76)
4fn main() -> Result<()> {
5    println!("=== Example of Using MultiIndex ===\n");
6
7    // =========================================
8    // Creating a MultiIndex
9    // =========================================
10
11    println!("--- Creating MultiIndex from Tuples ---");
12
13    // Create MultiIndex from tuples (vector of vectors)
14    let tuples = vec![
15        vec!["A".to_string(), "a".to_string()],
16        vec!["A".to_string(), "b".to_string()],
17        vec!["B".to_string(), "a".to_string()],
18        vec!["B".to_string(), "b".to_string()],
19    ];
20
21    let names = Some(vec![Some("first".to_string()), Some("second".to_string())]);
22    let multi_idx = MultiIndex::from_tuples(tuples, names)?;
23
24    println!("MultiIndex: {:?}\n", multi_idx);
25    println!("Number of Levels: {}", multi_idx.n_levels());
26    println!("Number of Rows: {}\n", multi_idx.len());
27
28    // =========================================
29    // Operations on MultiIndex
30    // =========================================
31
32    println!("--- Retrieving Level Values ---");
33    let level0_values = multi_idx.get_level_values(0)?;
34    println!("Values in Level 0: {:?}", level0_values);
35
36    let level1_values = multi_idx.get_level_values(1)?;
37    println!("Values in Level 1: {:?}", level1_values);
38
39    println!("--- Swapping Levels ---");
40    let swapped = multi_idx.swaplevel(0, 1)?;
41    println!("After Swapping Levels: {:?}\n", swapped);
42
43    // =========================================
44    // DataFrame with MultiIndex
45    // =========================================
46
47    println!("--- DataFrame with MultiIndex ---");
48
49    // Create DataFrame
50    let mut df = DataFrame::with_multi_index(multi_idx.clone());
51
52    // Add data
53    let data = vec![
54        "data1".to_string(),
55        "data2".to_string(),
56        "data3".to_string(),
57        "data4".to_string(),
58    ];
59    df.add_column(
60        "data".to_string(),
61        pandrs::Series::new(data, Some("data".to_string()))?,
62    )?;
63
64    println!("DataFrame: {:?}\n", df);
65    println!("Number of Rows: {}", df.row_count());
66    println!("Number of Columns: {}", df.column_count());
67
68    // =========================================
69    // Conversion Between Simple Index and MultiIndex
70    // =========================================
71
72    println!("\n--- Example of Index Conversion ---");
73
74    // Create DataFrame from simple index
75    let simple_idx = Index::new(vec!["X".to_string(), "Y".to_string(), "Z".to_string()])?;
76    let mut simple_df = DataFrame::with_index(simple_idx);
77
78    // Add data
79    let values = vec![100, 200, 300];
80    let str_values: Vec<String> = values.iter().map(|v| v.to_string()).collect();
81    simple_df.add_column(
82        "values".to_string(),
83        pandrs::Series::new(str_values, Some("values".to_string()))?,
84    )?;
85
86    println!("Simple Index DataFrame: {:?}", simple_df);
87
88    // Prepare for conversion to MultiIndex
89    let tuples = vec![
90        vec!["Category".to_string(), "X".to_string()],
91        vec!["Category".to_string(), "Y".to_string()],
92        vec!["Category".to_string(), "Z".to_string()],
93    ];
94
95    // Create and set MultiIndex
96    let new_multi_idx = MultiIndex::from_tuples(tuples, None)?;
97    simple_df.set_multi_index(new_multi_idx)?;
98
99    println!("After Conversion to MultiIndex: {:?}", simple_df);
100
101    println!("\n=== Sample Complete ===");
102    Ok(())
103}
Source

pub fn with_multi_index(multi_index: MultiIndex<String>) -> Self

Create a new DataFrame that uses the given `MultiIndex<String>` as its index

Examples found in repository?
examples/multi_index_example.rs (line 50)
4fn main() -> Result<()> {
5    println!("=== Example of Using MultiIndex ===\n");
6
7    // =========================================
8    // Creating a MultiIndex
9    // =========================================
10
11    println!("--- Creating MultiIndex from Tuples ---");
12
13    // Create MultiIndex from tuples (vector of vectors)
14    let tuples = vec![
15        vec!["A".to_string(), "a".to_string()],
16        vec!["A".to_string(), "b".to_string()],
17        vec!["B".to_string(), "a".to_string()],
18        vec!["B".to_string(), "b".to_string()],
19    ];
20
21    let names = Some(vec![Some("first".to_string()), Some("second".to_string())]);
22    let multi_idx = MultiIndex::from_tuples(tuples, names)?;
23
24    println!("MultiIndex: {:?}\n", multi_idx);
25    println!("Number of Levels: {}", multi_idx.n_levels());
26    println!("Number of Rows: {}\n", multi_idx.len());
27
28    // =========================================
29    // Operations on MultiIndex
30    // =========================================
31
32    println!("--- Retrieving Level Values ---");
33    let level0_values = multi_idx.get_level_values(0)?;
34    println!("Values in Level 0: {:?}", level0_values);
35
36    let level1_values = multi_idx.get_level_values(1)?;
37    println!("Values in Level 1: {:?}", level1_values);
38
39    println!("--- Swapping Levels ---");
40    let swapped = multi_idx.swaplevel(0, 1)?;
41    println!("After Swapping Levels: {:?}\n", swapped);
42
43    // =========================================
44    // DataFrame with MultiIndex
45    // =========================================
46
47    println!("--- DataFrame with MultiIndex ---");
48
49    // Create DataFrame
50    let mut df = DataFrame::with_multi_index(multi_idx.clone());
51
52    // Add data
53    let data = vec![
54        "data1".to_string(),
55        "data2".to_string(),
56        "data3".to_string(),
57        "data4".to_string(),
58    ];
59    df.add_column(
60        "data".to_string(),
61        pandrs::Series::new(data, Some("data".to_string()))?,
62    )?;
63
64    println!("DataFrame: {:?}\n", df);
65    println!("Number of Rows: {}", df.row_count());
66    println!("Number of Columns: {}", df.column_count());
67
68    // =========================================
69    // Conversion Between Simple Index and MultiIndex
70    // =========================================
71
72    println!("\n--- Example of Index Conversion ---");
73
74    // Create DataFrame from simple index
75    let simple_idx = Index::new(vec!["X".to_string(), "Y".to_string(), "Z".to_string()])?;
76    let mut simple_df = DataFrame::with_index(simple_idx);
77
78    // Add data
79    let values = vec![100, 200, 300];
80    let str_values: Vec<String> = values.iter().map(|v| v.to_string()).collect();
81    simple_df.add_column(
82        "values".to_string(),
83        pandrs::Series::new(str_values, Some("values".to_string()))?,
84    )?;
85
86    println!("Simple Index DataFrame: {:?}", simple_df);
87
88    // Prepare for conversion to MultiIndex
89    let tuples = vec![
90        vec!["Category".to_string(), "X".to_string()],
91        vec!["Category".to_string(), "Y".to_string()],
92        vec!["Category".to_string(), "Z".to_string()],
93    ];
94
95    // Create and set MultiIndex
96    let new_multi_idx = MultiIndex::from_tuples(tuples, None)?;
97    simple_df.set_multi_index(new_multi_idx)?;
98
99    println!("After Conversion to MultiIndex: {:?}", simple_df);
100
101    println!("\n=== Sample Complete ===");
102    Ok(())
103}
Source

pub fn contains_column(&self, column_name: &str) -> bool

Returns `true` if the DataFrame contains a column named `column_name`

Source

pub fn row_count(&self) -> usize

Get the number of rows in the DataFrame

Examples found in repository?
examples/basic_usage.rs (line 39)
4fn main() -> Result<()> {
5    println!("=== PandRS Basic Usage Example ===");
6
7    // Creating Series
8    let ages = Series::new(vec![30, 25, 40], Some("age".to_string()))?;
9    let heights = Series::new(vec![180, 175, 182], Some("height".to_string()))?;
10    let names = Series::new(
11        vec![
12            "Alice".to_string(),
13            "Bob".to_string(),
14            "Charlie".to_string(),
15        ],
16        Some("name".to_string()),
17    )?;
18
19    println!("Age Series: {:?}", ages);
20    println!("Height Series: {:?}", heights);
21    println!("Name Series: {:?}", names);
22
23    // Statistics for numeric series
24    println!("\n=== Statistics for Age Series ===");
25    println!("Sum: {}", ages.sum());
26    println!("Mean: {}", ages.mean()?);
27    println!("Min: {}", ages.min()?);
28    println!("Max: {}", ages.max()?);
29
30    // Creating a DataFrame
31    println!("\n=== Creating a DataFrame ===");
32    let mut df = DataFrame::new();
33    df.add_column("name".to_string(), names)?;
34    df.add_column("age".to_string(), ages)?;
35    df.add_column("height".to_string(), heights)?;
36
37    println!("DataFrame: {:?}", df);
38    println!("Number of Columns: {}", df.column_count());
39    println!("Number of Rows: {}", df.row_count());
40    println!("Column Names: {:?}", df.column_names());
41
42    // Testing saving to and loading from CSV
43    let file_path = "example_data.csv";
44    df.to_csv(file_path)?;
45    println!("\nSaved to CSV file: {}", file_path);
46
47    // Testing loading from CSV (may not be fully implemented yet)
48    match DataFrame::from_csv(file_path, true) {
49        Ok(loaded_df) => {
50            println!("DataFrame loaded from CSV: {:?}", loaded_df);
51            println!("Number of Columns: {}", loaded_df.column_count());
52            println!("Number of Rows: {}", loaded_df.row_count());
53            println!("Column Names: {:?}", loaded_df.column_names());
54        }
55        Err(e) => {
56            println!("Failed to load CSV: {:?}", e);
57        }
58    }
59
60    println!("\n=== Sample Complete ===");
61    Ok(())
62}
More examples
Hide additional examples
examples/pivot_example.rs (line 52)
5fn main() -> Result<()> {
6    println!("=== Pivot Table and Grouping Example ===");
7
8    // Create sample data
9    let mut df = DataFrame::new();
10
11    // Create column data
12    let category = Series::new(
13        vec![
14            "A".to_string(),
15            "B".to_string(),
16            "A".to_string(),
17            "C".to_string(),
18            "B".to_string(),
19            "A".to_string(),
20            "C".to_string(),
21            "B".to_string(),
22        ],
23        Some("category".to_string()),
24    )?;
25
26    let region = Series::new(
27        vec![
28            "East".to_string(),
29            "West".to_string(),
30            "West".to_string(),
31            "East".to_string(),
32            "East".to_string(),
33            "West".to_string(),
34            "West".to_string(),
35            "East".to_string(),
36        ],
37        Some("region".to_string()),
38    )?;
39
40    let sales = Series::new(
41        vec![100, 150, 200, 120, 180, 90, 250, 160],
42        Some("sales".to_string()),
43    )?;
44
45    // Add columns to DataFrame
46    df.add_column("category".to_string(), category)?;
47    df.add_column("region".to_string(), region)?;
48    df.add_column("sales".to_string(), sales)?;
49
50    println!("DataFrame Info:");
51    println!("  Number of columns: {}", df.column_count());
52    println!("  Number of rows: {}", df.row_count());
53    println!("  Column names: {:?}", df.column_names());
54
55    // Grouping and aggregation
56    println!("\n=== Grouping by Category ===");
57    let category_group = df.groupby("category")?;
58
59    println!("Sum by category (in progress):");
60    let _category_sum = category_group.sum(&["sales"])?;
61
62    // Pivot table (in progress)
63    println!("\n=== Pivot Table ===");
64    println!("Sum of sales by category and region (in progress):");
65    let _pivot_result = df.pivot_table("category", "region", "sales", AggFunction::Sum)?;
66
67    // Note: Pivot table and grouping features are still under development,
68    // so actual results are not displayed
69
70    println!("\n=== Aggregation Function Examples ===");
71    let functions = [
72        AggFunction::Sum,
73        AggFunction::Mean,
74        AggFunction::Min,
75        AggFunction::Max,
76        AggFunction::Count,
77    ];
78
79    for func in &functions {
80        println!(
81            "Aggregation Function: {} ({})",
82            func.name(),
83            match func {
84                AggFunction::Sum => "Sum",
85                AggFunction::Mean => "Mean",
86                AggFunction::Min => "Min",
87                AggFunction::Max => "Max",
88                AggFunction::Count => "Count",
89            }
90        );
91    }
92
93    println!("\n=== Pivot Table Example (Complete) ===");
94    Ok(())
95}
examples/multi_index_example.rs (line 65)
4fn main() -> Result<()> {
5    println!("=== Example of Using MultiIndex ===\n");
6
7    // =========================================
8    // Creating a MultiIndex
9    // =========================================
10
11    println!("--- Creating MultiIndex from Tuples ---");
12
13    // Create MultiIndex from tuples (vector of vectors)
14    let tuples = vec![
15        vec!["A".to_string(), "a".to_string()],
16        vec!["A".to_string(), "b".to_string()],
17        vec!["B".to_string(), "a".to_string()],
18        vec!["B".to_string(), "b".to_string()],
19    ];
20
21    let names = Some(vec![Some("first".to_string()), Some("second".to_string())]);
22    let multi_idx = MultiIndex::from_tuples(tuples, names)?;
23
24    println!("MultiIndex: {:?}\n", multi_idx);
25    println!("Number of Levels: {}", multi_idx.n_levels());
26    println!("Number of Rows: {}\n", multi_idx.len());
27
28    // =========================================
29    // Operations on MultiIndex
30    // =========================================
31
32    println!("--- Retrieving Level Values ---");
33    let level0_values = multi_idx.get_level_values(0)?;
34    println!("Values in Level 0: {:?}", level0_values);
35
36    let level1_values = multi_idx.get_level_values(1)?;
37    println!("Values in Level 1: {:?}", level1_values);
38
39    println!("--- Swapping Levels ---");
40    let swapped = multi_idx.swaplevel(0, 1)?;
41    println!("After Swapping Levels: {:?}\n", swapped);
42
43    // =========================================
44    // DataFrame with MultiIndex
45    // =========================================
46
47    println!("--- DataFrame with MultiIndex ---");
48
49    // Create DataFrame
50    let mut df = DataFrame::with_multi_index(multi_idx.clone());
51
52    // Add data
53    let data = vec![
54        "data1".to_string(),
55        "data2".to_string(),
56        "data3".to_string(),
57        "data4".to_string(),
58    ];
59    df.add_column(
60        "data".to_string(),
61        pandrs::Series::new(data, Some("data".to_string()))?,
62    )?;
63
64    println!("DataFrame: {:?}\n", df);
65    println!("Number of Rows: {}", df.row_count());
66    println!("Number of Columns: {}", df.column_count());
67
68    // =========================================
69    // Conversion Between Simple Index and MultiIndex
70    // =========================================
71
72    println!("\n--- Example of Index Conversion ---");
73
74    // Create DataFrame from simple index
75    let simple_idx = Index::new(vec!["X".to_string(), "Y".to_string(), "Z".to_string()])?;
76    let mut simple_df = DataFrame::with_index(simple_idx);
77
78    // Add data
79    let values = vec![100, 200, 300];
80    let str_values: Vec<String> = values.iter().map(|v| v.to_string()).collect();
81    simple_df.add_column(
82        "values".to_string(),
83        pandrs::Series::new(str_values, Some("values".to_string()))?,
84    )?;
85
86    println!("Simple Index DataFrame: {:?}", simple_df);
87
88    // Prepare for conversion to MultiIndex
89    let tuples = vec![
90        vec!["Category".to_string(), "X".to_string()],
91        vec!["Category".to_string(), "Y".to_string()],
92        vec!["Category".to_string(), "Z".to_string()],
93    ];
94
95    // Create and set MultiIndex
96    let new_multi_idx = MultiIndex::from_tuples(tuples, None)?;
97    simple_df.set_multi_index(new_multi_idx)?;
98
99    println!("After Conversion to MultiIndex: {:?}", simple_df);
100
101    println!("\n=== Sample Complete ===");
102    Ok(())
103}
examples/large_dataset_example.rs (line 35)
6fn main() -> Result<()> {
7    // Path to a large CSV file (replace with actual path)
8    let file_path = "examples/data/large_dataset.csv";
9
10    println!("Working with large datasets example");
11    println!("----------------------------------");
12
13    // Create a disk-based DataFrame with custom configuration
14    let config = DiskConfig {
15        memory_limit: 500 * 1024 * 1024, // 500MB memory limit
16        chunk_size: 50_000,              // Process in chunks of 50,000 rows
17        use_memory_mapping: true,        // Use memory mapping for efficiency
18        temp_dir: None,                  // Use system temp directory
19    };
20
21    let disk_df = DiskBasedDataFrame::new(file_path, Some(config))?;
22
23    // Get schema information
24    println!("DataFrame Schema:");
25    for column in disk_df.schema().column_names() {
26        println!("  - {}", column);
27    }
28
29    // Process in chunks for counting rows
30    let mut chunked_df = disk_df.chunked()?;
31    let mut total_rows = 0;
32
33    println!("\nProcessing in chunks:");
34    while let Some(chunk) = chunked_df.next_chunk()? {
35        let chunk_rows = chunk.row_count();
36        total_rows += chunk_rows;
37        println!("  - Processed chunk with {} rows", chunk_rows);
38    }
39
40    println!("\nTotal rows in dataset: {}", total_rows);
41
42    // Example of filtering data
43    println!("\nFiltering data:");
44    let filtered = disk_df.filter(|value, _| {
45        // Example filter: keep only values starting with 'A'
46        value.starts_with('A')
47    })?;
48
49    println!("Filtered result has {} rows", filtered.len());
50
51    // Example of selecting columns
52    println!("\nSelecting columns:");
53    let columns_to_select = vec!["column1", "column2"]; // Replace with actual column names
54    let selected = disk_df.select(&columns_to_select)?;
55
56    println!("Selected result has {} rows and columns:", selected.len());
57    // Since the result is a Vec<HashMap<String, String>>, we need to check the keys of the first element
58    if !selected.is_empty() {
59        for column in selected[0].keys() {
60            println!("  - {}", column);
61        }
62    }
63
64    // Example of grouping and aggregation
65    println!("\nGrouping and aggregation:");
66    let grouped = disk_df.group_by("category_column", "value_column", |values| {
67        // Example aggregation: calculate average
68        let sum: f64 = values.iter().filter_map(|v| v.parse::<f64>().ok()).sum();
69        let count = values.len();
70
71        if count > 0 {
72            Ok(format!("{:.2}", sum / count as f64))
73        } else {
74            Ok("0.0".to_string())
75        }
76    })?;
77
78    println!("Grouped result has {} groups", grouped.len());
79
80    // Example of parallel processing
81    println!("\nParallel processing example:");
82    let chunk_results = chunked_df.parallel_process(
83        // Process each chunk
84        |chunk| {
85            let mut counts = HashMap::new();
86
87            // Example: count occurrences of values in a column
88            for row_idx in 0..chunk.row_count() {
89                if let Ok(value) = chunk.get_string_value("category_column", row_idx) {
90                    *counts.entry(value.to_string()).or_insert(0) += 1;
91                }
92            }
93
94            Ok(counts)
95        },
96        // Combine results
97        |chunk_maps| {
98            let mut result_map = HashMap::new();
99
100            // Merge all maps
101            for chunk_map in chunk_maps {
102                for (key, count) in chunk_map {
103                    *result_map.entry(key).or_insert(0) += count;
104                }
105            }
106
107            Ok(result_map)
108        },
109    )?;
110
111    println!("Category counts from parallel processing:");
112    for (category, count) in chunk_results.iter().take(5) {
113        println!("  - {}: {}", category, count);
114    }
115
116    Ok(())
117}
examples/parallel_example.rs (line 76)
4fn main() -> Result<(), Box<dyn Error>> {
5    println!("=== Example of Parallel Processing Features ===\n");
6
7    // Create sample data
8    let numbers = Series::new(
9        vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
10        Some("numbers".to_string()),
11    )?;
12
13    // Parallel map: square each number
14    println!("Example of parallel map processing:");
15    let squared = numbers.par_map(|x| x * x);
16    println!("Original values: {:?}", numbers.values());
17    println!("Squared values: {:?}", squared.values());
18
19    // Parallel filter: keep only even numbers
20    println!("\nExample of parallel filtering:");
21    let even_numbers = numbers.par_filter(|x| x % 2 == 0);
22    println!("Even Numbers: {:?}", even_numbers.values());
23
24    // Processing data containing NA
25    let na_data = vec![
26        NA::Value(10),
27        NA::Value(20),
28        NA::NA,
29        NA::Value(40),
30        NA::NA,
31        NA::Value(60),
32    ];
33    let na_series = NASeries::new(na_data, Some("na_numbers".to_string()))?;
34
35    println!("\nParallel processing of data containing NA:");
36    let na_tripled = na_series.par_map(|x| x * 3);
37    println!("Original values: {:?}", na_series.values());
38    println!("Tripled values: {:?}", na_tripled.values());
39
40    // Parallel processing of DataFrame
41    println!("\nParallel processing of DataFrame:");
42
43    // Creating a sample DataFrame
44    let mut df = DataFrame::new();
45    let names = Series::new(
46        vec!["Alice", "Bob", "Charlie", "David", "Eve"],
47        Some("name".to_string()),
48    )?;
49    let ages = Series::new(vec![25, 30, 35, 40, 45], Some("age".to_string()))?;
50    let scores = Series::new(vec![85, 90, 78, 92, 88], Some("score".to_string()))?;
51
52    df.add_column("name".to_string(), names)?;
53    df.add_column("age".to_string(), ages)?;
54    df.add_column("score".to_string(), scores)?;
55
56    // Parallel transformation of DataFrame
57    println!("Example of DataFrame.par_apply:");
58    let transformed_df = df.par_apply(|col, _row, val| {
59        match col {
60            "age" => {
61                // Add 1 to age
62                let age: i32 = val.parse().unwrap_or(0);
63                (age + 1).to_string()
64            }
65            "score" => {
66                // Add 5 to score
67                let score: i32 = val.parse().unwrap_or(0);
68                (score + 5).to_string()
69            }
70            _ => val.to_string(),
71        }
72    })?;
73
74    println!(
75        "Original DF row count: {}, column count: {}",
76        df.row_count(),
77        df.column_count()
78    );
79    println!(
80        "Transformed DF row count: {}, column count: {}",
81        transformed_df.row_count(),
82        transformed_df.column_count()
83    );
84
85    // Filtering rows
86    println!("\nExample of DataFrame.par_filter_rows:");
87    let filtered_df = df.par_filter_rows(|row| {
88        // Keep only rows where score > 85
89        if let Ok(values) = df.get_column_numeric_values("score") {
90            if row < values.len() {
91                return values[row] > 85.0;
92            }
93        }
94        false
95    })?;
96
97    println!("Row count after filtering: {}", filtered_df.row_count());
98
99    // Example of using ParallelUtils
100    println!("\nExample of ParallelUtils features:");
101
102    let unsorted = vec![5, 3, 8, 1, 9, 4, 7, 2, 6];
103    let sorted = ParallelUtils::par_sort(unsorted.clone());
104    println!("Before sorting: {:?}", unsorted);
105    println!("After sorting: {:?}", sorted);
106
107    let numbers_vec = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
108    let sum = ParallelUtils::par_sum(&numbers_vec);
109    let mean = ParallelUtils::par_mean(&numbers_vec);
110    let min = ParallelUtils::par_min(&numbers_vec);
111    let max = ParallelUtils::par_max(&numbers_vec);
112
113    println!("Sum: {}", sum);
114    println!("Mean: {}", mean.unwrap());
115    println!("Min: {}", min.unwrap());
116    println!("Max: {}", max.unwrap());
117
118    println!("\n=== Example of Parallel Processing Features Complete ===");
119    Ok(())
120}
examples/categorical_na_example.rs (line 129)
6fn main() -> Result<()> {
7    println!("=== Example of Categorical Data with Missing Values ===\n");
8
9    // 1. Create categorical data
10    println!("1. Create categorical data");
11
12    // Create a vector with NA values
13    let values = vec![
14        NA::Value("Red".to_string()),
15        NA::Value("Blue".to_string()),
16        NA::NA, // Missing value
17        NA::Value("Green".to_string()),
18        NA::Value("Red".to_string()), // Duplicate value
19    ];
20
21    // Create categorical data type from vector
22    // Create as unordered category
23    let cat = StringCategorical::from_na_vec(
24        values.clone(),
25        None,                              // Auto-detect categories
26        Some(CategoricalOrder::Unordered), // Unordered
27    )?;
28
29    println!("Categories: {:?}", cat.categories());
30    println!("Number of categories: {}", cat.categories().len());
31    println!("Number of data: {}", cat.len());
32
33    // Display category codes
34    println!("Internal codes: {:?}", cat.codes());
35    println!();
36
37    // 2. Create ordered categorical data
38    println!("2. Create ordered categorical data");
39
40    // Explicitly ordered category list
41    let ordered_categories = vec!["Low".to_string(), "Medium".to_string(), "High".to_string()];
42
43    // Create a vector with NA values
44    let values = vec![
45        NA::Value("Medium".to_string()),
46        NA::Value("Low".to_string()),
47        NA::NA, // Missing value
48        NA::Value("High".to_string()),
49        NA::Value("Medium".to_string()), // Duplicate value
50    ];
51
52    // Create as ordered category
53    let ordered_cat = StringCategorical::from_na_vec(
54        values.clone(),
55        Some(ordered_categories),        // Explicit category list
56        Some(CategoricalOrder::Ordered), // Ordered
57    )?;
58
59    println!("Ordered categories: {:?}", ordered_cat.categories());
60    println!("Number of categories: {}", ordered_cat.categories().len());
61    println!("Number of data: {}", ordered_cat.len());
62
63    // Display category codes
64    println!("Internal codes: {:?}", ordered_cat.codes());
65    println!();
66
67    // 3. Operations on categorical data
68    println!("3. Operations on categorical data");
69
70    // Create two categorical data
71    let values1 = vec![
72        NA::Value("A".to_string()),
73        NA::Value("B".to_string()),
74        NA::NA,
75        NA::Value("C".to_string()),
76    ];
77
78    let values2 = vec![
79        NA::Value("B".to_string()),
80        NA::Value("C".to_string()),
81        NA::Value("D".to_string()),
82        NA::NA,
83    ];
84
85    let cat1 = StringCategorical::from_na_vec(values1, None, None)?;
86    let cat2 = StringCategorical::from_na_vec(values2, None, None)?;
87
88    // Set operations
89    let union = cat1.union(&cat2)?; // Union
90    let intersection = cat1.intersection(&cat2)?; // Intersection
91    let difference = cat1.difference(&cat2)?; // Difference
92
93    println!("Categories of set 1: {:?}", cat1.categories());
94    println!("Categories of set 2: {:?}", cat2.categories());
95    println!("Union: {:?}", union.categories());
96    println!("Intersection: {:?}", intersection.categories());
97    println!("Difference (set 1 - set 2): {:?}", difference.categories());
98    println!();
99
100    // 4. Using categorical columns in DataFrame
101    println!("4. Using categorical columns in DataFrame");
102
103    // Create a vector with NA values (first create for categorical)
104    let values = vec![
105        NA::Value("High".to_string()),
106        NA::Value("Medium".to_string()),
107        NA::NA,
108        NA::Value("Low".to_string()),
109    ];
110
111    // Simplified for sample code
112    let order_cats = vec!["Low".to_string(), "Medium".to_string(), "High".to_string()];
113
114    // Create categorical data
115    let cat_eval = StringCategorical::from_na_vec(
116        values.clone(), // Clone it
117        Some(order_cats),
118        Some(CategoricalOrder::Ordered),
119    )?;
120
121    // Output the size of the created categorical data
122    println!("Size of created categorical data: {}", cat_eval.len());
123
124    // Add as categorical column
125    let categoricals = vec![("Evaluation".to_string(), cat_eval)];
126    let mut df = DataFrame::from_categoricals(categoricals)?;
127
128    // Check the number of rows in the data and match it
129    println!("Number of rows in DataFrame: {}", df.row_count());
130    println!("Note: NA values are excluded when creating DataFrame");
131
132    // Add numeric column (match the number of rows)
133    let scores = vec![95, 80, 0]; // Match the number of rows in DataFrame
134    println!("Size of scores: {}", scores.len());
135
136    df.add_column(
137        "Score".to_string(),
138        Series::new(scores, Some("Score".to_string()))?,
139    )?;
140
141    println!("DataFrame: ");
142    println!("{:#?}", df);
143
144    // Retrieve and verify categorical data
145    println!(
146        "Is 'Evaluation' column categorical: {}",
147        df.is_categorical("Evaluation")
148    );
149
150    // Explicitly handle errors
151    match df.get_categorical::<String>("Evaluation") {
152        Ok(cat_col) => println!(
153            "Categories of 'Evaluation' column: {:?}",
154            cat_col.categories()
155        ),
156        Err(_) => println!("Failed to retrieve categories of 'Evaluation' column"),
157    }
158    println!();
159
160    // 5. Input and output with CSV file
161    println!("5. Input and output with CSV file");
162
163    // Save to temporary file
164    let temp_path = Path::new("/tmp/categorical_example.csv");
165    df.to_csv(temp_path)?;
166
167    println!("Saved to CSV file: {}", temp_path.display());
168
169    // Load from file
170    let df_loaded = DataFrame::from_csv(temp_path, true)?;
171
172    // After loading from CSV, categorical information is lost (loaded as regular string column)
173    println!("Data loaded from CSV:");
174    println!("{:#?}", df_loaded);
175
176    // Check data loaded from CSV
177
178    // Note that data loaded from CSV is in a special format
179    println!("Example of data format loaded from CSV:");
180    println!(
181        "First value of 'Evaluation' column: {:?}",
182        df_loaded
183            .get_column::<String>("Evaluation")
184            .unwrap()
185            .values()[0]
186    );
187
188    // To reconstruct categorical data from this CSV loaded data,
189    // more complex processing is required, so the following is a simple example
190
191    // Create new categorical data as an example
192    let new_values = vec![
193        NA::Value("High".to_string()),
194        NA::Value("Medium".to_string()),
195        NA::NA,
196        NA::Value("Low".to_string()),
197    ];
198
199    let new_cat =
200        StringCategorical::from_na_vec(new_values, None, Some(CategoricalOrder::Ordered))?;
201
202    println!("Example of newly created categorical data:");
203    println!("Categories: {:?}", new_cat.categories());
204    println!("Order: {:?}", new_cat.ordered());
205
206    println!("\nTo actually convert data loaded from CSV to categorical data,");
207    println!("parsing processing according to the format and string escaping method of the CSV is required.");
208
209    println!("\n=== Sample End ===");
210    Ok(())
211}
Source

pub fn nrows(&self) -> usize

Get the number of rows (alias for compatibility)

Source

pub fn get_string_value(&self, column_name: &str, row_idx: usize) -> Result<&str>

Get a string value from the DataFrame

Examples found in repository?
examples/large_dataset_example.rs (line 89)
6fn main() -> Result<()> {
7    // Path to a large CSV file (replace with actual path)
8    let file_path = "examples/data/large_dataset.csv";
9
10    println!("Working with large datasets example");
11    println!("----------------------------------");
12
13    // Create a disk-based DataFrame with custom configuration
14    let config = DiskConfig {
15        memory_limit: 500 * 1024 * 1024, // 500MB memory limit
16        chunk_size: 50_000,              // Process in chunks of 50,000 rows
17        use_memory_mapping: true,        // Use memory mapping for efficiency
18        temp_dir: None,                  // Use system temp directory
19    };
20
21    let disk_df = DiskBasedDataFrame::new(file_path, Some(config))?;
22
23    // Get schema information
24    println!("DataFrame Schema:");
25    for column in disk_df.schema().column_names() {
26        println!("  - {}", column);
27    }
28
29    // Process in chunks for counting rows
30    let mut chunked_df = disk_df.chunked()?;
31    let mut total_rows = 0;
32
33    println!("\nProcessing in chunks:");
34    while let Some(chunk) = chunked_df.next_chunk()? {
35        let chunk_rows = chunk.row_count();
36        total_rows += chunk_rows;
37        println!("  - Processed chunk with {} rows", chunk_rows);
38    }
39
40    println!("\nTotal rows in dataset: {}", total_rows);
41
42    // Example of filtering data
43    println!("\nFiltering data:");
44    let filtered = disk_df.filter(|value, _| {
45        // Example filter: keep only values starting with 'A'
46        value.starts_with('A')
47    })?;
48
49    println!("Filtered result has {} rows", filtered.len());
50
51    // Example of selecting columns
52    println!("\nSelecting columns:");
53    let columns_to_select = vec!["column1", "column2"]; // Replace with actual column names
54    let selected = disk_df.select(&columns_to_select)?;
55
56    println!("Selected result has {} rows and columns:", selected.len());
57    // Since the result is a Vec<HashMap<String, String>>, we need to check the keys of the first element
58    if !selected.is_empty() {
59        for column in selected[0].keys() {
60            println!("  - {}", column);
61        }
62    }
63
64    // Example of grouping and aggregation
65    println!("\nGrouping and aggregation:");
66    let grouped = disk_df.group_by("category_column", "value_column", |values| {
67        // Example aggregation: calculate average
68        let sum: f64 = values.iter().filter_map(|v| v.parse::<f64>().ok()).sum();
69        let count = values.len();
70
71        if count > 0 {
72            Ok(format!("{:.2}", sum / count as f64))
73        } else {
74            Ok("0.0".to_string())
75        }
76    })?;
77
78    println!("Grouped result has {} groups", grouped.len());
79
80    // Example of parallel processing
81    println!("\nParallel processing example:");
82    let chunk_results = chunked_df.parallel_process(
83        // Process each chunk
84        |chunk| {
85            let mut counts = HashMap::new();
86
87            // Example: count occurrences of values in a column
88            for row_idx in 0..chunk.row_count() {
89                if let Ok(value) = chunk.get_string_value("category_column", row_idx) {
90                    *counts.entry(value.to_string()).or_insert(0) += 1;
91                }
92            }
93
94            Ok(counts)
95        },
96        // Combine results
97        |chunk_maps| {
98            let mut result_map = HashMap::new();
99
100            // Merge all maps
101            for chunk_map in chunk_maps {
102                for (key, count) in chunk_map {
103                    *result_map.entry(key).or_insert(0) += count;
104                }
105            }
106
107            Ok(result_map)
108        },
109    )?;
110
111    println!("Category counts from parallel processing:");
112    for (category, count) in chunk_results.iter().take(5) {
113        println!("  - {}: {}", category, count);
114    }
115
116    Ok(())
117}
Source

pub fn add_column<T: 'static + Debug + Clone + Send + Sync>(&mut self, column_name: String, series: Series<T>) -> Result<()>

Add a column to the DataFrame

Examples found in repository?
examples/dataframe_window_example.rs (line 98)
81fn create_sample_dataframe() -> Result<DataFrame> {
82    let mut df = DataFrame::new();
83
84    // Add date column
85    let dates = vec![
86        "2023-01-01",
87        "2023-01-02",
88        "2023-01-03",
89        "2023-01-04",
90        "2023-01-05",
91        "2023-01-06",
92        "2023-01-07",
93        "2023-01-08",
94        "2023-01-09",
95        "2023-01-10",
96    ];
97    let date_series = Series::new(dates, Some("Date".to_string()))?;
98    df.add_column("Date".to_string(), date_series)?;
99
100    // Add product column
101    let products = vec![
102        "ProductA", "ProductB", "ProductA", "ProductC", "ProductB", "ProductA", "ProductC",
103        "ProductA", "ProductB", "ProductC",
104    ];
105    let product_series = Series::new(products, Some("Product".to_string()))?;
106    df.add_column("Product".to_string(), product_series)?;
107
108    // Add price column
109    let prices = vec![
110        "100", "150", "110", "200", "160", "120", "210", "115", "165", "220",
111    ];
112    let price_series = Series::new(prices, Some("Price".to_string()))?;
113    df.add_column("Price".to_string(), price_series)?;
114
115    // Add quantity column
116    let quantities = vec!["5", "3", "6", "2", "4", "7", "3", "8", "5", "4"];
117    let quantity_series = Series::new(quantities, Some("Quantity".to_string()))?;
118    df.add_column("Quantity".to_string(), quantity_series)?;
119
120    Ok(df)
121}
More examples
Hide additional examples
examples/gpu_dataframe_api_example.rs (line 125)
96fn create_sample_dataframe(size: usize) -> Result<DataFrame> {
97    // Create data for columns
98    let mut x1 = Vec::with_capacity(size);
99    let mut x2 = Vec::with_capacity(size);
100    let mut x3 = Vec::with_capacity(size);
101    let mut x4 = Vec::with_capacity(size);
102    let mut y = Vec::with_capacity(size);
103
104    for i in 0..size {
105        // Create features with some correlation to the target
106        let x1_val = (i % 100) as f64 / 100.0;
107        let x2_val = ((i * 2) % 100) as f64 / 100.0;
108        let x3_val = ((i * 3) % 100) as f64 / 100.0;
109        let x4_val = ((i * 5) % 100) as f64 / 100.0;
110
111        // Create a target variable that depends on the features
112        let y_val = 2.0 * x1_val + 1.5 * x2_val - 0.5 * x3_val
113            + 3.0 * x4_val
114            + 0.1 * (rand::random::<f64>() - 0.5); // Add some noise
115
116        x1.push(x1_val);
117        x2.push(x2_val);
118        x3.push(x3_val);
119        x4.push(x4_val);
120        y.push(y_val);
121    }
122
123    // Create DataFrame
124    let mut df = DataFrame::new();
125    df.add_column("x1".to_string(), Series::new(x1, Some("x1".to_string()))?)?;
126    df.add_column("x2".to_string(), Series::new(x2, Some("x2".to_string()))?)?;
127    df.add_column("x3".to_string(), Series::new(x3, Some("x3".to_string()))?)?;
128    df.add_column("x4".to_string(), Series::new(x4, Some("x4".to_string()))?)?;
129    df.add_column("y".to_string(), Series::new(y, Some("y".to_string()))?)?;
130
131    Ok(df)
132}
examples/stats_example.rs (line 33)
22fn descriptive_stats_example() -> Result<()> {
23    println!("1. Descriptive Statistics Sample");
24    println!("-----------------");
25
26    // Create dataset
27    let mut df = DataFrame::new();
28    let values = Series::new(
29        vec![10.5, 12.3, 15.2, 9.8, 11.5, 13.7, 14.3, 12.9, 8.5, 10.2],
30        Some("Values".to_string()),
31    )?;
32
33    df.add_column("Values".to_string(), values)?;
34
35    // Descriptive statistics
36    let stats = pandrs::stats::describe(
37        df.get_column("Values")
38            .unwrap()
39            .values()
40            .iter()
41            .map(|v: &String| v.parse::<f64>().unwrap_or(0.0))
42            .collect::<Vec<f64>>(),
43    )?;
44
45    // Display results
46    println!("Count: {}", stats.count);
47    println!("Mean: {:.2}", stats.mean);
48    println!("Standard Deviation: {:.2}", stats.std);
49    println!("Min: {:.2}", stats.min);
50    println!("First Quartile: {:.2}", stats.q1);
51    println!("Median: {:.2}", stats.median);
52    println!("Third Quartile: {:.2}", stats.q3);
53    println!("Max: {:.2}", stats.max);
54
55    // Covariance and correlation coefficient
56    let data1 = vec![1.0, 2.0, 3.0, 4.0, 5.0];
57    let data2 = vec![1.5, 3.1, 4.2, 5.8, 7.1];
58
59    let cov = pandrs::stats::covariance(&data1, &data2)?;
60    let corr = pandrs::stats::correlation(&data1, &data2)?;
61
62    println!("\nCovariance and Correlation Coefficient:");
63    println!("Covariance: {:.4}", cov);
64    println!("Correlation Coefficient: {:.4}", corr);
65
66    println!();
67    Ok(())
68}
69
70fn ttest_example() -> Result<()> {
71    println!("2. t-test Sample");
72    println!("--------------");
73
74    // Create sample data
75    let group1 = vec![5.2, 5.8, 6.1, 5.5, 5.9, 6.2, 5.7, 6.0, 5.6, 5.8];
76    let group2 = vec![4.8, 5.1, 5.3, 4.9, 5.0, 5.2, 4.7, 5.1, 4.9, 5.0];
77
78    // Perform t-test with significance level 0.05 (5%)
79    let alpha = 0.05;
80
81    // t-test assuming equal variances
82    let result_equal = pandrs::stats::ttest(&group1, &group2, alpha, true)?;
83
84    println!("t-test result assuming equal variances:");
85    print_ttest_result(&result_equal);
86
87    // Welch's t-test (not assuming equal variances)
88    let result_welch = pandrs::stats::ttest(&group1, &group2, alpha, false)?;
89
90    println!("\nWelch's t-test result (not assuming equal variances):");
91    print_ttest_result(&result_welch);
92
93    println!();
94    Ok(())
95}
96
97fn print_ttest_result(result: &TTestResult) {
98    println!("t-statistic: {:.4}", result.statistic);
99    println!("p-value: {:.4}", result.pvalue);
100    println!("Degrees of Freedom: {}", result.df);
101    println!(
102        "Significant: {}",
103        if result.significant { "Yes" } else { "No" }
104    );
105}
106
107fn regression_example() -> Result<()> {
108    println!("3. Regression Analysis Sample");
109    println!("-----------------");
110
111    // Create dataset
112    let mut df = DataFrame::new();
113
114    // Explanatory variables
115    let x1 = Series::new(
116        vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0],
117        Some("x1".to_string()),
118    )?;
119    let x2 = Series::new(
120        vec![5.0, 8.0, 11.0, 14.0, 17.0, 20.0, 23.0, 26.0, 29.0, 32.0],
121        Some("x2".to_string()),
122    )?;
123
124    // Dependent variable (y = 2*x1 + 1.5*x2 + 3 + noise)
125    let mut y_values = Vec::with_capacity(10);
126    let mut rng = rand::rng();
127
128    for i in 0..10 {
129        let noise = rng.random_range(-1.0..1.0);
130        let y_val = 2.0 * (i as f64 + 1.0) + 1.5 * (5.0 + 3.0 * i as f64) + 3.0 + noise;
131        y_values.push(y_val);
132    }
133
134    let y = Series::new(y_values, Some("y".to_string()))?;
135
136    // Add to DataFrame
137    df.add_column("x1".to_string(), x1)?;
138    df.add_column("x2".to_string(), x2)?;
139    df.add_column("y".to_string(), y)?;
140
141    // Perform regression analysis
142    let model = pandrs::stats::linear_regression(&df, "y", &["x1", "x2"])?;
143
144    // Display results
145    println!(
146        "Linear Regression Model: y = {:.4} + {:.4} × x1 + {:.4} × x2",
147        model.intercept, model.coefficients[0], model.coefficients[1]
148    );
149    println!("R²: {:.4}", model.r_squared);
150    println!("Adjusted R²: {:.4}", model.adj_r_squared);
151    println!("p-values of regression coefficients: {:?}", model.p_values);
152
153    // Simple regression example
154    println!("\nSimple Regression Model (x1 only):");
155    let model_simple = pandrs::stats::linear_regression(&df, "y", &["x1"])?;
156    println!(
157        "Linear Regression Model: y = {:.4} + {:.4} × x1",
158        model_simple.intercept, model_simple.coefficients[0]
159    );
160    println!("R²: {:.4}", model_simple.r_squared);
161
162    Ok(())
163}
examples/basic_usage.rs (line 33)
4fn main() -> Result<()> {
5    println!("=== PandRS Basic Usage Example ===");
6
7    // Creating Series
8    let ages = Series::new(vec![30, 25, 40], Some("age".to_string()))?;
9    let heights = Series::new(vec![180, 175, 182], Some("height".to_string()))?;
10    let names = Series::new(
11        vec![
12            "Alice".to_string(),
13            "Bob".to_string(),
14            "Charlie".to_string(),
15        ],
16        Some("name".to_string()),
17    )?;
18
19    println!("Age Series: {:?}", ages);
20    println!("Height Series: {:?}", heights);
21    println!("Name Series: {:?}", names);
22
23    // Statistics for numeric series
24    println!("\n=== Statistics for Age Series ===");
25    println!("Sum: {}", ages.sum());
26    println!("Mean: {}", ages.mean()?);
27    println!("Min: {}", ages.min()?);
28    println!("Max: {}", ages.max()?);
29
30    // Creating a DataFrame
31    println!("\n=== Creating a DataFrame ===");
32    let mut df = DataFrame::new();
33    df.add_column("name".to_string(), names)?;
34    df.add_column("age".to_string(), ages)?;
35    df.add_column("height".to_string(), heights)?;
36
37    println!("DataFrame: {:?}", df);
38    println!("Number of Columns: {}", df.column_count());
39    println!("Number of Rows: {}", df.row_count());
40    println!("Column Names: {:?}", df.column_names());
41
42    // Testing saving to and loading from CSV
43    let file_path = "example_data.csv";
44    df.to_csv(file_path)?;
45    println!("\nSaved to CSV file: {}", file_path);
46
47    // Testing loading from CSV (may not be fully implemented yet)
48    match DataFrame::from_csv(file_path, true) {
49        Ok(loaded_df) => {
50            println!("DataFrame loaded from CSV: {:?}", loaded_df);
51            println!("Number of Columns: {}", loaded_df.column_count());
52            println!("Number of Rows: {}", loaded_df.row_count());
53            println!("Column Names: {:?}", loaded_df.column_names());
54        }
55        Err(e) => {
56            println!("Failed to load CSV: {:?}", e);
57        }
58    }
59
60    println!("\n=== Sample Complete ===");
61    Ok(())
62}
examples/parquet_example.rs (line 10)
4fn main() -> Result<(), Box<dyn Error>> {
5    // Create a sample DataFrame
6    let mut df = DataFrame::new();
7
8    // Add an integer column
9    let int_data = Series::new(vec![1, 2, 3, 4, 5], Some("id".to_string()))?;
10    df.add_column("id".to_string(), int_data)?;
11
12    // Add a floating-point column
13    let float_data = Series::new(vec![1.1, 2.2, 3.3, 4.4, 5.5], Some("value".to_string()))?;
14    df.add_column("value".to_string(), float_data)?;
15
16    // Add a string column
17    let string_data = Series::new(
18        vec![
19            "A".to_string(),
20            "B".to_string(),
21            "C".to_string(),
22            "D".to_string(),
23            "E".to_string(),
24        ],
25        Some("category".to_string()),
26    )?;
27    df.add_column("category".to_string(), string_data)?;
28
29    println!("Original DataFrame:");
30    println!("{:?}", df);
31
32    // Parquet support is still under development
33    println!("\nNote: Parquet support is currently under development.");
34    println!("It is planned to be available in a future release.");
35
36    /*
37    // Although Parquet functionality is not yet implemented, dependencies have been introduced.
38    // The following code is expected to work in a future version.
39
40    // Write the DataFrame to a Parquet file
41    let path = "example.parquet";
42    match write_parquet(&df, path, Some(ParquetCompression::Snappy)) {
43        Ok(_) => {
44            println!("DataFrame written to {}", path);
45
46            // Read the DataFrame from the Parquet file
47            match read_parquet(path) {
48                Ok(df_read) => {
49                    println!("\nDataFrame read from Parquet file:");
50                    println!("{:?}", df_read);
51
52                    // Verify the results
53                    assert_eq!(df.row_count(), df_read.row_count());
54                    assert_eq!(df.column_count(), df_read.column_count());
55
56                    println!("\nVerification successful: Data matches");
57                },
58                Err(e) => println!("Error reading Parquet file: {}", e),
59            }
60        },
61        Err(e) => println!("Error writing Parquet file: {}", e),
62    }
63    */
64
65    Ok(())
66}
examples/benchmark_million.rs (line 57)
6fn main() -> Result<()> {
7    println!("=== Benchmark with One Million Rows ===\n");
8
9    // Benchmark function
10    fn bench<F>(name: &str, f: F) -> Duration
11    where
12        F: FnOnce() -> (),
13    {
14        println!("Running: {}", name);
15        let start = Instant::now();
16        f();
17        let duration = start.elapsed();
18        println!("  Completed: {:?}\n", duration);
19        duration
20    }
21
22    // Benchmark for creating a DataFrame with one million rows
23    println!("--- DataFrame with One Million Rows ---");
24
25    bench("Creating Series x3 (One Million Rows)", || {
26        let _ = Series::new((0..1_000_000).collect::<Vec<_>>(), Some("A".to_string())).unwrap();
27        let _ = Series::new(
28            (0..1_000_000).map(|i| i as f64 * 0.5).collect::<Vec<_>>(),
29            Some("B".to_string()),
30        )
31        .unwrap();
32        let _ = Series::new(
33            (0..1_000_000)
34                .map(|i| format!("val_{}", i))
35                .collect::<Vec<_>>(),
36            Some("C".to_string()),
37        )
38        .unwrap();
39    });
40
41    let large_duration = bench("Creating DataFrame (3 Columns x One Million Rows)", || {
42        let col_a = Series::new((0..1_000_000).collect::<Vec<_>>(), Some("A".to_string())).unwrap();
43        let col_b = Series::new(
44            (0..1_000_000).map(|i| i as f64 * 0.5).collect::<Vec<_>>(),
45            Some("B".to_string()),
46        )
47        .unwrap();
48        let col_c = Series::new(
49            (0..1_000_000)
50                .map(|i| format!("val_{}", i))
51                .collect::<Vec<_>>(),
52            Some("C".to_string()),
53        )
54        .unwrap();
55
56        let mut df = DataFrame::new();
57        df.add_column("A".to_string(), col_a).unwrap();
58        df.add_column("B".to_string(), col_b).unwrap();
59        df.add_column("C".to_string(), col_c).unwrap();
60    });
61
62    bench("DataFrame from_map (3 Columns x One Million Rows)", || {
63        let mut data = HashMap::new();
64        data.insert(
65            "A".to_string(),
66            (0..1_000_000).map(|n| n.to_string()).collect(),
67        );
68        data.insert(
69            "B".to_string(),
70            (0..1_000_000)
71                .map(|n| format!("{:.1}", n as f64 * 0.5))
72                .collect(),
73        );
74        data.insert(
75            "C".to_string(),
76            (0..1_000_000).map(|i| format!("val_{}", i)).collect(),
77        );
78
79        let _ = DataFrame::from_map(data, None).unwrap();
80    });
81
82    println!(
83        "Time to create DataFrame with one million rows in pure Rust: {:?}",
84        large_duration
85    );
86
87    Ok(())
88}
Source

pub fn column_names(&self) -> Vec<String>

Get column names in the DataFrame

Examples found in repository?
examples/basic_usage.rs (line 40)
4fn main() -> Result<()> {
5    println!("=== PandRS Basic Usage Example ===");
6
7    // Creating Series
8    let ages = Series::new(vec![30, 25, 40], Some("age".to_string()))?;
9    let heights = Series::new(vec![180, 175, 182], Some("height".to_string()))?;
10    let names = Series::new(
11        vec![
12            "Alice".to_string(),
13            "Bob".to_string(),
14            "Charlie".to_string(),
15        ],
16        Some("name".to_string()),
17    )?;
18
19    println!("Age Series: {:?}", ages);
20    println!("Height Series: {:?}", heights);
21    println!("Name Series: {:?}", names);
22
23    // Statistics for numeric series
24    println!("\n=== Statistics for Age Series ===");
25    println!("Sum: {}", ages.sum());
26    println!("Mean: {}", ages.mean()?);
27    println!("Min: {}", ages.min()?);
28    println!("Max: {}", ages.max()?);
29
30    // Creating a DataFrame
31    println!("\n=== Creating a DataFrame ===");
32    let mut df = DataFrame::new();
33    df.add_column("name".to_string(), names)?;
34    df.add_column("age".to_string(), ages)?;
35    df.add_column("height".to_string(), heights)?;
36
37    println!("DataFrame: {:?}", df);
38    println!("Number of Columns: {}", df.column_count());
39    println!("Number of Rows: {}", df.row_count());
40    println!("Column Names: {:?}", df.column_names());
41
42    // Testing saving to and loading from CSV
43    let file_path = "example_data.csv";
44    df.to_csv(file_path)?;
45    println!("\nSaved to CSV file: {}", file_path);
46
47    // Testing loading from CSV (may not be fully implemented yet)
48    match DataFrame::from_csv(file_path, true) {
49        Ok(loaded_df) => {
50            println!("DataFrame loaded from CSV: {:?}", loaded_df);
51            println!("Number of Columns: {}", loaded_df.column_count());
52            println!("Number of Rows: {}", loaded_df.row_count());
53            println!("Column Names: {:?}", loaded_df.column_names());
54        }
55        Err(e) => {
56            println!("Failed to load CSV: {:?}", e);
57        }
58    }
59
60    println!("\n=== Sample Complete ===");
61    Ok(())
62}
More examples
Hide additional examples
examples/pivot_example.rs (line 53)
5fn main() -> Result<()> {
6    println!("=== Pivot Table and Grouping Example ===");
7
8    // Create sample data
9    let mut df = DataFrame::new();
10
11    // Create column data
12    let category = Series::new(
13        vec![
14            "A".to_string(),
15            "B".to_string(),
16            "A".to_string(),
17            "C".to_string(),
18            "B".to_string(),
19            "A".to_string(),
20            "C".to_string(),
21            "B".to_string(),
22        ],
23        Some("category".to_string()),
24    )?;
25
26    let region = Series::new(
27        vec![
28            "East".to_string(),
29            "West".to_string(),
30            "West".to_string(),
31            "East".to_string(),
32            "East".to_string(),
33            "West".to_string(),
34            "West".to_string(),
35            "East".to_string(),
36        ],
37        Some("region".to_string()),
38    )?;
39
40    let sales = Series::new(
41        vec![100, 150, 200, 120, 180, 90, 250, 160],
42        Some("sales".to_string()),
43    )?;
44
45    // Add columns to DataFrame
46    df.add_column("category".to_string(), category)?;
47    df.add_column("region".to_string(), region)?;
48    df.add_column("sales".to_string(), sales)?;
49
50    println!("DataFrame Info:");
51    println!("  Number of columns: {}", df.column_count());
52    println!("  Number of rows: {}", df.row_count());
53    println!("  Column names: {:?}", df.column_names());
54
55    // Grouping and aggregation
56    println!("\n=== Grouping by Category ===");
57    let category_group = df.groupby("category")?;
58
59    println!("Sum by category (in progress):");
60    let _category_sum = category_group.sum(&["sales"])?;
61
62    // Pivot table (in progress)
63    println!("\n=== Pivot Table ===");
64    println!("Sum of sales by category and region (in progress):");
65    let _pivot_result = df.pivot_table("category", "region", "sales", AggFunction::Sum)?;
66
67    // Note: Pivot table and grouping features are still under development,
68    // so actual results are not displayed
69
70    println!("\n=== Aggregation Function Examples ===");
71    let functions = [
72        AggFunction::Sum,
73        AggFunction::Mean,
74        AggFunction::Min,
75        AggFunction::Max,
76        AggFunction::Count,
77    ];
78
79    for func in &functions {
80        println!(
81            "Aggregation Function: {} ({})",
82            func.name(),
83            match func {
84                AggFunction::Sum => "Sum",
85                AggFunction::Mean => "Mean",
86                AggFunction::Min => "Min",
87                AggFunction::Max => "Max",
88                AggFunction::Count => "Count",
89            }
90        );
91    }
92
93    println!("\n=== Pivot Table Example (Complete) ===");
94    Ok(())
95}
examples/large_dataset_example.rs (line 25)
6fn main() -> Result<()> {
7    // Path to a large CSV file (replace with actual path)
8    let file_path = "examples/data/large_dataset.csv";
9
10    println!("Working with large datasets example");
11    println!("----------------------------------");
12
13    // Create a disk-based DataFrame with custom configuration
14    let config = DiskConfig {
15        memory_limit: 500 * 1024 * 1024, // 500MB memory limit
16        chunk_size: 50_000,              // Process in chunks of 50,000 rows
17        use_memory_mapping: true,        // Use memory mapping for efficiency
18        temp_dir: None,                  // Use system temp directory
19    };
20
21    let disk_df = DiskBasedDataFrame::new(file_path, Some(config))?;
22
23    // Get schema information
24    println!("DataFrame Schema:");
25    for column in disk_df.schema().column_names() {
26        println!("  - {}", column);
27    }
28
29    // Process in chunks for counting rows
30    let mut chunked_df = disk_df.chunked()?;
31    let mut total_rows = 0;
32
33    println!("\nProcessing in chunks:");
34    while let Some(chunk) = chunked_df.next_chunk()? {
35        let chunk_rows = chunk.row_count();
36        total_rows += chunk_rows;
37        println!("  - Processed chunk with {} rows", chunk_rows);
38    }
39
40    println!("\nTotal rows in dataset: {}", total_rows);
41
42    // Example of filtering data
43    println!("\nFiltering data:");
44    let filtered = disk_df.filter(|value, _| {
45        // Example filter: keep only values starting with 'A'
46        value.starts_with('A')
47    })?;
48
49    println!("Filtered result has {} rows", filtered.len());
50
51    // Example of selecting columns
52    println!("\nSelecting columns:");
53    let columns_to_select = vec!["column1", "column2"]; // Replace with actual column names
54    let selected = disk_df.select(&columns_to_select)?;
55
56    println!("Selected result has {} rows and columns:", selected.len());
57    // Since the result is a Vec<HashMap<String, String>>, we need to check the keys of the first element
58    if !selected.is_empty() {
59        for column in selected[0].keys() {
60            println!("  - {}", column);
61        }
62    }
63
64    // Example of grouping and aggregation
65    println!("\nGrouping and aggregation:");
66    let grouped = disk_df.group_by("category_column", "value_column", |values| {
67        // Example aggregation: calculate average
68        let sum: f64 = values.iter().filter_map(|v| v.parse::<f64>().ok()).sum();
69        let count = values.len();
70
71        if count > 0 {
72            Ok(format!("{:.2}", sum / count as f64))
73        } else {
74            Ok("0.0".to_string())
75        }
76    })?;
77
78    println!("Grouped result has {} groups", grouped.len());
79
80    // Example of parallel processing
81    println!("\nParallel processing example:");
82    let chunk_results = chunked_df.parallel_process(
83        // Process each chunk
84        |chunk| {
85            let mut counts = HashMap::new();
86
87            // Example: count occurrences of values in a column
88            for row_idx in 0..chunk.row_count() {
89                if let Ok(value) = chunk.get_string_value("category_column", row_idx) {
90                    *counts.entry(value.to_string()).or_insert(0) += 1;
91                }
92            }
93
94            Ok(counts)
95        },
96        // Combine results
97        |chunk_maps| {
98            let mut result_map = HashMap::new();
99
100            // Merge all maps
101            for chunk_map in chunk_maps {
102                for (key, count) in chunk_map {
103                    *result_map.entry(key).or_insert(0) += count;
104                }
105            }
106
107            Ok(result_map)
108        },
109    )?;
110
111    println!("Category counts from parallel processing:");
112    for (category, count) in chunk_results.iter().take(5) {
113        println!("  - {}: {}", category, count);
114    }
115
116    Ok(())
117}
Source

pub fn get_column<T: 'static + Debug + Clone + Send + Sync>( &self, column_name: &str, ) -> Result<&Series<T>>

Get a column from the DataFrame by name, as a reference to a `Series<T>` of the requested element type `T`

Examples found in repository?
examples/stats_example.rs (line 37)
22fn descriptive_stats_example() -> Result<()> {
23    println!("1. Descriptive Statistics Sample");
24    println!("-----------------");
25
26    // Create dataset
27    let mut df = DataFrame::new();
28    let values = Series::new(
29        vec![10.5, 12.3, 15.2, 9.8, 11.5, 13.7, 14.3, 12.9, 8.5, 10.2],
30        Some("Values".to_string()),
31    )?;
32
33    df.add_column("Values".to_string(), values)?;
34
35    // Descriptive statistics
36    let stats = pandrs::stats::describe(
37        df.get_column("Values")
38            .unwrap()
39            .values()
40            .iter()
41            .map(|v: &String| v.parse::<f64>().unwrap_or(0.0))
42            .collect::<Vec<f64>>(),
43    )?;
44
45    // Display results
46    println!("Count: {}", stats.count);
47    println!("Mean: {:.2}", stats.mean);
48    println!("Standard Deviation: {:.2}", stats.std);
49    println!("Min: {:.2}", stats.min);
50    println!("First Quartile: {:.2}", stats.q1);
51    println!("Median: {:.2}", stats.median);
52    println!("Third Quartile: {:.2}", stats.q3);
53    println!("Max: {:.2}", stats.max);
54
55    // Covariance and correlation coefficient
56    let data1 = vec![1.0, 2.0, 3.0, 4.0, 5.0];
57    let data2 = vec![1.5, 3.1, 4.2, 5.8, 7.1];
58
59    let cov = pandrs::stats::covariance(&data1, &data2)?;
60    let corr = pandrs::stats::correlation(&data1, &data2)?;
61
62    println!("\nCovariance and Correlation Coefficient:");
63    println!("Covariance: {:.4}", cov);
64    println!("Correlation Coefficient: {:.4}", corr);
65
66    println!();
67    Ok(())
68}
More examples
Hide additional examples
examples/categorical_na_example.rs (line 183)
6fn main() -> Result<()> {
7    println!("=== Example of Categorical Data with Missing Values ===\n");
8
9    // 1. Create categorical data
10    println!("1. Create categorical data");
11
12    // Create a vector with NA values
13    let values = vec![
14        NA::Value("Red".to_string()),
15        NA::Value("Blue".to_string()),
16        NA::NA, // Missing value
17        NA::Value("Green".to_string()),
18        NA::Value("Red".to_string()), // Duplicate value
19    ];
20
21    // Create categorical data type from vector
22    // Create as unordered category
23    let cat = StringCategorical::from_na_vec(
24        values.clone(),
25        None,                              // Auto-detect categories
26        Some(CategoricalOrder::Unordered), // Unordered
27    )?;
28
29    println!("Categories: {:?}", cat.categories());
30    println!("Number of categories: {}", cat.categories().len());
31    println!("Number of data: {}", cat.len());
32
33    // Display category codes
34    println!("Internal codes: {:?}", cat.codes());
35    println!();
36
37    // 2. Create ordered categorical data
38    println!("2. Create ordered categorical data");
39
40    // Explicitly ordered category list
41    let ordered_categories = vec!["Low".to_string(), "Medium".to_string(), "High".to_string()];
42
43    // Create a vector with NA values
44    let values = vec![
45        NA::Value("Medium".to_string()),
46        NA::Value("Low".to_string()),
47        NA::NA, // Missing value
48        NA::Value("High".to_string()),
49        NA::Value("Medium".to_string()), // Duplicate value
50    ];
51
52    // Create as ordered category
53    let ordered_cat = StringCategorical::from_na_vec(
54        values.clone(),
55        Some(ordered_categories),        // Explicit category list
56        Some(CategoricalOrder::Ordered), // Ordered
57    )?;
58
59    println!("Ordered categories: {:?}", ordered_cat.categories());
60    println!("Number of categories: {}", ordered_cat.categories().len());
61    println!("Number of data: {}", ordered_cat.len());
62
63    // Display category codes
64    println!("Internal codes: {:?}", ordered_cat.codes());
65    println!();
66
67    // 3. Operations on categorical data
68    println!("3. Operations on categorical data");
69
70    // Create two categorical data
71    let values1 = vec![
72        NA::Value("A".to_string()),
73        NA::Value("B".to_string()),
74        NA::NA,
75        NA::Value("C".to_string()),
76    ];
77
78    let values2 = vec![
79        NA::Value("B".to_string()),
80        NA::Value("C".to_string()),
81        NA::Value("D".to_string()),
82        NA::NA,
83    ];
84
85    let cat1 = StringCategorical::from_na_vec(values1, None, None)?;
86    let cat2 = StringCategorical::from_na_vec(values2, None, None)?;
87
88    // Set operations
89    let union = cat1.union(&cat2)?; // Union
90    let intersection = cat1.intersection(&cat2)?; // Intersection
91    let difference = cat1.difference(&cat2)?; // Difference
92
93    println!("Categories of set 1: {:?}", cat1.categories());
94    println!("Categories of set 2: {:?}", cat2.categories());
95    println!("Union: {:?}", union.categories());
96    println!("Intersection: {:?}", intersection.categories());
97    println!("Difference (set 1 - set 2): {:?}", difference.categories());
98    println!();
99
100    // 4. Using categorical columns in DataFrame
101    println!("4. Using categorical columns in DataFrame");
102
103    // Create a vector with NA values (first create for categorical)
104    let values = vec![
105        NA::Value("High".to_string()),
106        NA::Value("Medium".to_string()),
107        NA::NA,
108        NA::Value("Low".to_string()),
109    ];
110
111    // Simplified for sample code
112    let order_cats = vec!["Low".to_string(), "Medium".to_string(), "High".to_string()];
113
114    // Create categorical data
115    let cat_eval = StringCategorical::from_na_vec(
116        values.clone(), // Clone it
117        Some(order_cats),
118        Some(CategoricalOrder::Ordered),
119    )?;
120
121    // Output the size of the created categorical data
122    println!("Size of created categorical data: {}", cat_eval.len());
123
124    // Add as categorical column
125    let categoricals = vec![("Evaluation".to_string(), cat_eval)];
126    let mut df = DataFrame::from_categoricals(categoricals)?;
127
128    // Check the number of rows in the data and match it
129    println!("Number of rows in DataFrame: {}", df.row_count());
130    println!("Note: NA values are excluded when creating DataFrame");
131
132    // Add numeric column (match the number of rows)
133    let scores = vec![95, 80, 0]; // Match the number of rows in DataFrame
134    println!("Size of scores: {}", scores.len());
135
136    df.add_column(
137        "Score".to_string(),
138        Series::new(scores, Some("Score".to_string()))?,
139    )?;
140
141    println!("DataFrame: ");
142    println!("{:#?}", df);
143
144    // Retrieve and verify categorical data
145    println!(
146        "Is 'Evaluation' column categorical: {}",
147        df.is_categorical("Evaluation")
148    );
149
150    // Explicitly handle errors
151    match df.get_categorical::<String>("Evaluation") {
152        Ok(cat_col) => println!(
153            "Categories of 'Evaluation' column: {:?}",
154            cat_col.categories()
155        ),
156        Err(_) => println!("Failed to retrieve categories of 'Evaluation' column"),
157    }
158    println!();
159
160    // 5. Input and output with CSV file
161    println!("5. Input and output with CSV file");
162
163    // Save to temporary file
164    let temp_path = Path::new("/tmp/categorical_example.csv");
165    df.to_csv(temp_path)?;
166
167    println!("Saved to CSV file: {}", temp_path.display());
168
169    // Load from file
170    let df_loaded = DataFrame::from_csv(temp_path, true)?;
171
172    // After loading from CSV, categorical information is lost (loaded as regular string column)
173    println!("Data loaded from CSV:");
174    println!("{:#?}", df_loaded);
175
176    // Check data loaded from CSV
177
178    // Note that data loaded from CSV is in a special format
179    println!("Example of data format loaded from CSV:");
180    println!(
181        "First value of 'Evaluation' column: {:?}",
182        df_loaded
183            .get_column::<String>("Evaluation")
184            .unwrap()
185            .values()[0]
186    );
187
188    // To reconstruct categorical data from this CSV loaded data,
189    // more complex processing is required, so the following is a simple example
190
191    // Create new categorical data as an example
192    let new_values = vec![
193        NA::Value("High".to_string()),
194        NA::Value("Medium".to_string()),
195        NA::NA,
196        NA::Value("Low".to_string()),
197    ];
198
199    let new_cat =
200        StringCategorical::from_na_vec(new_values, None, Some(CategoricalOrder::Ordered))?;
201
202    println!("Example of newly created categorical data:");
203    println!("Categories: {:?}", new_cat.categories());
204    println!("Order: {:?}", new_cat.ordered());
205
206    println!("\nTo actually convert data loaded from CSV to categorical data,");
207    println!("parsing processing according to the format and string escaping method of the CSV is required.");
208
209    println!("\n=== Sample End ===");
210    Ok(())
211}
Source

pub fn get_column_string_values(&self, column_name: &str) -> Result<Vec<String>>

Get string values from a column (stub implementation for tests)

Examples found in repository?
examples/benchmark_comparison.rs (line 105)
38fn run_benchmark_suite() {
39    // Header
40    println!("\n=== PandRS Performance Optimization Benchmark ===\n");
41
42    // Benchmark data sizes
43    let sizes = [1000, 10_000, 100_000, 1_000_000];
44
45    for &size in &sizes {
46        println!("\n## Data Size: {} rows ##", size);
47
48        // Data preparation
49        let int_data: Vec<i32> = (0..size).collect();
50        let float_data: Vec<f64> = (0..size).map(|i| i as f64 * 0.5).collect();
51        let string_data: Vec<String> = (0..size).map(|i| format!("val_{}", i % 100)).collect();
52
53        // Legacy implementation: Series creation
54        let (legacy_series_time, (legacy_int_series, legacy_float_series, legacy_string_series)) =
55            bench("Legacy Implementation - Series Creation", || {
56                let int_series =
57                    Series::new(int_data.clone(), Some("int_col".to_string())).unwrap();
58                let float_series =
59                    Series::new(float_data.clone(), Some("float_col".to_string())).unwrap();
60                let string_series =
61                    Series::new(string_data.clone(), Some("string_col".to_string())).unwrap();
62                (int_series, float_series, string_series)
63            });
64
65        // Optimized implementation: Column creation
66        let (optimized_series_time, (opt_int_col, opt_float_col, opt_string_col)) =
67            bench("Optimized Implementation - Column Creation", || {
68                let int_col =
69                    prototype::Int64Column::new(int_data.iter().map(|&i| i as i64).collect())
70                        .with_name("int_col");
71                let float_col =
72                    prototype::Float64Column::new(float_data.clone()).with_name("float_col");
73                let string_col =
74                    prototype::StringColumn::new(string_data.clone()).with_name("string_col");
75                (int_col, float_col, string_col)
76            });
77
78        // Legacy implementation: DataFrame creation
79        let (legacy_df_time, legacy_df) =
80            bench("Legacy Implementation - DataFrame Creation", || {
81                let mut df = DataFrame::new();
82                df.add_column("int_col".to_string(), legacy_int_series.clone())
83                    .unwrap();
84                df.add_column("float_col".to_string(), legacy_float_series.clone())
85                    .unwrap();
86                df.add_column("string_col".to_string(), legacy_string_series.clone())
87                    .unwrap();
88                df
89            });
90
91        // Optimized implementation: OptimizedDataFrame creation
92        let (optimized_df_time, optimized_df) =
93            bench("Optimized Implementation - DataFrame Creation", || {
94                let mut df = prototype::OptimizedDataFrame::new();
95                df.add_column("int_col", opt_int_col.clone()).unwrap();
96                df.add_column("float_col", opt_float_col.clone()).unwrap();
97                df.add_column("string_col", opt_string_col.clone()).unwrap();
98                df
99            });
100
101        // Legacy implementation: DataFrame aggregation operations
102        let (legacy_agg_time, _) = bench("Legacy Implementation - Aggregation Operations", || {
103            // Legacy implementation has low efficiency due to numerical operations via DataBox
104            // Legacy implementation requires string conversion for numerical operations
105            let int_values = legacy_df.get_column_string_values("int_col").unwrap();
106            let float_values = legacy_df.get_column_string_values("float_col").unwrap();
107
108            // Conversion from string to numeric
109            let int_numeric: Vec<i32> = int_values
110                .iter()
111                .filter_map(|s| s.parse::<i32>().ok())
112                .collect();
113
114            let float_numeric: Vec<f64> = float_values
115                .iter()
116                .filter_map(|s| s.parse::<f64>().ok())
117                .collect();
118
119            // Aggregation calculations
120            let int_sum: i32 = int_numeric.iter().sum();
121            let int_mean = int_sum as f64 / int_numeric.len() as f64;
122
123            let float_sum: f64 = float_numeric.iter().sum();
124            let float_mean = float_sum / float_numeric.len() as f64;
125
126            (int_sum, int_mean, float_sum, float_mean)
127        });
128
129        // Optimized implementation: DataFrame aggregation operations
130        let (optimized_agg_time, _) =
131            bench("Optimized Implementation - Aggregation Operations", || {
132                // Optimized implementation has type-safe access and direct numerical operations
133                let int_col = optimized_df.get_int64_column("int_col").unwrap();
134                let float_col = optimized_df.get_float64_column("float_col").unwrap();
135
136                // Direct aggregation calculations
137                let int_sum = int_col.sum();
138                let int_mean = int_col.mean().unwrap();
139
140                let float_sum = float_col.sum();
141                let float_mean = float_col.mean().unwrap();
142
143                (int_sum, int_mean, float_sum, float_mean)
144            });
145
146        // Result summary
147        println!("\nResult Summary ({} rows):", size);
148        println!(
149            "  Series Creation: {:.2}x speedup ({} → {})",
150            legacy_series_time.as_secs_f64() / optimized_series_time.as_secs_f64(),
151            format_duration(legacy_series_time),
152            format_duration(optimized_series_time)
153        );
154
155        println!(
156            "  DataFrame Creation: {:.2}x speedup ({} → {})",
157            legacy_df_time.as_secs_f64() / optimized_df_time.as_secs_f64(),
158            format_duration(legacy_df_time),
159            format_duration(optimized_df_time)
160        );
161
162        println!(
163            "  Aggregation Operations: {:.2}x speedup ({} → {})",
164            legacy_agg_time.as_secs_f64() / optimized_agg_time.as_secs_f64(),
165            format_duration(legacy_agg_time),
166            format_duration(optimized_agg_time)
167        );
168    }
169}
Source

pub fn column_name(&self, idx: usize) -> Option<&String>

Get a column name by index (compatibility method)

Source

pub fn concat_rows(&self, _other: &DataFrame) -> Result<DataFrame>

Concatenate rows from another DataFrame, returning the result as a new DataFrame

Source

pub fn to_csv<P: AsRef<Path>>(&self, _path: P) -> Result<()>

Save the DataFrame as a CSV file at the given path

Examples found in repository?
examples/basic_usage.rs (line 44)
4fn main() -> Result<()> {
5    println!("=== PandRS Basic Usage Example ===");
6
7    // Creating Series
8    let ages = Series::new(vec![30, 25, 40], Some("age".to_string()))?;
9    let heights = Series::new(vec![180, 175, 182], Some("height".to_string()))?;
10    let names = Series::new(
11        vec![
12            "Alice".to_string(),
13            "Bob".to_string(),
14            "Charlie".to_string(),
15        ],
16        Some("name".to_string()),
17    )?;
18
19    println!("Age Series: {:?}", ages);
20    println!("Height Series: {:?}", heights);
21    println!("Name Series: {:?}", names);
22
23    // Statistics for numeric series
24    println!("\n=== Statistics for Age Series ===");
25    println!("Sum: {}", ages.sum());
26    println!("Mean: {}", ages.mean()?);
27    println!("Min: {}", ages.min()?);
28    println!("Max: {}", ages.max()?);
29
30    // Creating a DataFrame
31    println!("\n=== Creating a DataFrame ===");
32    let mut df = DataFrame::new();
33    df.add_column("name".to_string(), names)?;
34    df.add_column("age".to_string(), ages)?;
35    df.add_column("height".to_string(), heights)?;
36
37    println!("DataFrame: {:?}", df);
38    println!("Number of Columns: {}", df.column_count());
39    println!("Number of Rows: {}", df.row_count());
40    println!("Column Names: {:?}", df.column_names());
41
42    // Testing saving to and loading from CSV
43    let file_path = "example_data.csv";
44    df.to_csv(file_path)?;
45    println!("\nSaved to CSV file: {}", file_path);
46
47    // Testing loading from CSV (may not be fully implemented yet)
48    match DataFrame::from_csv(file_path, true) {
49        Ok(loaded_df) => {
50            println!("DataFrame loaded from CSV: {:?}", loaded_df);
51            println!("Number of Columns: {}", loaded_df.column_count());
52            println!("Number of Rows: {}", loaded_df.row_count());
53            println!("Column Names: {:?}", loaded_df.column_names());
54        }
55        Err(e) => {
56            println!("Failed to load CSV: {:?}", e);
57        }
58    }
59
60    println!("\n=== Sample Complete ===");
61    Ok(())
62}
More examples
Hide additional examples
examples/categorical_na_example.rs (line 165)
6fn main() -> Result<()> {
7    println!("=== Example of Categorical Data with Missing Values ===\n");
8
9    // 1. Create categorical data
10    println!("1. Create categorical data");
11
12    // Create a vector with NA values
13    let values = vec![
14        NA::Value("Red".to_string()),
15        NA::Value("Blue".to_string()),
16        NA::NA, // Missing value
17        NA::Value("Green".to_string()),
18        NA::Value("Red".to_string()), // Duplicate value
19    ];
20
21    // Create categorical data type from vector
22    // Create as unordered category
23    let cat = StringCategorical::from_na_vec(
24        values.clone(),
25        None,                              // Auto-detect categories
26        Some(CategoricalOrder::Unordered), // Unordered
27    )?;
28
29    println!("Categories: {:?}", cat.categories());
30    println!("Number of categories: {}", cat.categories().len());
31    println!("Number of data: {}", cat.len());
32
33    // Display category codes
34    println!("Internal codes: {:?}", cat.codes());
35    println!();
36
37    // 2. Create ordered categorical data
38    println!("2. Create ordered categorical data");
39
40    // Explicitly ordered category list
41    let ordered_categories = vec!["Low".to_string(), "Medium".to_string(), "High".to_string()];
42
43    // Create a vector with NA values
44    let values = vec![
45        NA::Value("Medium".to_string()),
46        NA::Value("Low".to_string()),
47        NA::NA, // Missing value
48        NA::Value("High".to_string()),
49        NA::Value("Medium".to_string()), // Duplicate value
50    ];
51
52    // Create as ordered category
53    let ordered_cat = StringCategorical::from_na_vec(
54        values.clone(),
55        Some(ordered_categories),        // Explicit category list
56        Some(CategoricalOrder::Ordered), // Ordered
57    )?;
58
59    println!("Ordered categories: {:?}", ordered_cat.categories());
60    println!("Number of categories: {}", ordered_cat.categories().len());
61    println!("Number of data: {}", ordered_cat.len());
62
63    // Display category codes
64    println!("Internal codes: {:?}", ordered_cat.codes());
65    println!();
66
67    // 3. Operations on categorical data
68    println!("3. Operations on categorical data");
69
70    // Create two categorical data
71    let values1 = vec![
72        NA::Value("A".to_string()),
73        NA::Value("B".to_string()),
74        NA::NA,
75        NA::Value("C".to_string()),
76    ];
77
78    let values2 = vec![
79        NA::Value("B".to_string()),
80        NA::Value("C".to_string()),
81        NA::Value("D".to_string()),
82        NA::NA,
83    ];
84
85    let cat1 = StringCategorical::from_na_vec(values1, None, None)?;
86    let cat2 = StringCategorical::from_na_vec(values2, None, None)?;
87
88    // Set operations
89    let union = cat1.union(&cat2)?; // Union
90    let intersection = cat1.intersection(&cat2)?; // Intersection
91    let difference = cat1.difference(&cat2)?; // Difference
92
93    println!("Categories of set 1: {:?}", cat1.categories());
94    println!("Categories of set 2: {:?}", cat2.categories());
95    println!("Union: {:?}", union.categories());
96    println!("Intersection: {:?}", intersection.categories());
97    println!("Difference (set 1 - set 2): {:?}", difference.categories());
98    println!();
99
100    // 4. Using categorical columns in DataFrame
101    println!("4. Using categorical columns in DataFrame");
102
103    // Create a vector with NA values (first create for categorical)
104    let values = vec![
105        NA::Value("High".to_string()),
106        NA::Value("Medium".to_string()),
107        NA::NA,
108        NA::Value("Low".to_string()),
109    ];
110
111    // Simplified for sample code
112    let order_cats = vec!["Low".to_string(), "Medium".to_string(), "High".to_string()];
113
114    // Create categorical data
115    let cat_eval = StringCategorical::from_na_vec(
116        values.clone(), // Clone it
117        Some(order_cats),
118        Some(CategoricalOrder::Ordered),
119    )?;
120
121    // Output the size of the created categorical data
122    println!("Size of created categorical data: {}", cat_eval.len());
123
124    // Add as categorical column
125    let categoricals = vec![("Evaluation".to_string(), cat_eval)];
126    let mut df = DataFrame::from_categoricals(categoricals)?;
127
128    // Check the number of rows in the data and match it
129    println!("Number of rows in DataFrame: {}", df.row_count());
130    println!("Note: NA values are excluded when creating DataFrame");
131
132    // Add numeric column (match the number of rows)
133    let scores = vec![95, 80, 0]; // Match the number of rows in DataFrame
134    println!("Size of scores: {}", scores.len());
135
136    df.add_column(
137        "Score".to_string(),
138        Series::new(scores, Some("Score".to_string()))?,
139    )?;
140
141    println!("DataFrame: ");
142    println!("{:#?}", df);
143
144    // Retrieve and verify categorical data
145    println!(
146        "Is 'Evaluation' column categorical: {}",
147        df.is_categorical("Evaluation")
148    );
149
150    // Explicitly handle errors
151    match df.get_categorical::<String>("Evaluation") {
152        Ok(cat_col) => println!(
153            "Categories of 'Evaluation' column: {:?}",
154            cat_col.categories()
155        ),
156        Err(_) => println!("Failed to retrieve categories of 'Evaluation' column"),
157    }
158    println!();
159
160    // 5. Input and output with CSV file
161    println!("5. Input and output with CSV file");
162
163    // Save to temporary file
164    let temp_path = Path::new("/tmp/categorical_example.csv");
165    df.to_csv(temp_path)?;
166
167    println!("Saved to CSV file: {}", temp_path.display());
168
169    // Load from file
170    let df_loaded = DataFrame::from_csv(temp_path, true)?;
171
172    // After loading from CSV, categorical information is lost (loaded as regular string column)
173    println!("Data loaded from CSV:");
174    println!("{:#?}", df_loaded);
175
176    // Check data loaded from CSV
177
178    // Note that data loaded from CSV is in a special format
179    println!("Example of data format loaded from CSV:");
180    println!(
181        "First value of 'Evaluation' column: {:?}",
182        df_loaded
183            .get_column::<String>("Evaluation")
184            .unwrap()
185            .values()[0]
186    );
187
188    // To reconstruct categorical data from this CSV loaded data,
189    // more complex processing is required, so the following is a simple example
190
191    // Create new categorical data as an example
192    let new_values = vec![
193        NA::Value("High".to_string()),
194        NA::Value("Medium".to_string()),
195        NA::NA,
196        NA::Value("Low".to_string()),
197    ];
198
199    let new_cat =
200        StringCategorical::from_na_vec(new_values, None, Some(CategoricalOrder::Ordered))?;
201
202    println!("Example of newly created categorical data:");
203    println!("Categories: {:?}", new_cat.categories());
204    println!("Order: {:?}", new_cat.ordered());
205
206    println!("\nTo actually convert data loaded from CSV to categorical data,");
207    println!("parsing processing according to the format and string escaping method of the CSV is required.");
208
209    println!("\n=== Sample End ===");
210    Ok(())
211}
Source

pub fn from_csv<P: AsRef<Path>>(_path: P, _has_header: bool) -> Result<Self>

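Create a DataFrame from the CSV file at `_path`. `_has_header` presumably indicates whether the first row contains column names (not confirmed by this page). Note: the repository's `basic_usage.rs` example describes CSV loading as possibly not fully implemented yet.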

Examples found in repository?
examples/basic_usage.rs (line 48)
4fn main() -> Result<()> {
5    println!("=== PandRS Basic Usage Example ===");
6
7    // Creating Series
8    let ages = Series::new(vec![30, 25, 40], Some("age".to_string()))?;
9    let heights = Series::new(vec![180, 175, 182], Some("height".to_string()))?;
10    let names = Series::new(
11        vec![
12            "Alice".to_string(),
13            "Bob".to_string(),
14            "Charlie".to_string(),
15        ],
16        Some("name".to_string()),
17    )?;
18
19    println!("Age Series: {:?}", ages);
20    println!("Height Series: {:?}", heights);
21    println!("Name Series: {:?}", names);
22
23    // Statistics for numeric series
24    println!("\n=== Statistics for Age Series ===");
25    println!("Sum: {}", ages.sum());
26    println!("Mean: {}", ages.mean()?);
27    println!("Min: {}", ages.min()?);
28    println!("Max: {}", ages.max()?);
29
30    // Creating a DataFrame
31    println!("\n=== Creating a DataFrame ===");
32    let mut df = DataFrame::new();
33    df.add_column("name".to_string(), names)?;
34    df.add_column("age".to_string(), ages)?;
35    df.add_column("height".to_string(), heights)?;
36
37    println!("DataFrame: {:?}", df);
38    println!("Number of Columns: {}", df.column_count());
39    println!("Number of Rows: {}", df.row_count());
40    println!("Column Names: {:?}", df.column_names());
41
42    // Testing saving to and loading from CSV
43    let file_path = "example_data.csv";
44    df.to_csv(file_path)?;
45    println!("\nSaved to CSV file: {}", file_path);
46
47    // Testing loading from CSV (may not be fully implemented yet)
48    match DataFrame::from_csv(file_path, true) {
49        Ok(loaded_df) => {
50            println!("DataFrame loaded from CSV: {:?}", loaded_df);
51            println!("Number of Columns: {}", loaded_df.column_count());
52            println!("Number of Rows: {}", loaded_df.row_count());
53            println!("Column Names: {:?}", loaded_df.column_names());
54        }
55        Err(e) => {
56            println!("Failed to load CSV: {:?}", e);
57        }
58    }
59
60    println!("\n=== Sample Complete ===");
61    Ok(())
62}
More examples
Hide additional examples
examples/categorical_na_example.rs (line 170)
6fn main() -> Result<()> {
7    println!("=== Example of Categorical Data with Missing Values ===\n");
8
9    // 1. Create categorical data
10    println!("1. Create categorical data");
11
12    // Create a vector with NA values
13    let values = vec![
14        NA::Value("Red".to_string()),
15        NA::Value("Blue".to_string()),
16        NA::NA, // Missing value
17        NA::Value("Green".to_string()),
18        NA::Value("Red".to_string()), // Duplicate value
19    ];
20
21    // Create categorical data type from vector
22    // Create as unordered category
23    let cat = StringCategorical::from_na_vec(
24        values.clone(),
25        None,                              // Auto-detect categories
26        Some(CategoricalOrder::Unordered), // Unordered
27    )?;
28
29    println!("Categories: {:?}", cat.categories());
30    println!("Number of categories: {}", cat.categories().len());
31    println!("Number of data: {}", cat.len());
32
33    // Display category codes
34    println!("Internal codes: {:?}", cat.codes());
35    println!();
36
37    // 2. Create ordered categorical data
38    println!("2. Create ordered categorical data");
39
40    // Explicitly ordered category list
41    let ordered_categories = vec!["Low".to_string(), "Medium".to_string(), "High".to_string()];
42
43    // Create a vector with NA values
44    let values = vec![
45        NA::Value("Medium".to_string()),
46        NA::Value("Low".to_string()),
47        NA::NA, // Missing value
48        NA::Value("High".to_string()),
49        NA::Value("Medium".to_string()), // Duplicate value
50    ];
51
52    // Create as ordered category
53    let ordered_cat = StringCategorical::from_na_vec(
54        values.clone(),
55        Some(ordered_categories),        // Explicit category list
56        Some(CategoricalOrder::Ordered), // Ordered
57    )?;
58
59    println!("Ordered categories: {:?}", ordered_cat.categories());
60    println!("Number of categories: {}", ordered_cat.categories().len());
61    println!("Number of data: {}", ordered_cat.len());
62
63    // Display category codes
64    println!("Internal codes: {:?}", ordered_cat.codes());
65    println!();
66
67    // 3. Operations on categorical data
68    println!("3. Operations on categorical data");
69
70    // Create two categorical data
71    let values1 = vec![
72        NA::Value("A".to_string()),
73        NA::Value("B".to_string()),
74        NA::NA,
75        NA::Value("C".to_string()),
76    ];
77
78    let values2 = vec![
79        NA::Value("B".to_string()),
80        NA::Value("C".to_string()),
81        NA::Value("D".to_string()),
82        NA::NA,
83    ];
84
85    let cat1 = StringCategorical::from_na_vec(values1, None, None)?;
86    let cat2 = StringCategorical::from_na_vec(values2, None, None)?;
87
88    // Set operations
89    let union = cat1.union(&cat2)?; // Union
90    let intersection = cat1.intersection(&cat2)?; // Intersection
91    let difference = cat1.difference(&cat2)?; // Difference
92
93    println!("Categories of set 1: {:?}", cat1.categories());
94    println!("Categories of set 2: {:?}", cat2.categories());
95    println!("Union: {:?}", union.categories());
96    println!("Intersection: {:?}", intersection.categories());
97    println!("Difference (set 1 - set 2): {:?}", difference.categories());
98    println!();
99
100    // 4. Using categorical columns in DataFrame
101    println!("4. Using categorical columns in DataFrame");
102
103    // Create a vector with NA values (first create for categorical)
104    let values = vec![
105        NA::Value("High".to_string()),
106        NA::Value("Medium".to_string()),
107        NA::NA,
108        NA::Value("Low".to_string()),
109    ];
110
111    // Simplified for sample code
112    let order_cats = vec!["Low".to_string(), "Medium".to_string(), "High".to_string()];
113
114    // Create categorical data
115    let cat_eval = StringCategorical::from_na_vec(
116        values.clone(), // Clone it
117        Some(order_cats),
118        Some(CategoricalOrder::Ordered),
119    )?;
120
121    // Output the size of the created categorical data
122    println!("Size of created categorical data: {}", cat_eval.len());
123
124    // Add as categorical column
125    let categoricals = vec![("Evaluation".to_string(), cat_eval)];
126    let mut df = DataFrame::from_categoricals(categoricals)?;
127
128    // Check the number of rows in the data and match it
129    println!("Number of rows in DataFrame: {}", df.row_count());
130    println!("Note: NA values are excluded when creating DataFrame");
131
132    // Add numeric column (match the number of rows)
133    let scores = vec![95, 80, 0]; // Match the number of rows in DataFrame
134    println!("Size of scores: {}", scores.len());
135
136    df.add_column(
137        "Score".to_string(),
138        Series::new(scores, Some("Score".to_string()))?,
139    )?;
140
141    println!("DataFrame: ");
142    println!("{:#?}", df);
143
144    // Retrieve and verify categorical data
145    println!(
146        "Is 'Evaluation' column categorical: {}",
147        df.is_categorical("Evaluation")
148    );
149
150    // Explicitly handle errors
151    match df.get_categorical::<String>("Evaluation") {
152        Ok(cat_col) => println!(
153            "Categories of 'Evaluation' column: {:?}",
154            cat_col.categories()
155        ),
156        Err(_) => println!("Failed to retrieve categories of 'Evaluation' column"),
157    }
158    println!();
159
160    // 5. Input and output with CSV file
161    println!("5. Input and output with CSV file");
162
163    // Save to temporary file
164    let temp_path = Path::new("/tmp/categorical_example.csv");
165    df.to_csv(temp_path)?;
166
167    println!("Saved to CSV file: {}", temp_path.display());
168
169    // Load from file
170    let df_loaded = DataFrame::from_csv(temp_path, true)?;
171
172    // After loading from CSV, categorical information is lost (loaded as regular string column)
173    println!("Data loaded from CSV:");
174    println!("{:#?}", df_loaded);
175
176    // Check data loaded from CSV
177
178    // Note that data loaded from CSV is in a special format
179    println!("Example of data format loaded from CSV:");
180    println!(
181        "First value of 'Evaluation' column: {:?}",
182        df_loaded
183            .get_column::<String>("Evaluation")
184            .unwrap()
185            .values()[0]
186    );
187
188    // To reconstruct categorical data from this CSV loaded data,
189    // more complex processing is required, so the following is a simple example
190
191    // Create new categorical data as an example
192    let new_values = vec![
193        NA::Value("High".to_string()),
194        NA::Value("Medium".to_string()),
195        NA::NA,
196        NA::Value("Low".to_string()),
197    ];
198
199    let new_cat =
200        StringCategorical::from_na_vec(new_values, None, Some(CategoricalOrder::Ordered))?;
201
202    println!("Example of newly created categorical data:");
203    println!("Categories: {:?}", new_cat.categories());
204    println!("Order: {:?}", new_cat.ordered());
205
206    println!("\nTo actually convert data loaded from CSV to categorical data,");
207    println!("parsing processing according to the format and string escaping method of the CSV is required.");
208
209    println!("\n=== Sample End ===");
210    Ok(())
211}
Source

pub fn from_csv_reader<R: Read>( _reader: &mut Reader<R>, _has_header: bool, ) -> Result<Self>

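Create a DataFrame from an existing CSV `Reader` wrapping any `Read` source. `_has_header` presumably indicates whether the first record contains column names (not confirmed by this page).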

Source

pub fn column_count(&self) -> usize

Get the number of columns in the DataFrame

Examples found in repository?
examples/basic_usage.rs (line 38)
4fn main() -> Result<()> {
5    println!("=== PandRS Basic Usage Example ===");
6
7    // Creating Series
8    let ages = Series::new(vec![30, 25, 40], Some("age".to_string()))?;
9    let heights = Series::new(vec![180, 175, 182], Some("height".to_string()))?;
10    let names = Series::new(
11        vec![
12            "Alice".to_string(),
13            "Bob".to_string(),
14            "Charlie".to_string(),
15        ],
16        Some("name".to_string()),
17    )?;
18
19    println!("Age Series: {:?}", ages);
20    println!("Height Series: {:?}", heights);
21    println!("Name Series: {:?}", names);
22
23    // Statistics for numeric series
24    println!("\n=== Statistics for Age Series ===");
25    println!("Sum: {}", ages.sum());
26    println!("Mean: {}", ages.mean()?);
27    println!("Min: {}", ages.min()?);
28    println!("Max: {}", ages.max()?);
29
30    // Creating a DataFrame
31    println!("\n=== Creating a DataFrame ===");
32    let mut df = DataFrame::new();
33    df.add_column("name".to_string(), names)?;
34    df.add_column("age".to_string(), ages)?;
35    df.add_column("height".to_string(), heights)?;
36
37    println!("DataFrame: {:?}", df);
38    println!("Number of Columns: {}", df.column_count());
39    println!("Number of Rows: {}", df.row_count());
40    println!("Column Names: {:?}", df.column_names());
41
42    // Testing saving to and loading from CSV
43    let file_path = "example_data.csv";
44    df.to_csv(file_path)?;
45    println!("\nSaved to CSV file: {}", file_path);
46
47    // Testing loading from CSV (may not be fully implemented yet)
48    match DataFrame::from_csv(file_path, true) {
49        Ok(loaded_df) => {
50            println!("DataFrame loaded from CSV: {:?}", loaded_df);
51            println!("Number of Columns: {}", loaded_df.column_count());
52            println!("Number of Rows: {}", loaded_df.row_count());
53            println!("Column Names: {:?}", loaded_df.column_names());
54        }
55        Err(e) => {
56            println!("Failed to load CSV: {:?}", e);
57        }
58    }
59
60    println!("\n=== Sample Complete ===");
61    Ok(())
62}
More examples
Hide additional examples
examples/pivot_example.rs (line 51)
5fn main() -> Result<()> {
6    println!("=== Pivot Table and Grouping Example ===");
7
8    // Create sample data
9    let mut df = DataFrame::new();
10
11    // Create column data
12    let category = Series::new(
13        vec![
14            "A".to_string(),
15            "B".to_string(),
16            "A".to_string(),
17            "C".to_string(),
18            "B".to_string(),
19            "A".to_string(),
20            "C".to_string(),
21            "B".to_string(),
22        ],
23        Some("category".to_string()),
24    )?;
25
26    let region = Series::new(
27        vec![
28            "East".to_string(),
29            "West".to_string(),
30            "West".to_string(),
31            "East".to_string(),
32            "East".to_string(),
33            "West".to_string(),
34            "West".to_string(),
35            "East".to_string(),
36        ],
37        Some("region".to_string()),
38    )?;
39
40    let sales = Series::new(
41        vec![100, 150, 200, 120, 180, 90, 250, 160],
42        Some("sales".to_string()),
43    )?;
44
45    // Add columns to DataFrame
46    df.add_column("category".to_string(), category)?;
47    df.add_column("region".to_string(), region)?;
48    df.add_column("sales".to_string(), sales)?;
49
50    println!("DataFrame Info:");
51    println!("  Number of columns: {}", df.column_count());
52    println!("  Number of rows: {}", df.row_count());
53    println!("  Column names: {:?}", df.column_names());
54
55    // Grouping and aggregation
56    println!("\n=== Grouping by Category ===");
57    let category_group = df.groupby("category")?;
58
59    println!("Sum by category (in progress):");
60    let _category_sum = category_group.sum(&["sales"])?;
61
62    // Pivot table (in progress)
63    println!("\n=== Pivot Table ===");
64    println!("Sum of sales by category and region (in progress):");
65    let _pivot_result = df.pivot_table("category", "region", "sales", AggFunction::Sum)?;
66
67    // Note: Pivot table and grouping features are still under development,
68    // so actual results are not displayed
69
70    println!("\n=== Aggregation Function Examples ===");
71    let functions = [
72        AggFunction::Sum,
73        AggFunction::Mean,
74        AggFunction::Min,
75        AggFunction::Max,
76        AggFunction::Count,
77    ];
78
79    for func in &functions {
80        println!(
81            "Aggregation Function: {} ({})",
82            func.name(),
83            match func {
84                AggFunction::Sum => "Sum",
85                AggFunction::Mean => "Mean",
86                AggFunction::Min => "Min",
87                AggFunction::Max => "Max",
88                AggFunction::Count => "Count",
89            }
90        );
91    }
92
93    println!("\n=== Pivot Table Example (Complete) ===");
94    Ok(())
95}
examples/multi_index_example.rs (line 66)
4fn main() -> Result<()> {
5    println!("=== Example of Using MultiIndex ===\n");
6
7    // =========================================
8    // Creating a MultiIndex
9    // =========================================
10
11    println!("--- Creating MultiIndex from Tuples ---");
12
13    // Create MultiIndex from tuples (vector of vectors)
14    let tuples = vec![
15        vec!["A".to_string(), "a".to_string()],
16        vec!["A".to_string(), "b".to_string()],
17        vec!["B".to_string(), "a".to_string()],
18        vec!["B".to_string(), "b".to_string()],
19    ];
20
21    let names = Some(vec![Some("first".to_string()), Some("second".to_string())]);
22    let multi_idx = MultiIndex::from_tuples(tuples, names)?;
23
24    println!("MultiIndex: {:?}\n", multi_idx);
25    println!("Number of Levels: {}", multi_idx.n_levels());
26    println!("Number of Rows: {}\n", multi_idx.len());
27
28    // =========================================
29    // Operations on MultiIndex
30    // =========================================
31
32    println!("--- Retrieving Level Values ---");
33    let level0_values = multi_idx.get_level_values(0)?;
34    println!("Values in Level 0: {:?}", level0_values);
35
36    let level1_values = multi_idx.get_level_values(1)?;
37    println!("Values in Level 1: {:?}", level1_values);
38
39    println!("--- Swapping Levels ---");
40    let swapped = multi_idx.swaplevel(0, 1)?;
41    println!("After Swapping Levels: {:?}\n", swapped);
42
43    // =========================================
44    // DataFrame with MultiIndex
45    // =========================================
46
47    println!("--- DataFrame with MultiIndex ---");
48
49    // Create DataFrame
50    let mut df = DataFrame::with_multi_index(multi_idx.clone());
51
52    // Add data
53    let data = vec![
54        "data1".to_string(),
55        "data2".to_string(),
56        "data3".to_string(),
57        "data4".to_string(),
58    ];
59    df.add_column(
60        "data".to_string(),
61        pandrs::Series::new(data, Some("data".to_string()))?,
62    )?;
63
64    println!("DataFrame: {:?}\n", df);
65    println!("Number of Rows: {}", df.row_count());
66    println!("Number of Columns: {}", df.column_count());
67
68    // =========================================
69    // Conversion Between Simple Index and MultiIndex
70    // =========================================
71
72    println!("\n--- Example of Index Conversion ---");
73
74    // Create DataFrame from simple index
75    let simple_idx = Index::new(vec!["X".to_string(), "Y".to_string(), "Z".to_string()])?;
76    let mut simple_df = DataFrame::with_index(simple_idx);
77
78    // Add data
79    let values = vec![100, 200, 300];
80    let str_values: Vec<String> = values.iter().map(|v| v.to_string()).collect();
81    simple_df.add_column(
82        "values".to_string(),
83        pandrs::Series::new(str_values, Some("values".to_string()))?,
84    )?;
85
86    println!("Simple Index DataFrame: {:?}", simple_df);
87
88    // Prepare for conversion to MultiIndex
89    let tuples = vec![
90        vec!["Category".to_string(), "X".to_string()],
91        vec!["Category".to_string(), "Y".to_string()],
92        vec!["Category".to_string(), "Z".to_string()],
93    ];
94
95    // Create and set MultiIndex
96    let new_multi_idx = MultiIndex::from_tuples(tuples, None)?;
97    simple_df.set_multi_index(new_multi_idx)?;
98
99    println!("After Conversion to MultiIndex: {:?}", simple_df);
100
101    println!("\n=== Sample Complete ===");
102    Ok(())
103}
examples/parallel_example.rs (line 77)
4fn main() -> Result<(), Box<dyn Error>> {
5    println!("=== Example of Parallel Processing Features ===\n");
6
7    // Create sample data
8    let numbers = Series::new(
9        vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
10        Some("numbers".to_string()),
11    )?;
12
13    // Parallel map: square each number
14    println!("Example of parallel map processing:");
15    let squared = numbers.par_map(|x| x * x);
16    println!("Original values: {:?}", numbers.values());
17    println!("Squared values: {:?}", squared.values());
18
19    // Parallel filter: keep only even numbers
20    println!("\nExample of parallel filtering:");
21    let even_numbers = numbers.par_filter(|x| x % 2 == 0);
22    println!("Even Numbers: {:?}", even_numbers.values());
23
24    // Processing data containing NA
25    let na_data = vec![
26        NA::Value(10),
27        NA::Value(20),
28        NA::NA,
29        NA::Value(40),
30        NA::NA,
31        NA::Value(60),
32    ];
33    let na_series = NASeries::new(na_data, Some("na_numbers".to_string()))?;
34
35    println!("\nParallel processing of data containing NA:");
36    let na_tripled = na_series.par_map(|x| x * 3);
37    println!("Original values: {:?}", na_series.values());
38    println!("Tripled values: {:?}", na_tripled.values());
39
40    // Parallel processing of DataFrame
41    println!("\nParallel processing of DataFrame:");
42
43    // Creating a sample DataFrame
44    let mut df = DataFrame::new();
45    let names = Series::new(
46        vec!["Alice", "Bob", "Charlie", "David", "Eve"],
47        Some("name".to_string()),
48    )?;
49    let ages = Series::new(vec![25, 30, 35, 40, 45], Some("age".to_string()))?;
50    let scores = Series::new(vec![85, 90, 78, 92, 88], Some("score".to_string()))?;
51
52    df.add_column("name".to_string(), names)?;
53    df.add_column("age".to_string(), ages)?;
54    df.add_column("score".to_string(), scores)?;
55
56    // Parallel transformation of DataFrame
57    println!("Example of DataFrame.par_apply:");
58    let transformed_df = df.par_apply(|col, _row, val| {
59        match col {
60            "age" => {
61                // Add 1 to age
62                let age: i32 = val.parse().unwrap_or(0);
63                (age + 1).to_string()
64            }
65            "score" => {
66                // Add 5 to score
67                let score: i32 = val.parse().unwrap_or(0);
68                (score + 5).to_string()
69            }
70            _ => val.to_string(),
71        }
72    })?;
73
74    println!(
75        "Original DF row count: {}, column count: {}",
76        df.row_count(),
77        df.column_count()
78    );
79    println!(
80        "Transformed DF row count: {}, column count: {}",
81        transformed_df.row_count(),
82        transformed_df.column_count()
83    );
84
85    // Filtering rows
86    println!("\nExample of DataFrame.par_filter_rows:");
87    let filtered_df = df.par_filter_rows(|row| {
88        // Keep only rows where score > 85
89        if let Ok(values) = df.get_column_numeric_values("score") {
90            if row < values.len() {
91                return values[row] > 85.0;
92            }
93        }
94        false
95    })?;
96
97    println!("Row count after filtering: {}", filtered_df.row_count());
98
99    // Example of using ParallelUtils
100    println!("\nExample of ParallelUtils features:");
101
102    let unsorted = vec![5, 3, 8, 1, 9, 4, 7, 2, 6];
103    let sorted = ParallelUtils::par_sort(unsorted.clone());
104    println!("Before sorting: {:?}", unsorted);
105    println!("After sorting: {:?}", sorted);
106
107    let numbers_vec = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
108    let sum = ParallelUtils::par_sum(&numbers_vec);
109    let mean = ParallelUtils::par_mean(&numbers_vec);
110    let min = ParallelUtils::par_min(&numbers_vec);
111    let max = ParallelUtils::par_max(&numbers_vec);
112
113    println!("Sum: {}", sum);
114    println!("Mean: {}", mean.unwrap());
115    println!("Min: {}", min.unwrap());
116    println!("Max: {}", max.unwrap());
117
118    println!("\n=== Example of Parallel Processing Features Complete ===");
119    Ok(())
120}
Source

pub fn ncols(&self) -> usize

Get the number of columns in the DataFrame (compatibility alias; apparently equivalent to `column_count`)

Source

pub fn select_columns(&self, columns: &[&str]) -> Result<Self>

Create a new DataFrame with only the specified columns

Source

pub fn from_map( data: HashMap<String, Vec<String>>, index: Option<Index<String>>, ) -> Result<Self>

Create a new DataFrame from a HashMap that maps column names to vectors of string values, optionally supplying a string `Index` (pass `None` to omit it)

Examples found in repository?
examples/benchmark_million.rs (line 79)
6fn main() -> Result<()> {
7    println!("=== Benchmark with One Million Rows ===\n");
8
9    // Benchmark function
10    fn bench<F>(name: &str, f: F) -> Duration
11    where
12        F: FnOnce() -> (),
13    {
14        println!("Running: {}", name);
15        let start = Instant::now();
16        f();
17        let duration = start.elapsed();
18        println!("  Completed: {:?}\n", duration);
19        duration
20    }
21
22    // Benchmark for creating a DataFrame with one million rows
23    println!("--- DataFrame with One Million Rows ---");
24
25    bench("Creating Series x3 (One Million Rows)", || {
26        let _ = Series::new((0..1_000_000).collect::<Vec<_>>(), Some("A".to_string())).unwrap();
27        let _ = Series::new(
28            (0..1_000_000).map(|i| i as f64 * 0.5).collect::<Vec<_>>(),
29            Some("B".to_string()),
30        )
31        .unwrap();
32        let _ = Series::new(
33            (0..1_000_000)
34                .map(|i| format!("val_{}", i))
35                .collect::<Vec<_>>(),
36            Some("C".to_string()),
37        )
38        .unwrap();
39    });
40
41    let large_duration = bench("Creating DataFrame (3 Columns x One Million Rows)", || {
42        let col_a = Series::new((0..1_000_000).collect::<Vec<_>>(), Some("A".to_string())).unwrap();
43        let col_b = Series::new(
44            (0..1_000_000).map(|i| i as f64 * 0.5).collect::<Vec<_>>(),
45            Some("B".to_string()),
46        )
47        .unwrap();
48        let col_c = Series::new(
49            (0..1_000_000)
50                .map(|i| format!("val_{}", i))
51                .collect::<Vec<_>>(),
52            Some("C".to_string()),
53        )
54        .unwrap();
55
56        let mut df = DataFrame::new();
57        df.add_column("A".to_string(), col_a).unwrap();
58        df.add_column("B".to_string(), col_b).unwrap();
59        df.add_column("C".to_string(), col_c).unwrap();
60    });
61
62    bench("DataFrame from_map (3 Columns x One Million Rows)", || {
63        let mut data = HashMap::new();
64        data.insert(
65            "A".to_string(),
66            (0..1_000_000).map(|n| n.to_string()).collect(),
67        );
68        data.insert(
69            "B".to_string(),
70            (0..1_000_000)
71                .map(|n| format!("{:.1}", n as f64 * 0.5))
72                .collect(),
73        );
74        data.insert(
75            "C".to_string(),
76            (0..1_000_000).map(|i| format!("val_{}", i)).collect(),
77        );
78
79        let _ = DataFrame::from_map(data, None).unwrap();
80    });
81
82    println!(
83        "Time to create DataFrame with one million rows in pure Rust: {:?}",
84        large_duration
85    );
86
87    Ok(())
88}
More examples
Hide additional examples
examples/performance_bench.rs (line 79)
5fn main() -> Result<(), PandRSError> {
6    println!("=== PandRS Performance Benchmark ===\n");
7
8    // Benchmark function
9    fn bench<F>(name: &str, f: F) -> Duration
10    where
11        F: FnOnce() -> (),
12    {
13        println!("Running: {}", name);
14        let start = Instant::now();
15        f();
16        let duration = start.elapsed();
17        println!("  Completed: {:?}\n", duration);
18        duration
19    }
20
21    // Benchmark for creating a small DataFrame
22    println!("--- Small DataFrame (10 rows) ---");
23
24    bench("Create Series x3", || {
25        let _ = Series::new(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10], Some("A".to_string())).unwrap();
26        let _ = Series::new(
27            vec![1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9, 10.10],
28            Some("B".to_string()),
29        )
30        .unwrap();
31        let _ = Series::new(
32            vec!["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"]
33                .into_iter()
34                .map(|s| s.to_string())
35                .collect::<Vec<_>>(),
36            Some("C".to_string()),
37        )
38        .unwrap();
39    });
40
41    bench("Create DataFrame (3 columns x 10 rows)", || {
42        let col_a =
43            Series::new(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10], Some("A".to_string())).unwrap();
44        let col_b = Series::new(
45            vec![1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9, 10.10],
46            Some("B".to_string()),
47        )
48        .unwrap();
49        let col_c = Series::new(
50            vec!["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"]
51                .into_iter()
52                .map(|s| s.to_string())
53                .collect::<Vec<_>>(),
54            Some("C".to_string()),
55        )
56        .unwrap();
57
58        let mut df = DataFrame::new();
59        df.add_column("A".to_string(), col_a).unwrap();
60        df.add_column("B".to_string(), col_b).unwrap();
61        df.add_column("C".to_string(), col_c).unwrap();
62    });
63
64    bench("DataFrame from_map (3 columns x 10 rows)", || {
65        let mut data = HashMap::new();
66        data.insert("A".to_string(), (0..10).map(|n| n.to_string()).collect());
67        data.insert(
68            "B".to_string(),
69            (0..10).map(|n| format!("{:.1}", n as f64 + 0.1)).collect(),
70        );
71        data.insert(
72            "C".to_string(),
73            vec!["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"]
74                .into_iter()
75                .map(|s| s.to_string())
76                .collect(),
77        );
78
79        let _ = DataFrame::from_map(data, None).unwrap();
80    });
81
82    // Benchmark for creating a medium DataFrame
83    println!("\n--- Medium DataFrame (1,000 rows) ---");
84
85    bench("Create Series x3 (1000 rows)", || {
86        let _ = Series::new((0..1000).collect::<Vec<_>>(), Some("A".to_string())).unwrap();
87        let _ = Series::new(
88            (0..1000).map(|i| i as f64 * 0.5).collect::<Vec<_>>(),
89            Some("B".to_string()),
90        )
91        .unwrap();
92        let _ = Series::new(
93            (0..1000).map(|i| format!("val_{}", i)).collect::<Vec<_>>(),
94            Some("C".to_string()),
95        )
96        .unwrap();
97    });
98
99    bench("Create DataFrame (3 columns x 1000 rows)", || {
100        let col_a = Series::new((0..1000).collect::<Vec<_>>(), Some("A".to_string())).unwrap();
101        let col_b = Series::new(
102            (0..1000).map(|i| i as f64 * 0.5).collect::<Vec<_>>(),
103            Some("B".to_string()),
104        )
105        .unwrap();
106        let col_c = Series::new(
107            (0..1000).map(|i| format!("val_{}", i)).collect::<Vec<_>>(),
108            Some("C".to_string()),
109        )
110        .unwrap();
111
112        let mut df = DataFrame::new();
113        df.add_column("A".to_string(), col_a).unwrap();
114        df.add_column("B".to_string(), col_b).unwrap();
115        df.add_column("C".to_string(), col_c).unwrap();
116    });
117
118    bench("DataFrame from_map (3 columns x 1000 rows)", || {
119        let mut data = HashMap::new();
120        data.insert("A".to_string(), (0..1000).map(|n| n.to_string()).collect());
121        data.insert(
122            "B".to_string(),
123            (0..1000)
124                .map(|n| format!("{:.1}", n as f64 * 0.5))
125                .collect(),
126        );
127        data.insert(
128            "C".to_string(),
129            (0..1000).map(|i| format!("val_{}", i)).collect(),
130        );
131
132        let _ = DataFrame::from_map(data, None).unwrap();
133    });
134
135    // Benchmark for creating a large DataFrame
136    println!("\n--- Large DataFrame (100,000 rows) ---");
137
138    bench("Create Series x3 (100,000 rows)", || {
139        let _ = Series::new((0..100_000).collect::<Vec<_>>(), Some("A".to_string())).unwrap();
140        let _ = Series::new(
141            (0..100_000).map(|i| i as f64 * 0.5).collect::<Vec<_>>(),
142            Some("B".to_string()),
143        )
144        .unwrap();
145        let _ = Series::new(
146            (0..100_000)
147                .map(|i| format!("val_{}", i))
148                .collect::<Vec<_>>(),
149            Some("C".to_string()),
150        )
151        .unwrap();
152    });
153
154    let large_duration = bench("Create DataFrame (3 columns x 100,000 rows)", || {
155        let col_a = Series::new((0..100_000).collect::<Vec<_>>(), Some("A".to_string())).unwrap();
156        let col_b = Series::new(
157            (0..100_000).map(|i| i as f64 * 0.5).collect::<Vec<_>>(),
158            Some("B".to_string()),
159        )
160        .unwrap();
161        let col_c = Series::new(
162            (0..100_000)
163                .map(|i| format!("val_{}", i))
164                .collect::<Vec<_>>(),
165            Some("C".to_string()),
166        )
167        .unwrap();
168
169        let mut df = DataFrame::new();
170        df.add_column("A".to_string(), col_a).unwrap();
171        df.add_column("B".to_string(), col_b).unwrap();
172        df.add_column("C".to_string(), col_c).unwrap();
173    });
174
175    bench("DataFrame from_map (3 columns x 100,000 rows)", || {
176        let mut data = HashMap::new();
177        data.insert(
178            "A".to_string(),
179            (0..100_000).map(|n| n.to_string()).collect(),
180        );
181        data.insert(
182            "B".to_string(),
183            (0..100_000)
184                .map(|n| format!("{:.1}", n as f64 * 0.5))
185                .collect(),
186        );
187        data.insert(
188            "C".to_string(),
189            (0..100_000).map(|i| format!("val_{}", i)).collect(),
190        );
191
192        let _ = DataFrame::from_map(data, None).unwrap();
193    });
194
195    println!(
196        "Pure Rust code DataFrame creation time for 100,000 rows: {:?}",
197        large_duration
198    );
199    println!("(Equivalent operation in Python: approximately 0.35 seconds)");
200
201    Ok(())
202}
Source

pub fn has_column(&self, column_name: &str) -> bool

Check if the DataFrame has the specified column (alias for contains_column)

Source

pub fn get_index(&self) -> DataFrameIndex<String>

Get the DataFrame’s index

Source

pub fn set_index(&mut self, index: Index<String>) -> Result<()>

Set the DataFrame’s index from an Index

Source

pub fn set_multi_index(&mut self, multi_index: MultiIndex<String>) -> Result<()>

Set a multi-index for the DataFrame

Examples found in repository?
examples/multi_index_example.rs (line 97)
4fn main() -> Result<()> {
5    println!("=== Example of Using MultiIndex ===\n");
6
7    // =========================================
8    // Creating a MultiIndex
9    // =========================================
10
11    println!("--- Creating MultiIndex from Tuples ---");
12
13    // Create MultiIndex from tuples (vector of vectors)
14    let tuples = vec![
15        vec!["A".to_string(), "a".to_string()],
16        vec!["A".to_string(), "b".to_string()],
17        vec!["B".to_string(), "a".to_string()],
18        vec!["B".to_string(), "b".to_string()],
19    ];
20
21    let names = Some(vec![Some("first".to_string()), Some("second".to_string())]);
22    let multi_idx = MultiIndex::from_tuples(tuples, names)?;
23
24    println!("MultiIndex: {:?}\n", multi_idx);
25    println!("Number of Levels: {}", multi_idx.n_levels());
26    println!("Number of Rows: {}\n", multi_idx.len());
27
28    // =========================================
29    // Operations on MultiIndex
30    // =========================================
31
32    println!("--- Retrieving Level Values ---");
33    let level0_values = multi_idx.get_level_values(0)?;
34    println!("Values in Level 0: {:?}", level0_values);
35
36    let level1_values = multi_idx.get_level_values(1)?;
37    println!("Values in Level 1: {:?}", level1_values);
38
39    println!("--- Swapping Levels ---");
40    let swapped = multi_idx.swaplevel(0, 1)?;
41    println!("After Swapping Levels: {:?}\n", swapped);
42
43    // =========================================
44    // DataFrame with MultiIndex
45    // =========================================
46
47    println!("--- DataFrame with MultiIndex ---");
48
49    // Create DataFrame
50    let mut df = DataFrame::with_multi_index(multi_idx.clone());
51
52    // Add data
53    let data = vec![
54        "data1".to_string(),
55        "data2".to_string(),
56        "data3".to_string(),
57        "data4".to_string(),
58    ];
59    df.add_column(
60        "data".to_string(),
61        pandrs::Series::new(data, Some("data".to_string()))?,
62    )?;
63
64    println!("DataFrame: {:?}\n", df);
65    println!("Number of Rows: {}", df.row_count());
66    println!("Number of Columns: {}", df.column_count());
67
68    // =========================================
69    // Conversion Between Simple Index and MultiIndex
70    // =========================================
71
72    println!("\n--- Example of Index Conversion ---");
73
74    // Create DataFrame from simple index
75    let simple_idx = Index::new(vec!["X".to_string(), "Y".to_string(), "Z".to_string()])?;
76    let mut simple_df = DataFrame::with_index(simple_idx);
77
78    // Add data
79    let values = vec![100, 200, 300];
80    let str_values: Vec<String> = values.iter().map(|v| v.to_string()).collect();
81    simple_df.add_column(
82        "values".to_string(),
83        pandrs::Series::new(str_values, Some("values".to_string()))?,
84    )?;
85
86    println!("Simple Index DataFrame: {:?}", simple_df);
87
88    // Prepare for conversion to MultiIndex
89    let tuples = vec![
90        vec!["Category".to_string(), "X".to_string()],
91        vec!["Category".to_string(), "Y".to_string()],
92        vec!["Category".to_string(), "Z".to_string()],
93    ];
94
95    // Create and set MultiIndex
96    let new_multi_idx = MultiIndex::from_tuples(tuples, None)?;
97    simple_df.set_multi_index(new_multi_idx)?;
98
99    println!("After Conversion to MultiIndex: {:?}", simple_df);
100
101    println!("\n=== Sample Complete ===");
102    Ok(())
103}
Source

pub fn get_column_numeric_values(&self, column_name: &str) -> Result<Vec<f64>>

Get the values of the named column as a `Vec<f64>`

Examples found in repository?
examples/parallel_example.rs (line 89)
4fn main() -> Result<(), Box<dyn Error>> {
5    println!("=== Example of Parallel Processing Features ===\n");
6
7    // Create sample data
8    let numbers = Series::new(
9        vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
10        Some("numbers".to_string()),
11    )?;
12
13    // Parallel map: square each number
14    println!("Example of parallel map processing:");
15    let squared = numbers.par_map(|x| x * x);
16    println!("Original values: {:?}", numbers.values());
17    println!("Squared values: {:?}", squared.values());
18
19    // Parallel filter: keep only even numbers
20    println!("\nExample of parallel filtering:");
21    let even_numbers = numbers.par_filter(|x| x % 2 == 0);
22    println!("Even Numbers: {:?}", even_numbers.values());
23
24    // Processing data containing NA
25    let na_data = vec![
26        NA::Value(10),
27        NA::Value(20),
28        NA::NA,
29        NA::Value(40),
30        NA::NA,
31        NA::Value(60),
32    ];
33    let na_series = NASeries::new(na_data, Some("na_numbers".to_string()))?;
34
35    println!("\nParallel processing of data containing NA:");
36    let na_tripled = na_series.par_map(|x| x * 3);
37    println!("Original values: {:?}", na_series.values());
38    println!("Tripled values: {:?}", na_tripled.values());
39
40    // Parallel processing of DataFrame
41    println!("\nParallel processing of DataFrame:");
42
43    // Creating a sample DataFrame
44    let mut df = DataFrame::new();
45    let names = Series::new(
46        vec!["Alice", "Bob", "Charlie", "David", "Eve"],
47        Some("name".to_string()),
48    )?;
49    let ages = Series::new(vec![25, 30, 35, 40, 45], Some("age".to_string()))?;
50    let scores = Series::new(vec![85, 90, 78, 92, 88], Some("score".to_string()))?;
51
52    df.add_column("name".to_string(), names)?;
53    df.add_column("age".to_string(), ages)?;
54    df.add_column("score".to_string(), scores)?;
55
56    // Parallel transformation of DataFrame
57    println!("Example of DataFrame.par_apply:");
58    let transformed_df = df.par_apply(|col, _row, val| {
59        match col {
60            "age" => {
61                // Add 1 to age
62                let age: i32 = val.parse().unwrap_or(0);
63                (age + 1).to_string()
64            }
65            "score" => {
66                // Add 5 to score
67                let score: i32 = val.parse().unwrap_or(0);
68                (score + 5).to_string()
69            }
70            _ => val.to_string(),
71        }
72    })?;
73
74    println!(
75        "Original DF row count: {}, column count: {}",
76        df.row_count(),
77        df.column_count()
78    );
79    println!(
80        "Transformed DF row count: {}, column count: {}",
81        transformed_df.row_count(),
82        transformed_df.column_count()
83    );
84
85    // Filtering rows
86    println!("\nExample of DataFrame.par_filter_rows:");
87    let filtered_df = df.par_filter_rows(|row| {
88        // Keep only rows where score > 85
89        if let Ok(values) = df.get_column_numeric_values("score") {
90            if row < values.len() {
91                return values[row] > 85.0;
92            }
93        }
94        false
95    })?;
96
97    println!("Row count after filtering: {}", filtered_df.row_count());
98
99    // Example of using ParallelUtils
100    println!("\nExample of ParallelUtils features:");
101
102    let unsorted = vec![5, 3, 8, 1, 9, 4, 7, 2, 6];
103    let sorted = ParallelUtils::par_sort(unsorted.clone());
104    println!("Before sorting: {:?}", unsorted);
105    println!("After sorting: {:?}", sorted);
106
107    let numbers_vec = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
108    let sum = ParallelUtils::par_sum(&numbers_vec);
109    let mean = ParallelUtils::par_mean(&numbers_vec);
110    let min = ParallelUtils::par_min(&numbers_vec);
111    let max = ParallelUtils::par_max(&numbers_vec);
112
113    println!("Sum: {}", sum);
114    println!("Mean: {}", mean.unwrap());
115    println!("Min: {}", min.unwrap());
116    println!("Max: {}", max.unwrap());
117
118    println!("\n=== Example of Parallel Processing Features Complete ===");
119    Ok(())
120}
Source

pub fn add_row_data(&mut self, row_data: Vec<Box<dyn DValue>>) -> Result<()>

Add a row to the DataFrame

Source

pub fn filter<F>(&self, column_name: &str, predicate: F) -> Result<Self>
where F: Fn(&Box<dyn DValue>) -> bool,

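Filter rows based on a predicate over `Box<dyn DValue>` values for the column named by `column_name`, returning the result as a new DataFrame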

Source

pub fn mean(&self, column_name: &str) -> Result<f64>

Compute the mean of a column

Source

pub fn group_by(&self, _column_name: &str) -> Result<()>

Group by a column. Note that the signature returns `Result<()>`, so no grouped data is handed back to the caller.

Source

pub fn gpu_accelerate(&self) -> Result<Self>

Enable GPU acceleration for a DataFrame, returning the result as a new DataFrame (`self` is only borrowed)

Source

pub fn corr_matrix(&self, _columns: &[&str]) -> Result<()>

Calculate a correlation matrix. Note that the signature returns `Result<()>`, so no matrix is handed back to the caller.

Source

pub fn head(&self, n: usize) -> Result<String>

Return the head (first `n` rows) of the DataFrame formatted as a `String`

Source

pub fn add_row_data_from_hashmap( &mut self, row_data: HashMap<String, String>, ) -> Result<()>

Add a row to the DataFrame using a HashMap of column names to values

Source

pub fn is_categorical(&self, column_name: &str) -> bool

Check if a column is categorical

Examples found in repository?
examples/categorical_na_example.rs (line 147)
6fn main() -> Result<()> {
7    println!("=== Example of Categorical Data with Missing Values ===\n");
8
9    // 1. Create categorical data
10    println!("1. Create categorical data");
11
12    // Create a vector with NA values
13    let values = vec![
14        NA::Value("Red".to_string()),
15        NA::Value("Blue".to_string()),
16        NA::NA, // Missing value
17        NA::Value("Green".to_string()),
18        NA::Value("Red".to_string()), // Duplicate value
19    ];
20
21    // Create categorical data type from vector
22    // Create as unordered category
23    let cat = StringCategorical::from_na_vec(
24        values.clone(),
25        None,                              // Auto-detect categories
26        Some(CategoricalOrder::Unordered), // Unordered
27    )?;
28
29    println!("Categories: {:?}", cat.categories());
30    println!("Number of categories: {}", cat.categories().len());
31    println!("Number of data: {}", cat.len());
32
33    // Display category codes
34    println!("Internal codes: {:?}", cat.codes());
35    println!();
36
37    // 2. Create ordered categorical data
38    println!("2. Create ordered categorical data");
39
40    // Explicitly ordered category list
41    let ordered_categories = vec!["Low".to_string(), "Medium".to_string(), "High".to_string()];
42
43    // Create a vector with NA values
44    let values = vec![
45        NA::Value("Medium".to_string()),
46        NA::Value("Low".to_string()),
47        NA::NA, // Missing value
48        NA::Value("High".to_string()),
49        NA::Value("Medium".to_string()), // Duplicate value
50    ];
51
52    // Create as ordered category
53    let ordered_cat = StringCategorical::from_na_vec(
54        values.clone(),
55        Some(ordered_categories),        // Explicit category list
56        Some(CategoricalOrder::Ordered), // Ordered
57    )?;
58
59    println!("Ordered categories: {:?}", ordered_cat.categories());
60    println!("Number of categories: {}", ordered_cat.categories().len());
61    println!("Number of data: {}", ordered_cat.len());
62
63    // Display category codes
64    println!("Internal codes: {:?}", ordered_cat.codes());
65    println!();
66
67    // 3. Operations on categorical data
68    println!("3. Operations on categorical data");
69
70    // Create two categorical data
71    let values1 = vec![
72        NA::Value("A".to_string()),
73        NA::Value("B".to_string()),
74        NA::NA,
75        NA::Value("C".to_string()),
76    ];
77
78    let values2 = vec![
79        NA::Value("B".to_string()),
80        NA::Value("C".to_string()),
81        NA::Value("D".to_string()),
82        NA::NA,
83    ];
84
85    let cat1 = StringCategorical::from_na_vec(values1, None, None)?;
86    let cat2 = StringCategorical::from_na_vec(values2, None, None)?;
87
88    // Set operations
89    let union = cat1.union(&cat2)?; // Union
90    let intersection = cat1.intersection(&cat2)?; // Intersection
91    let difference = cat1.difference(&cat2)?; // Difference
92
93    println!("Categories of set 1: {:?}", cat1.categories());
94    println!("Categories of set 2: {:?}", cat2.categories());
95    println!("Union: {:?}", union.categories());
96    println!("Intersection: {:?}", intersection.categories());
97    println!("Difference (set 1 - set 2): {:?}", difference.categories());
98    println!();
99
100    // 4. Using categorical columns in DataFrame
101    println!("4. Using categorical columns in DataFrame");
102
103    // Create a vector with NA values (first create for categorical)
104    let values = vec![
105        NA::Value("High".to_string()),
106        NA::Value("Medium".to_string()),
107        NA::NA,
108        NA::Value("Low".to_string()),
109    ];
110
111    // Simplified for sample code
112    let order_cats = vec!["Low".to_string(), "Medium".to_string(), "High".to_string()];
113
114    // Create categorical data
115    let cat_eval = StringCategorical::from_na_vec(
116        values.clone(), // Clone it
117        Some(order_cats),
118        Some(CategoricalOrder::Ordered),
119    )?;
120
121    // Output the size of the created categorical data
122    println!("Size of created categorical data: {}", cat_eval.len());
123
124    // Add as categorical column
125    let categoricals = vec![("Evaluation".to_string(), cat_eval)];
126    let mut df = DataFrame::from_categoricals(categoricals)?;
127
128    // Check the number of rows in the data and match it
129    println!("Number of rows in DataFrame: {}", df.row_count());
130    println!("Note: NA values are excluded when creating DataFrame");
131
132    // Add numeric column (match the number of rows)
133    let scores = vec![95, 80, 0]; // Match the number of rows in DataFrame
134    println!("Size of scores: {}", scores.len());
135
136    df.add_column(
137        "Score".to_string(),
138        Series::new(scores, Some("Score".to_string()))?,
139    )?;
140
141    println!("DataFrame: ");
142    println!("{:#?}", df);
143
144    // Retrieve and verify categorical data
145    println!(
146        "Is 'Evaluation' column categorical: {}",
147        df.is_categorical("Evaluation")
148    );
149
150    // Explicitly handle errors
151    match df.get_categorical::<String>("Evaluation") {
152        Ok(cat_col) => println!(
153            "Categories of 'Evaluation' column: {:?}",
154            cat_col.categories()
155        ),
156        Err(_) => println!("Failed to retrieve categories of 'Evaluation' column"),
157    }
158    println!();
159
160    // 5. Input and output with CSV file
161    println!("5. Input and output with CSV file");
162
163    // Save to temporary file
164    let temp_path = Path::new("/tmp/categorical_example.csv");
165    df.to_csv(temp_path)?;
166
167    println!("Saved to CSV file: {}", temp_path.display());
168
169    // Load from file
170    let df_loaded = DataFrame::from_csv(temp_path, true)?;
171
172    // After loading from CSV, categorical information is lost (loaded as regular string column)
173    println!("Data loaded from CSV:");
174    println!("{:#?}", df_loaded);
175
176    // Check data loaded from CSV
177
178    // Note that data loaded from CSV is in a special format
179    println!("Example of data format loaded from CSV:");
180    println!(
181        "First value of 'Evaluation' column: {:?}",
182        df_loaded
183            .get_column::<String>("Evaluation")
184            .unwrap()
185            .values()[0]
186    );
187
188    // To reconstruct categorical data from this CSV loaded data,
189    // more complex processing is required, so the following is a simple example
190
191    // Create new categorical data as an example
192    let new_values = vec![
193        NA::Value("High".to_string()),
194        NA::Value("Medium".to_string()),
195        NA::NA,
196        NA::Value("Low".to_string()),
197    ];
198
199    let new_cat =
200        StringCategorical::from_na_vec(new_values, None, Some(CategoricalOrder::Ordered))?;
201
202    println!("Example of newly created categorical data:");
203    println!("Categories: {:?}", new_cat.categories());
204    println!("Order: {:?}", new_cat.ordered());
205
206    println!("\nTo actually convert data loaded from CSV to categorical data,");
207    println!("parsing processing according to the format and string escaping method of the CSV is required.");
208
209    println!("\n=== Sample End ===");
210    Ok(())
211}
More examples
Hide additional examples
examples/categorical_example.rs (line 155)
8fn main() -> Result<()> {
9    println!("=== Example of Using Categorical Data Type ===\n");
10
11    // ===========================================================
12    // Creating Basic Categorical Data
13    // ===========================================================
14
15    println!("--- Creating Basic Categorical Data ---");
16    let values = vec!["Tokyo", "Osaka", "Tokyo", "Nagoya", "Osaka", "Tokyo"];
17    let values_str: Vec<String> = values.iter().map(|s| s.to_string()).collect();
18
19    // Create categorical data (unique values are automatically extracted)
20    // Changed: Now using boolean instead of Some(CategoricalOrder::Unordered)
21    let cat = StringCategorical::new(
22        values_str, None,  // Automatically detect categories
23        false, // Unordered
24    )?;
25
26    println!("Original Data: {:?}", values);
27    println!("Categories: {:?}", cat.categories());
28    println!("Order Type: {:?}", cat.ordered());
29    println!("Data Length: {}", cat.len());
30
31    // Retrieve actual values from categorical data
32    println!(
33        "\nFirst 3 values: {} {} {}",
34        cat.get(0).unwrap_or(&"None".to_string()),
35        cat.get(1).unwrap_or(&"None".to_string()),
36        cat.get(2).unwrap_or(&"None".to_string())
37    );
38    println!("\nValues stored internally as codes: {:?}", cat.codes());
39
40    // ===========================================================
41    // Creating with Explicit Category List
42    // ===========================================================
43
44    println!("\n--- Creating with Explicit Category List ---");
45    let values2 = vec!["Red", "Blue", "Red"];
46    let values2_str: Vec<String> = values2.iter().map(|s| s.to_string()).collect();
47
48    // Define all categories beforehand
49    let categories = vec!["Red", "Blue", "Green", "Yellow"];
50    let categories_str: Vec<String> = categories.iter().map(|s| s.to_string()).collect();
51
52    // Create ordered categorical data
53    // Changed: Now using boolean instead of Some(CategoricalOrder::Ordered)
54    let cat2 = StringCategorical::new(
55        values2_str,
56        Some(categories_str), // Explicit category list
57        true,                 // Ordered
58    )?;
59
60    println!("Categories: {:?}", cat2.categories()); // Red, Blue, Green, Yellow
61    println!("Codes: {:?}", cat2.codes());
62
63    // ===========================================================
64    // Operations on Categorical Data
65    // ===========================================================
66
67    println!("\n--- Example of Categorical Operations ---");
68
69    // Base categorical data
70    // Changed: Using false instead of None for the ordered parameter
71    let fruits = vec!["Apple", "Banana", "Apple", "Orange"];
72    let fruits_str: Vec<String> = fruits.iter().map(|s| s.to_string()).collect();
73    let mut fruit_cat = StringCategorical::new(fruits_str, None, false)?;
74
75    println!("Original Categories: {:?}", fruit_cat.categories());
76
77    // Add categories
78    let new_cats = vec!["Grape", "Strawberry"];
79    let new_cats_str: Vec<String> = new_cats.iter().map(|s| s.to_string()).collect();
80    fruit_cat.add_categories(new_cats_str)?;
81
82    println!("Categories after addition: {:?}", fruit_cat.categories());
83
84    // Change category order
85    let reordered = vec!["Banana", "Strawberry", "Orange", "Apple", "Grape"];
86    let reordered_str: Vec<String> = reordered.iter().map(|s| s.to_string()).collect();
87    fruit_cat.reorder_categories(reordered_str)?;
88
89    println!("Categories after reordering: {:?}", fruit_cat.categories());
90    println!("Codes: {:?}", fruit_cat.codes());
91
92    // ===========================================================
93    // Integration with DataFrame
94    // ===========================================================
95
96    println!("\n--- Integration of Categorical Data with DataFrame ---");
97
98    // Create a basic DataFrame
99    let mut df = DataFrame::new();
100
101    // Add regular columns
102    let regions = vec!["Hokkaido", "Kanto", "Kansai", "Kyushu", "Kanto", "Kansai"];
103    let regions_str: Vec<String> = regions.iter().map(|s| s.to_string()).collect();
104    let pop = vec!["Low", "High", "High", "Medium", "High", "High"];
105    let pop_str: Vec<String> = pop.iter().map(|s| s.to_string()).collect();
106
107    df.add_column(
108        "Region".to_string(),
109        Series::new(regions_str, Some("Region".to_string()))?,
110    )?;
111    df.add_column(
112        "Population".to_string(),
113        Series::new(pop_str, Some("Population".to_string()))?,
114    )?;
115
116    println!("Original DataFrame:\n{:?}", df);
117
118    // ===========================================================
119    // Creating Simplified Categorical DataFrame
120    // ===========================================================
121
122    // Create a DataFrame directly from categorical data
123    println!("\n--- Creating DataFrame with Categorical Data ---");
124
125    // Create categorical data
126    // Changed: Using boolean instead of Some(CategoricalOrder::Ordered)
127    let populations = vec!["Low", "Medium", "High"];
128    let populations_str: Vec<String> = populations.iter().map(|s| s.to_string()).collect();
129    let pop_cat = StringCategorical::new(
130        populations_str,
131        None, // Automatically detect
132        true, // Ordered
133    )?;
134
135    // Region data
136    let regions = vec!["Hokkaido", "Kanto", "Kansai"];
137    let regions_str: Vec<String> = regions.iter().map(|s| s.to_string()).collect();
138
139    // Create DataFrame from both categorical data
140    let categoricals = vec![("Population".to_string(), pop_cat)];
141
142    let mut df_cat = DataFrame::from_categoricals(categoricals)?;
143
144    // Add region column
145    df_cat.add_column(
146        "Region".to_string(),
147        Series::new(regions_str, Some("Region".to_string()))?,
148    )?;
149
150    println!("\nDataFrame after adding categorical data:\n{:?}", df_cat);
151
152    // Check if columns are categorical
153    println!(
154        "\nIs 'Population' column categorical: {}",
155        df_cat.is_categorical("Population")
156    );
157    println!(
158        "Is 'Region' column categorical: {}",
159        df_cat.is_categorical("Region")
160    );
161
162    // ===========================================================
163    // Example of Multi-Categorical DataFrame
164    // ===========================================================
165
166    println!("\n--- Example of Multi-Categorical DataFrame ---");
167
168    // Create product and color data as separate categories
169    // Changed: Using false instead of None for the ordered parameter
170    let products = vec!["A", "B", "C"];
171    let products_str: Vec<String> = products.iter().map(|s| s.to_string()).collect();
172    let product_cat = StringCategorical::new(products_str, None, false)?;
173
174    let colors = vec!["Red", "Blue", "Green"];
175    let colors_str: Vec<String> = colors.iter().map(|s| s.to_string()).collect();
176    let color_cat = StringCategorical::new(colors_str, None, false)?;
177
178    // Create a DataFrame containing both categories
179    let multi_categoricals = vec![
180        ("Product".to_string(), product_cat),
181        ("Color".to_string(), color_cat),
182    ];
183
184    let multi_df = DataFrame::from_categoricals(multi_categoricals)?;
185
186    println!("Multi-Categorical DataFrame:\n{:?}", multi_df);
187    println!(
188        "\nIs 'Product' column categorical: {}",
189        multi_df.is_categorical("Product")
190    );
191    println!(
192        "Is 'Color' column categorical: {}",
193        multi_df.is_categorical("Color")
194    );
195
196    // ===========================================================
197    // Aggregation and Analysis of Categorical Data
198    // ===========================================================
199
200    println!("\n--- Aggregation and Grouping of Categorical Data ---");
201
202    // Start with a simple DataFrame
203    let mut df_simple = DataFrame::new();
204
205    // Add product data
206    let products = vec!["A", "B", "C", "A", "B"];
207    let products_str: Vec<String> = products.iter().map(|s| s.to_string()).collect();
208    let sales = vec!["100", "150", "200", "120", "180"];
209    let sales_str: Vec<String> = sales.iter().map(|s| s.to_string()).collect();
210
211    df_simple.add_column(
212        "Product".to_string(),
213        Series::new(products_str.clone(), Some("Product".to_string()))?,
214    )?;
215    df_simple.add_column(
216        "Sales".to_string(),
217        Series::new(sales_str, Some("Sales".to_string()))?,
218    )?;
219
220    println!("Original DataFrame:\n{:?}", df_simple);
221
222    // Aggregate by product
223    let product_counts = df_simple.value_counts("Product")?;
224    println!("\nProduct Counts:\n{:?}", product_counts);
225
226    // Transformation and interaction between categorical and series
227    println!("\n--- Interaction between Categorical and Series ---");
228
229    // Create a simple categorical series
230    // Changed: Using false instead of None for the ordered parameter
231    let letter_cat = StringCategorical::new(
232        vec!["A".to_string(), "B".to_string(), "C".to_string()],
233        None,
234        false,
235    )?;
236
237    // Convert to series
238    let letter_series = letter_cat.to_series(Some("Letter".to_string()))?;
239    println!("Converted from categorical to series: {:?}", letter_series);
240
241    // Additional information about categorical data
242    println!("\n--- Characteristics of Categorical Data ---");
243    println!(
244        "Categorical data is stored in memory only once, regardless of repeated string values."
245    );
246    println!(
247        "This makes it particularly efficient for datasets with many duplicate string values."
248    );
249    println!("Additionally, ordered categorical data allows meaningful sorting of data.");
250
251    println!("\n=== Sample Complete ===");
252    Ok(())
253}
Source

pub fn sample(&self, indices: &[usize]) -> Result<Self>

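Create a new DataFrame from the rows at the given indices. (Note: the previous description duplicated the one for `get_categorical`; this wording is inferred from the signature and should be confirmed against the implementation.)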

Source

pub fn get_categorical<T: 'static + Debug + Clone + Eq + Hash + Send + Sync>( &self, column_name: &str, ) -> Result<Categorical<T>>

Get the named column as a categorical with element type `T`

Examples found in repository?
examples/categorical_na_example.rs (line 151)
6fn main() -> Result<()> {
7    println!("=== Example of Categorical Data with Missing Values ===\n");
8
9    // 1. Create categorical data
10    println!("1. Create categorical data");
11
12    // Create a vector with NA values
13    let values = vec![
14        NA::Value("Red".to_string()),
15        NA::Value("Blue".to_string()),
16        NA::NA, // Missing value
17        NA::Value("Green".to_string()),
18        NA::Value("Red".to_string()), // Duplicate value
19    ];
20
21    // Create categorical data type from vector
22    // Create as unordered category
23    let cat = StringCategorical::from_na_vec(
24        values.clone(),
25        None,                              // Auto-detect categories
26        Some(CategoricalOrder::Unordered), // Unordered
27    )?;
28
29    println!("Categories: {:?}", cat.categories());
30    println!("Number of categories: {}", cat.categories().len());
31    println!("Number of data: {}", cat.len());
32
33    // Display category codes
34    println!("Internal codes: {:?}", cat.codes());
35    println!();
36
37    // 2. Create ordered categorical data
38    println!("2. Create ordered categorical data");
39
40    // Explicitly ordered category list
41    let ordered_categories = vec!["Low".to_string(), "Medium".to_string(), "High".to_string()];
42
43    // Create a vector with NA values
44    let values = vec![
45        NA::Value("Medium".to_string()),
46        NA::Value("Low".to_string()),
47        NA::NA, // Missing value
48        NA::Value("High".to_string()),
49        NA::Value("Medium".to_string()), // Duplicate value
50    ];
51
52    // Create as ordered category
53    let ordered_cat = StringCategorical::from_na_vec(
54        values.clone(),
55        Some(ordered_categories),        // Explicit category list
56        Some(CategoricalOrder::Ordered), // Ordered
57    )?;
58
59    println!("Ordered categories: {:?}", ordered_cat.categories());
60    println!("Number of categories: {}", ordered_cat.categories().len());
61    println!("Number of data: {}", ordered_cat.len());
62
63    // Display category codes
64    println!("Internal codes: {:?}", ordered_cat.codes());
65    println!();
66
67    // 3. Operations on categorical data
68    println!("3. Operations on categorical data");
69
70    // Create two categorical data
71    let values1 = vec![
72        NA::Value("A".to_string()),
73        NA::Value("B".to_string()),
74        NA::NA,
75        NA::Value("C".to_string()),
76    ];
77
78    let values2 = vec![
79        NA::Value("B".to_string()),
80        NA::Value("C".to_string()),
81        NA::Value("D".to_string()),
82        NA::NA,
83    ];
84
85    let cat1 = StringCategorical::from_na_vec(values1, None, None)?;
86    let cat2 = StringCategorical::from_na_vec(values2, None, None)?;
87
88    // Set operations
89    let union = cat1.union(&cat2)?; // Union
90    let intersection = cat1.intersection(&cat2)?; // Intersection
91    let difference = cat1.difference(&cat2)?; // Difference
92
93    println!("Categories of set 1: {:?}", cat1.categories());
94    println!("Categories of set 2: {:?}", cat2.categories());
95    println!("Union: {:?}", union.categories());
96    println!("Intersection: {:?}", intersection.categories());
97    println!("Difference (set 1 - set 2): {:?}", difference.categories());
98    println!();
99
100    // 4. Using categorical columns in DataFrame
101    println!("4. Using categorical columns in DataFrame");
102
103    // Create a vector with NA values (first create for categorical)
104    let values = vec![
105        NA::Value("High".to_string()),
106        NA::Value("Medium".to_string()),
107        NA::NA,
108        NA::Value("Low".to_string()),
109    ];
110
111    // Simplified for sample code
112    let order_cats = vec!["Low".to_string(), "Medium".to_string(), "High".to_string()];
113
114    // Create categorical data
115    let cat_eval = StringCategorical::from_na_vec(
116        values.clone(), // Clone it
117        Some(order_cats),
118        Some(CategoricalOrder::Ordered),
119    )?;
120
121    // Output the size of the created categorical data
122    println!("Size of created categorical data: {}", cat_eval.len());
123
124    // Add as categorical column
125    let categoricals = vec![("Evaluation".to_string(), cat_eval)];
126    let mut df = DataFrame::from_categoricals(categoricals)?;
127
128    // Check the number of rows in the data and match it
129    println!("Number of rows in DataFrame: {}", df.row_count());
130    println!("Note: NA values are excluded when creating DataFrame");
131
132    // Add numeric column (match the number of rows)
133    let scores = vec![95, 80, 0]; // Match the number of rows in DataFrame
134    println!("Size of scores: {}", scores.len());
135
136    df.add_column(
137        "Score".to_string(),
138        Series::new(scores, Some("Score".to_string()))?,
139    )?;
140
141    println!("DataFrame: ");
142    println!("{:#?}", df);
143
144    // Retrieve and verify categorical data
145    println!(
146        "Is 'Evaluation' column categorical: {}",
147        df.is_categorical("Evaluation")
148    );
149
150    // Explicitly handle errors
151    match df.get_categorical::<String>("Evaluation") {
152        Ok(cat_col) => println!(
153            "Categories of 'Evaluation' column: {:?}",
154            cat_col.categories()
155        ),
156        Err(_) => println!("Failed to retrieve categories of 'Evaluation' column"),
157    }
158    println!();
159
160    // 5. Input and output with CSV file
161    println!("5. Input and output with CSV file");
162
163    // Save to temporary file
164    let temp_path = Path::new("/tmp/categorical_example.csv");
165    df.to_csv(temp_path)?;
166
167    println!("Saved to CSV file: {}", temp_path.display());
168
169    // Load from file
170    let df_loaded = DataFrame::from_csv(temp_path, true)?;
171
172    // After loading from CSV, categorical information is lost (loaded as regular string column)
173    println!("Data loaded from CSV:");
174    println!("{:#?}", df_loaded);
175
176    // Check data loaded from CSV
177
178    // Note that data loaded from CSV is in a special format
179    println!("Example of data format loaded from CSV:");
180    println!(
181        "First value of 'Evaluation' column: {:?}",
182        df_loaded
183            .get_column::<String>("Evaluation")
184            .unwrap()
185            .values()[0]
186    );
187
188    // To reconstruct categorical data from this CSV loaded data,
189    // more complex processing is required, so the following is a simple example
190
191    // Create new categorical data as an example
192    let new_values = vec![
193        NA::Value("High".to_string()),
194        NA::Value("Medium".to_string()),
195        NA::NA,
196        NA::Value("Low".to_string()),
197    ];
198
199    let new_cat =
200        StringCategorical::from_na_vec(new_values, None, Some(CategoricalOrder::Ordered))?;
201
202    println!("Example of newly created categorical data:");
203    println!("Categories: {:?}", new_cat.categories());
204    println!("Order: {:?}", new_cat.ordered());
205
206    println!("\nTo actually convert data loaded from CSV to categorical data,");
207    println!("parsing processing according to the format and string escaping method of the CSV is required.");
208
209    println!("\n=== Sample End ===");
210    Ok(())
211}
Source

pub fn is_numeric_column(&self, column_name: &str) -> bool

Check if a column is numeric

Source

pub fn add_na_series_as_categorical( &mut self, name: String, series: NASeries<String>, categories: Option<Vec<String>>, ordered: Option<CategoricalOrder>, ) -> Result<&mut Self>

Add a NASeries as a categorical column

Source

pub fn from_categoricals( categoricals: Vec<(String, StringCategorical)>, ) -> Result<Self>

Create a DataFrame from a list of (column name, categorical) pairs

Examples found in repository?
examples/categorical_na_example.rs (line 126)
6fn main() -> Result<()> {
7    println!("=== Example of Categorical Data with Missing Values ===\n");
8
9    // 1. Create categorical data
10    println!("1. Create categorical data");
11
12    // Create a vector with NA values
13    let values = vec![
14        NA::Value("Red".to_string()),
15        NA::Value("Blue".to_string()),
16        NA::NA, // Missing value
17        NA::Value("Green".to_string()),
18        NA::Value("Red".to_string()), // Duplicate value
19    ];
20
21    // Create categorical data type from vector
22    // Create as unordered category
23    let cat = StringCategorical::from_na_vec(
24        values.clone(),
25        None,                              // Auto-detect categories
26        Some(CategoricalOrder::Unordered), // Unordered
27    )?;
28
29    println!("Categories: {:?}", cat.categories());
30    println!("Number of categories: {}", cat.categories().len());
31    println!("Number of data: {}", cat.len());
32
33    // Display category codes
34    println!("Internal codes: {:?}", cat.codes());
35    println!();
36
37    // 2. Create ordered categorical data
38    println!("2. Create ordered categorical data");
39
40    // Explicitly ordered category list
41    let ordered_categories = vec!["Low".to_string(), "Medium".to_string(), "High".to_string()];
42
43    // Create a vector with NA values
44    let values = vec![
45        NA::Value("Medium".to_string()),
46        NA::Value("Low".to_string()),
47        NA::NA, // Missing value
48        NA::Value("High".to_string()),
49        NA::Value("Medium".to_string()), // Duplicate value
50    ];
51
52    // Create as ordered category
53    let ordered_cat = StringCategorical::from_na_vec(
54        values.clone(),
55        Some(ordered_categories),        // Explicit category list
56        Some(CategoricalOrder::Ordered), // Ordered
57    )?;
58
59    println!("Ordered categories: {:?}", ordered_cat.categories());
60    println!("Number of categories: {}", ordered_cat.categories().len());
61    println!("Number of data: {}", ordered_cat.len());
62
63    // Display category codes
64    println!("Internal codes: {:?}", ordered_cat.codes());
65    println!();
66
67    // 3. Operations on categorical data
68    println!("3. Operations on categorical data");
69
70    // Create two categorical data
71    let values1 = vec![
72        NA::Value("A".to_string()),
73        NA::Value("B".to_string()),
74        NA::NA,
75        NA::Value("C".to_string()),
76    ];
77
78    let values2 = vec![
79        NA::Value("B".to_string()),
80        NA::Value("C".to_string()),
81        NA::Value("D".to_string()),
82        NA::NA,
83    ];
84
85    let cat1 = StringCategorical::from_na_vec(values1, None, None)?;
86    let cat2 = StringCategorical::from_na_vec(values2, None, None)?;
87
88    // Set operations
89    let union = cat1.union(&cat2)?; // Union
90    let intersection = cat1.intersection(&cat2)?; // Intersection
91    let difference = cat1.difference(&cat2)?; // Difference
92
93    println!("Categories of set 1: {:?}", cat1.categories());
94    println!("Categories of set 2: {:?}", cat2.categories());
95    println!("Union: {:?}", union.categories());
96    println!("Intersection: {:?}", intersection.categories());
97    println!("Difference (set 1 - set 2): {:?}", difference.categories());
98    println!();
99
100    // 4. Using categorical columns in DataFrame
101    println!("4. Using categorical columns in DataFrame");
102
103    // Create a vector with NA values (first create for categorical)
104    let values = vec![
105        NA::Value("High".to_string()),
106        NA::Value("Medium".to_string()),
107        NA::NA,
108        NA::Value("Low".to_string()),
109    ];
110
111    // Simplified for sample code
112    let order_cats = vec!["Low".to_string(), "Medium".to_string(), "High".to_string()];
113
114    // Create categorical data
115    let cat_eval = StringCategorical::from_na_vec(
116        values.clone(), // Clone it
117        Some(order_cats),
118        Some(CategoricalOrder::Ordered),
119    )?;
120
121    // Output the size of the created categorical data
122    println!("Size of created categorical data: {}", cat_eval.len());
123
124    // Add as categorical column
125    let categoricals = vec![("Evaluation".to_string(), cat_eval)];
126    let mut df = DataFrame::from_categoricals(categoricals)?;
127
128    // Check the number of rows in the data and match it
129    println!("Number of rows in DataFrame: {}", df.row_count());
130    println!("Note: NA values are excluded when creating DataFrame");
131
132    // Add numeric column (match the number of rows)
133    let scores = vec![95, 80, 0]; // Match the number of rows in DataFrame
134    println!("Size of scores: {}", scores.len());
135
136    df.add_column(
137        "Score".to_string(),
138        Series::new(scores, Some("Score".to_string()))?,
139    )?;
140
141    println!("DataFrame: ");
142    println!("{:#?}", df);
143
144    // Retrieve and verify categorical data
145    println!(
146        "Is 'Evaluation' column categorical: {}",
147        df.is_categorical("Evaluation")
148    );
149
150    // Explicitly handle errors
151    match df.get_categorical::<String>("Evaluation") {
152        Ok(cat_col) => println!(
153            "Categories of 'Evaluation' column: {:?}",
154            cat_col.categories()
155        ),
156        Err(_) => println!("Failed to retrieve categories of 'Evaluation' column"),
157    }
158    println!();
159
160    // 5. Input and output with CSV file
161    println!("5. Input and output with CSV file");
162
163    // Save to temporary file
164    let temp_path = Path::new("/tmp/categorical_example.csv");
165    df.to_csv(temp_path)?;
166
167    println!("Saved to CSV file: {}", temp_path.display());
168
169    // Load from file
170    let df_loaded = DataFrame::from_csv(temp_path, true)?;
171
172    // After loading from CSV, categorical information is lost (loaded as regular string column)
173    println!("Data loaded from CSV:");
174    println!("{:#?}", df_loaded);
175
176    // Check data loaded from CSV
177
178    // Note that data loaded from CSV is in a special format
179    println!("Example of data format loaded from CSV:");
180    println!(
181        "First value of 'Evaluation' column: {:?}",
182        df_loaded
183            .get_column::<String>("Evaluation")
184            .unwrap()
185            .values()[0]
186    );
187
188    // To reconstruct categorical data from this CSV loaded data,
189    // more complex processing is required, so the following is a simple example
190
191    // Create new categorical data as an example
192    let new_values = vec![
193        NA::Value("High".to_string()),
194        NA::Value("Medium".to_string()),
195        NA::NA,
196        NA::Value("Low".to_string()),
197    ];
198
199    let new_cat =
200        StringCategorical::from_na_vec(new_values, None, Some(CategoricalOrder::Ordered))?;
201
202    println!("Example of newly created categorical data:");
203    println!("Categories: {:?}", new_cat.categories());
204    println!("Order: {:?}", new_cat.ordered());
205
206    println!("\nTo actually convert data loaded from CSV to categorical data,");
207    println!("parsing processing according to the format and string escaping method of the CSV is required.");
208
209    println!("\n=== Sample End ===");
210    Ok(())
211}
More examples
Hide additional examples
examples/categorical_example.rs (line 142)
8fn main() -> Result<()> {
9    println!("=== Example of Using Categorical Data Type ===\n");
10
11    // ===========================================================
12    // Creating Basic Categorical Data
13    // ===========================================================
14
15    println!("--- Creating Basic Categorical Data ---");
16    let values = vec!["Tokyo", "Osaka", "Tokyo", "Nagoya", "Osaka", "Tokyo"];
17    let values_str: Vec<String> = values.iter().map(|s| s.to_string()).collect();
18
19    // Create categorical data (unique values are automatically extracted)
20    // Changed: Now using boolean instead of Some(CategoricalOrder::Unordered)
21    let cat = StringCategorical::new(
22        values_str, None,  // Automatically detect categories
23        false, // Unordered
24    )?;
25
26    println!("Original Data: {:?}", values);
27    println!("Categories: {:?}", cat.categories());
28    println!("Order Type: {:?}", cat.ordered());
29    println!("Data Length: {}", cat.len());
30
31    // Retrieve actual values from categorical data
32    println!(
33        "\nFirst 3 values: {} {} {}",
34        cat.get(0).unwrap_or(&"None".to_string()),
35        cat.get(1).unwrap_or(&"None".to_string()),
36        cat.get(2).unwrap_or(&"None".to_string())
37    );
38    println!("\nValues stored internally as codes: {:?}", cat.codes());
39
40    // ===========================================================
41    // Creating with Explicit Category List
42    // ===========================================================
43
44    println!("\n--- Creating with Explicit Category List ---");
45    let values2 = vec!["Red", "Blue", "Red"];
46    let values2_str: Vec<String> = values2.iter().map(|s| s.to_string()).collect();
47
48    // Define all categories beforehand
49    let categories = vec!["Red", "Blue", "Green", "Yellow"];
50    let categories_str: Vec<String> = categories.iter().map(|s| s.to_string()).collect();
51
52    // Create ordered categorical data
53    // Changed: Now using boolean instead of Some(CategoricalOrder::Ordered)
54    let cat2 = StringCategorical::new(
55        values2_str,
56        Some(categories_str), // Explicit category list
57        true,                 // Ordered
58    )?;
59
60    println!("Categories: {:?}", cat2.categories()); // Red, Blue, Green, Yellow
61    println!("Codes: {:?}", cat2.codes());
62
63    // ===========================================================
64    // Operations on Categorical Data
65    // ===========================================================
66
67    println!("\n--- Example of Categorical Operations ---");
68
69    // Base categorical data
70    // Changed: Using false instead of None for the ordered parameter
71    let fruits = vec!["Apple", "Banana", "Apple", "Orange"];
72    let fruits_str: Vec<String> = fruits.iter().map(|s| s.to_string()).collect();
73    let mut fruit_cat = StringCategorical::new(fruits_str, None, false)?;
74
75    println!("Original Categories: {:?}", fruit_cat.categories());
76
77    // Add categories
78    let new_cats = vec!["Grape", "Strawberry"];
79    let new_cats_str: Vec<String> = new_cats.iter().map(|s| s.to_string()).collect();
80    fruit_cat.add_categories(new_cats_str)?;
81
82    println!("Categories after addition: {:?}", fruit_cat.categories());
83
84    // Change category order
85    let reordered = vec!["Banana", "Strawberry", "Orange", "Apple", "Grape"];
86    let reordered_str: Vec<String> = reordered.iter().map(|s| s.to_string()).collect();
87    fruit_cat.reorder_categories(reordered_str)?;
88
89    println!("Categories after reordering: {:?}", fruit_cat.categories());
90    println!("Codes: {:?}", fruit_cat.codes());
91
92    // ===========================================================
93    // Integration with DataFrame
94    // ===========================================================
95
96    println!("\n--- Integration of Categorical Data with DataFrame ---");
97
98    // Create a basic DataFrame
99    let mut df = DataFrame::new();
100
101    // Add regular columns
102    let regions = vec!["Hokkaido", "Kanto", "Kansai", "Kyushu", "Kanto", "Kansai"];
103    let regions_str: Vec<String> = regions.iter().map(|s| s.to_string()).collect();
104    let pop = vec!["Low", "High", "High", "Medium", "High", "High"];
105    let pop_str: Vec<String> = pop.iter().map(|s| s.to_string()).collect();
106
107    df.add_column(
108        "Region".to_string(),
109        Series::new(regions_str, Some("Region".to_string()))?,
110    )?;
111    df.add_column(
112        "Population".to_string(),
113        Series::new(pop_str, Some("Population".to_string()))?,
114    )?;
115
116    println!("Original DataFrame:\n{:?}", df);
117
118    // ===========================================================
119    // Creating Simplified Categorical DataFrame
120    // ===========================================================
121
122    // Create a DataFrame directly from categorical data
123    println!("\n--- Creating DataFrame with Categorical Data ---");
124
125    // Create categorical data
126    // Changed: Using boolean instead of Some(CategoricalOrder::Ordered)
127    let populations = vec!["Low", "Medium", "High"];
128    let populations_str: Vec<String> = populations.iter().map(|s| s.to_string()).collect();
129    let pop_cat = StringCategorical::new(
130        populations_str,
131        None, // Automatically detect
132        true, // Ordered
133    )?;
134
135    // Region data
136    let regions = vec!["Hokkaido", "Kanto", "Kansai"];
137    let regions_str: Vec<String> = regions.iter().map(|s| s.to_string()).collect();
138
139    // Create DataFrame from both categorical data
140    let categoricals = vec![("Population".to_string(), pop_cat)];
141
142    let mut df_cat = DataFrame::from_categoricals(categoricals)?;
143
144    // Add region column
145    df_cat.add_column(
146        "Region".to_string(),
147        Series::new(regions_str, Some("Region".to_string()))?,
148    )?;
149
150    println!("\nDataFrame after adding categorical data:\n{:?}", df_cat);
151
152    // Check if columns are categorical
153    println!(
154        "\nIs 'Population' column categorical: {}",
155        df_cat.is_categorical("Population")
156    );
157    println!(
158        "Is 'Region' column categorical: {}",
159        df_cat.is_categorical("Region")
160    );
161
162    // ===========================================================
163    // Example of Multi-Categorical DataFrame
164    // ===========================================================
165
166    println!("\n--- Example of Multi-Categorical DataFrame ---");
167
168    // Create product and color data as separate categories
169    // Changed: Using false instead of None for the ordered parameter
170    let products = vec!["A", "B", "C"];
171    let products_str: Vec<String> = products.iter().map(|s| s.to_string()).collect();
172    let product_cat = StringCategorical::new(products_str, None, false)?;
173
174    let colors = vec!["Red", "Blue", "Green"];
175    let colors_str: Vec<String> = colors.iter().map(|s| s.to_string()).collect();
176    let color_cat = StringCategorical::new(colors_str, None, false)?;
177
178    // Create a DataFrame containing both categories
179    let multi_categoricals = vec![
180        ("Product".to_string(), product_cat),
181        ("Color".to_string(), color_cat),
182    ];
183
184    let multi_df = DataFrame::from_categoricals(multi_categoricals)?;
185
186    println!("Multi-Categorical DataFrame:\n{:?}", multi_df);
187    println!(
188        "\nIs 'Product' column categorical: {}",
189        multi_df.is_categorical("Product")
190    );
191    println!(
192        "Is 'Color' column categorical: {}",
193        multi_df.is_categorical("Color")
194    );
195
196    // ===========================================================
197    // Aggregation and Analysis of Categorical Data
198    // ===========================================================
199
200    println!("\n--- Aggregation and Grouping of Categorical Data ---");
201
202    // Start with a simple DataFrame
203    let mut df_simple = DataFrame::new();
204
205    // Add product data
206    let products = vec!["A", "B", "C", "A", "B"];
207    let products_str: Vec<String> = products.iter().map(|s| s.to_string()).collect();
208    let sales = vec!["100", "150", "200", "120", "180"];
209    let sales_str: Vec<String> = sales.iter().map(|s| s.to_string()).collect();
210
211    df_simple.add_column(
212        "Product".to_string(),
213        Series::new(products_str.clone(), Some("Product".to_string()))?,
214    )?;
215    df_simple.add_column(
216        "Sales".to_string(),
217        Series::new(sales_str, Some("Sales".to_string()))?,
218    )?;
219
220    println!("Original DataFrame:\n{:?}", df_simple);
221
222    // Aggregate by product
223    let product_counts = df_simple.value_counts("Product")?;
224    println!("\nProduct Counts:\n{:?}", product_counts);
225
226    // Transformation and interaction between categorical and series
227    println!("\n--- Interaction between Categorical and Series ---");
228
229    // Create a simple categorical series
230    // Changed: Using false instead of None for the ordered parameter
231    let letter_cat = StringCategorical::new(
232        vec!["A".to_string(), "B".to_string(), "C".to_string()],
233        None,
234        false,
235    )?;
236
237    // Convert to series
238    let letter_series = letter_cat.to_series(Some("Letter".to_string()))?;
239    println!("Converted from categorical to series: {:?}", letter_series);
240
241    // Additional information about categorical data
242    println!("\n--- Characteristics of Categorical Data ---");
243    println!(
244        "Categorical data is stored in memory only once, regardless of repeated string values."
245    );
246    println!(
247        "This makes it particularly efficient for datasets with many duplicate string values."
248    );
249    println!("Additionally, ordered categorical data allows meaningful sorting of data.");
250
251    println!("\n=== Sample Complete ===");
252    Ok(())
253}
Source

pub fn value_counts(&self, column_name: &str) -> Result<Series<usize>>

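Count the occurrences of values in the named column, returned as a `Series<usize>` (presumably one count per distinct value — confirm against the implementation)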

Examples found in repository?
examples/categorical_example.rs (line 223)
8fn main() -> Result<()> {
9    println!("=== Example of Using Categorical Data Type ===\n");
10
11    // ===========================================================
12    // Creating Basic Categorical Data
13    // ===========================================================
14
15    println!("--- Creating Basic Categorical Data ---");
16    let values = vec!["Tokyo", "Osaka", "Tokyo", "Nagoya", "Osaka", "Tokyo"];
17    let values_str: Vec<String> = values.iter().map(|s| s.to_string()).collect();
18
19    // Create categorical data (unique values are automatically extracted)
20    // Changed: Now using boolean instead of Some(CategoricalOrder::Unordered)
21    let cat = StringCategorical::new(
22        values_str, None,  // Automatically detect categories
23        false, // Unordered
24    )?;
25
26    println!("Original Data: {:?}", values);
27    println!("Categories: {:?}", cat.categories());
28    println!("Order Type: {:?}", cat.ordered());
29    println!("Data Length: {}", cat.len());
30
31    // Retrieve actual values from categorical data
32    println!(
33        "\nFirst 3 values: {} {} {}",
34        cat.get(0).unwrap_or(&"None".to_string()),
35        cat.get(1).unwrap_or(&"None".to_string()),
36        cat.get(2).unwrap_or(&"None".to_string())
37    );
38    println!("\nValues stored internally as codes: {:?}", cat.codes());
39
40    // ===========================================================
41    // Creating with Explicit Category List
42    // ===========================================================
43
44    println!("\n--- Creating with Explicit Category List ---");
45    let values2 = vec!["Red", "Blue", "Red"];
46    let values2_str: Vec<String> = values2.iter().map(|s| s.to_string()).collect();
47
48    // Define all categories beforehand
49    let categories = vec!["Red", "Blue", "Green", "Yellow"];
50    let categories_str: Vec<String> = categories.iter().map(|s| s.to_string()).collect();
51
52    // Create ordered categorical data
53    // Changed: Now using boolean instead of Some(CategoricalOrder::Ordered)
54    let cat2 = StringCategorical::new(
55        values2_str,
56        Some(categories_str), // Explicit category list
57        true,                 // Ordered
58    )?;
59
60    println!("Categories: {:?}", cat2.categories()); // Red, Blue, Green, Yellow
61    println!("Codes: {:?}", cat2.codes());
62
63    // ===========================================================
64    // Operations on Categorical Data
65    // ===========================================================
66
67    println!("\n--- Example of Categorical Operations ---");
68
69    // Base categorical data
70    // Changed: Using false instead of None for the ordered parameter
71    let fruits = vec!["Apple", "Banana", "Apple", "Orange"];
72    let fruits_str: Vec<String> = fruits.iter().map(|s| s.to_string()).collect();
73    let mut fruit_cat = StringCategorical::new(fruits_str, None, false)?;
74
75    println!("Original Categories: {:?}", fruit_cat.categories());
76
77    // Add categories
78    let new_cats = vec!["Grape", "Strawberry"];
79    let new_cats_str: Vec<String> = new_cats.iter().map(|s| s.to_string()).collect();
80    fruit_cat.add_categories(new_cats_str)?;
81
82    println!("Categories after addition: {:?}", fruit_cat.categories());
83
84    // Change category order
85    let reordered = vec!["Banana", "Strawberry", "Orange", "Apple", "Grape"];
86    let reordered_str: Vec<String> = reordered.iter().map(|s| s.to_string()).collect();
87    fruit_cat.reorder_categories(reordered_str)?;
88
89    println!("Categories after reordering: {:?}", fruit_cat.categories());
90    println!("Codes: {:?}", fruit_cat.codes());
91
92    // ===========================================================
93    // Integration with DataFrame
94    // ===========================================================
95
96    println!("\n--- Integration of Categorical Data with DataFrame ---");
97
98    // Create a basic DataFrame
99    let mut df = DataFrame::new();
100
101    // Add regular columns
102    let regions = vec!["Hokkaido", "Kanto", "Kansai", "Kyushu", "Kanto", "Kansai"];
103    let regions_str: Vec<String> = regions.iter().map(|s| s.to_string()).collect();
104    let pop = vec!["Low", "High", "High", "Medium", "High", "High"];
105    let pop_str: Vec<String> = pop.iter().map(|s| s.to_string()).collect();
106
107    df.add_column(
108        "Region".to_string(),
109        Series::new(regions_str, Some("Region".to_string()))?,
110    )?;
111    df.add_column(
112        "Population".to_string(),
113        Series::new(pop_str, Some("Population".to_string()))?,
114    )?;
115
116    println!("Original DataFrame:\n{:?}", df);
117
118    // ===========================================================
119    // Creating Simplified Categorical DataFrame
120    // ===========================================================
121
122    // Create a DataFrame directly from categorical data
123    println!("\n--- Creating DataFrame with Categorical Data ---");
124
125    // Create categorical data
126    // Changed: Using boolean instead of Some(CategoricalOrder::Ordered)
127    let populations = vec!["Low", "Medium", "High"];
128    let populations_str: Vec<String> = populations.iter().map(|s| s.to_string()).collect();
129    let pop_cat = StringCategorical::new(
130        populations_str,
131        None, // Automatically detect
132        true, // Ordered
133    )?;
134
135    // Region data
136    let regions = vec!["Hokkaido", "Kanto", "Kansai"];
137    let regions_str: Vec<String> = regions.iter().map(|s| s.to_string()).collect();
138
139    // Create DataFrame from both categorical data
140    let categoricals = vec![("Population".to_string(), pop_cat)];
141
142    let mut df_cat = DataFrame::from_categoricals(categoricals)?;
143
144    // Add region column
145    df_cat.add_column(
146        "Region".to_string(),
147        Series::new(regions_str, Some("Region".to_string()))?,
148    )?;
149
150    println!("\nDataFrame after adding categorical data:\n{:?}", df_cat);
151
152    // Check if columns are categorical
153    println!(
154        "\nIs 'Population' column categorical: {}",
155        df_cat.is_categorical("Population")
156    );
157    println!(
158        "Is 'Region' column categorical: {}",
159        df_cat.is_categorical("Region")
160    );
161
162    // ===========================================================
163    // Example of Multi-Categorical DataFrame
164    // ===========================================================
165
166    println!("\n--- Example of Multi-Categorical DataFrame ---");
167
168    // Create product and color data as separate categories
169    // Changed: Using false instead of None for the ordered parameter
170    let products = vec!["A", "B", "C"];
171    let products_str: Vec<String> = products.iter().map(|s| s.to_string()).collect();
172    let product_cat = StringCategorical::new(products_str, None, false)?;
173
174    let colors = vec!["Red", "Blue", "Green"];
175    let colors_str: Vec<String> = colors.iter().map(|s| s.to_string()).collect();
176    let color_cat = StringCategorical::new(colors_str, None, false)?;
177
178    // Create a DataFrame containing both categories
179    let multi_categoricals = vec![
180        ("Product".to_string(), product_cat),
181        ("Color".to_string(), color_cat),
182    ];
183
184    let multi_df = DataFrame::from_categoricals(multi_categoricals)?;
185
186    println!("Multi-Categorical DataFrame:\n{:?}", multi_df);
187    println!(
188        "\nIs 'Product' column categorical: {}",
189        multi_df.is_categorical("Product")
190    );
191    println!(
192        "Is 'Color' column categorical: {}",
193        multi_df.is_categorical("Color")
194    );
195
196    // ===========================================================
197    // Aggregation and Analysis of Categorical Data
198    // ===========================================================
199
200    println!("\n--- Aggregation and Grouping of Categorical Data ---");
201
202    // Start with a simple DataFrame
203    let mut df_simple = DataFrame::new();
204
205    // Add product data
206    let products = vec!["A", "B", "C", "A", "B"];
207    let products_str: Vec<String> = products.iter().map(|s| s.to_string()).collect();
208    let sales = vec!["100", "150", "200", "120", "180"];
209    let sales_str: Vec<String> = sales.iter().map(|s| s.to_string()).collect();
210
211    df_simple.add_column(
212        "Product".to_string(),
213        Series::new(products_str.clone(), Some("Product".to_string()))?,
214    )?;
215    df_simple.add_column(
216        "Sales".to_string(),
217        Series::new(sales_str, Some("Sales".to_string()))?,
218    )?;
219
220    println!("Original DataFrame:\n{:?}", df_simple);
221
222    // Aggregate by product
223    let product_counts = df_simple.value_counts("Product")?;
224    println!("\nProduct Counts:\n{:?}", product_counts);
225
226    // Transformation and interaction between categorical and series
227    println!("\n--- Interaction between Categorical and Series ---");
228
229    // Create a simple categorical series
230    // Changed: Using false instead of None for the ordered parameter
231    let letter_cat = StringCategorical::new(
232        vec!["A".to_string(), "B".to_string(), "C".to_string()],
233        None,
234        false,
235    )?;
236
237    // Convert to series
238    let letter_series = letter_cat.to_series(Some("Letter".to_string()))?;
239    println!("Converted from categorical to series: {:?}", letter_series);
240
241    // Additional information about categorical data
242    println!("\n--- Characteristics of Categorical Data ---");
243    println!(
244        "Categorical data is stored in memory only once, regardless of repeated string values."
245    );
246    println!(
247        "This makes it particularly efficient for datasets with many duplicate string values."
248    );
249    println!("Additionally, ordered categorical data allows meaningful sorting of data.");
250
251    println!("\n=== Sample Complete ===");
252    Ok(())
253}
Source§

impl DataFrame

Parallel processing extension: DataFrame parallel processing

Source

pub fn par_apply<F>(&self, f: F) -> Result<DataFrame>
where F: Fn(&str, usize, &str) -> String + Send + Sync,

Apply a function to all columns in parallel

Examples found in repository?
examples/parallel_example.rs (lines 58-72)
4fn main() -> Result<(), Box<dyn Error>> {
5    println!("=== Example of Parallel Processing Features ===\n");
6
7    // Create sample data
8    let numbers = Series::new(
9        vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
10        Some("numbers".to_string()),
11    )?;
12
13    // Parallel map: square each number
14    println!("Example of parallel map processing:");
15    let squared = numbers.par_map(|x| x * x);
16    println!("Original values: {:?}", numbers.values());
17    println!("Squared values: {:?}", squared.values());
18
19    // Parallel filter: keep only even numbers
20    println!("\nExample of parallel filtering:");
21    let even_numbers = numbers.par_filter(|x| x % 2 == 0);
22    println!("Even Numbers: {:?}", even_numbers.values());
23
24    // Processing data containing NA
25    let na_data = vec![
26        NA::Value(10),
27        NA::Value(20),
28        NA::NA,
29        NA::Value(40),
30        NA::NA,
31        NA::Value(60),
32    ];
33    let na_series = NASeries::new(na_data, Some("na_numbers".to_string()))?;
34
35    println!("\nParallel processing of data containing NA:");
36    let na_tripled = na_series.par_map(|x| x * 3);
37    println!("Original values: {:?}", na_series.values());
38    println!("Tripled values: {:?}", na_tripled.values());
39
40    // Parallel processing of DataFrame
41    println!("\nParallel processing of DataFrame:");
42
43    // Creating a sample DataFrame
44    let mut df = DataFrame::new();
45    let names = Series::new(
46        vec!["Alice", "Bob", "Charlie", "David", "Eve"],
47        Some("name".to_string()),
48    )?;
49    let ages = Series::new(vec![25, 30, 35, 40, 45], Some("age".to_string()))?;
50    let scores = Series::new(vec![85, 90, 78, 92, 88], Some("score".to_string()))?;
51
52    df.add_column("name".to_string(), names)?;
53    df.add_column("age".to_string(), ages)?;
54    df.add_column("score".to_string(), scores)?;
55
56    // Parallel transformation of DataFrame
57    println!("Example of DataFrame.par_apply:");
58    let transformed_df = df.par_apply(|col, _row, val| {
59        match col {
60            "age" => {
61                // Add 1 to age
62                let age: i32 = val.parse().unwrap_or(0);
63                (age + 1).to_string()
64            }
65            "score" => {
66                // Add 5 to score
67                let score: i32 = val.parse().unwrap_or(0);
68                (score + 5).to_string()
69            }
70            _ => val.to_string(),
71        }
72    })?;
73
74    println!(
75        "Original DF row count: {}, column count: {}",
76        df.row_count(),
77        df.column_count()
78    );
79    println!(
80        "Transformed DF row count: {}, column count: {}",
81        transformed_df.row_count(),
82        transformed_df.column_count()
83    );
84
85    // Filtering rows
86    println!("\nExample of DataFrame.par_filter_rows:");
87    let filtered_df = df.par_filter_rows(|row| {
88        // Keep only rows where score > 85
89        if let Ok(values) = df.get_column_numeric_values("score") {
90            if row < values.len() {
91                return values[row] > 85.0;
92            }
93        }
94        false
95    })?;
96
97    println!("Row count after filtering: {}", filtered_df.row_count());
98
99    // Example of using ParallelUtils
100    println!("\nExample of ParallelUtils features:");
101
102    let unsorted = vec![5, 3, 8, 1, 9, 4, 7, 2, 6];
103    let sorted = ParallelUtils::par_sort(unsorted.clone());
104    println!("Before sorting: {:?}", unsorted);
105    println!("After sorting: {:?}", sorted);
106
107    let numbers_vec = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
108    let sum = ParallelUtils::par_sum(&numbers_vec);
109    let mean = ParallelUtils::par_mean(&numbers_vec);
110    let min = ParallelUtils::par_min(&numbers_vec);
111    let max = ParallelUtils::par_max(&numbers_vec);
112
113    println!("Sum: {}", sum);
114    println!("Mean: {}", mean.unwrap());
115    println!("Min: {}", min.unwrap());
116    println!("Max: {}", max.unwrap());
117
118    println!("\n=== Example of Parallel Processing Features Complete ===");
119    Ok(())
120}
Source

pub fn par_filter_rows<F>(&self, f: F) -> Result<DataFrame>
where F: Fn(usize) -> bool + Send + Sync,

Filter rows in parallel

Examples found in repository?
examples/parallel_example.rs (lines 87-95)
4fn main() -> Result<(), Box<dyn Error>> {
5    println!("=== Example of Parallel Processing Features ===\n");
6
7    // Create sample data
8    let numbers = Series::new(
9        vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
10        Some("numbers".to_string()),
11    )?;
12
13    // Parallel map: square each number
14    println!("Example of parallel map processing:");
15    let squared = numbers.par_map(|x| x * x);
16    println!("Original values: {:?}", numbers.values());
17    println!("Squared values: {:?}", squared.values());
18
19    // Parallel filter: keep only even numbers
20    println!("\nExample of parallel filtering:");
21    let even_numbers = numbers.par_filter(|x| x % 2 == 0);
22    println!("Even Numbers: {:?}", even_numbers.values());
23
24    // Processing data containing NA
25    let na_data = vec![
26        NA::Value(10),
27        NA::Value(20),
28        NA::NA,
29        NA::Value(40),
30        NA::NA,
31        NA::Value(60),
32    ];
33    let na_series = NASeries::new(na_data, Some("na_numbers".to_string()))?;
34
35    println!("\nParallel processing of data containing NA:");
36    let na_tripled = na_series.par_map(|x| x * 3);
37    println!("Original values: {:?}", na_series.values());
38    println!("Tripled values: {:?}", na_tripled.values());
39
40    // Parallel processing of DataFrame
41    println!("\nParallel processing of DataFrame:");
42
43    // Creating a sample DataFrame
44    let mut df = DataFrame::new();
45    let names = Series::new(
46        vec!["Alice", "Bob", "Charlie", "David", "Eve"],
47        Some("name".to_string()),
48    )?;
49    let ages = Series::new(vec![25, 30, 35, 40, 45], Some("age".to_string()))?;
50    let scores = Series::new(vec![85, 90, 78, 92, 88], Some("score".to_string()))?;
51
52    df.add_column("name".to_string(), names)?;
53    df.add_column("age".to_string(), ages)?;
54    df.add_column("score".to_string(), scores)?;
55
56    // Parallel transformation of DataFrame
57    println!("Example of DataFrame.par_apply:");
58    let transformed_df = df.par_apply(|col, _row, val| {
59        match col {
60            "age" => {
61                // Add 1 to age
62                let age: i32 = val.parse().unwrap_or(0);
63                (age + 1).to_string()
64            }
65            "score" => {
66                // Add 5 to score
67                let score: i32 = val.parse().unwrap_or(0);
68                (score + 5).to_string()
69            }
70            _ => val.to_string(),
71        }
72    })?;
73
74    println!(
75        "Original DF row count: {}, column count: {}",
76        df.row_count(),
77        df.column_count()
78    );
79    println!(
80        "Transformed DF row count: {}, column count: {}",
81        transformed_df.row_count(),
82        transformed_df.column_count()
83    );
84
85    // Filtering rows
86    println!("\nExample of DataFrame.par_filter_rows:");
87    let filtered_df = df.par_filter_rows(|row| {
88        // Keep only rows where score > 85
89        if let Ok(values) = df.get_column_numeric_values("score") {
90            if row < values.len() {
91                return values[row] > 85.0;
92            }
93        }
94        false
95    })?;
96
97    println!("Row count after filtering: {}", filtered_df.row_count());
98
99    // Example of using ParallelUtils
100    println!("\nExample of ParallelUtils features:");
101
102    let unsorted = vec![5, 3, 8, 1, 9, 4, 7, 2, 6];
103    let sorted = ParallelUtils::par_sort(unsorted.clone());
104    println!("Before sorting: {:?}", unsorted);
105    println!("After sorting: {:?}", sorted);
106
107    let numbers_vec = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
108    let sum = ParallelUtils::par_sum(&numbers_vec);
109    let mean = ParallelUtils::par_mean(&numbers_vec);
110    let min = ParallelUtils::par_min(&numbers_vec);
111    let max = ParallelUtils::par_max(&numbers_vec);
112
113    println!("Sum: {}", sum);
114    println!("Mean: {}", mean.unwrap());
115    println!("Min: {}", min.unwrap());
116    println!("Max: {}", max.unwrap());
117
118    println!("\n=== Example of Parallel Processing Features Complete ===");
119    Ok(())
120}
Source

pub fn par_groupby<K>(&self, key_func: K) -> Result<HashMap<String, DataFrame>>
where K: Fn(usize) -> String + Send + Sync,

Execute groupby operation in parallel

Source§

impl DataFrame

DataFrame extension: Pivot table functionality

Source

pub fn pivot_table( &self, index: &str, columns: &str, values: &str, aggfunc: AggFunction, ) -> Result<DataFrame>

Create a pivot table

Examples found in repository?
examples/pivot_example.rs (line 65)
5fn main() -> Result<()> {
6    println!("=== Pivot Table and Grouping Example ===");
7
8    // Create sample data
9    let mut df = DataFrame::new();
10
11    // Create column data
12    let category = Series::new(
13        vec![
14            "A".to_string(),
15            "B".to_string(),
16            "A".to_string(),
17            "C".to_string(),
18            "B".to_string(),
19            "A".to_string(),
20            "C".to_string(),
21            "B".to_string(),
22        ],
23        Some("category".to_string()),
24    )?;
25
26    let region = Series::new(
27        vec![
28            "East".to_string(),
29            "West".to_string(),
30            "West".to_string(),
31            "East".to_string(),
32            "East".to_string(),
33            "West".to_string(),
34            "West".to_string(),
35            "East".to_string(),
36        ],
37        Some("region".to_string()),
38    )?;
39
40    let sales = Series::new(
41        vec![100, 150, 200, 120, 180, 90, 250, 160],
42        Some("sales".to_string()),
43    )?;
44
45    // Add columns to DataFrame
46    df.add_column("category".to_string(), category)?;
47    df.add_column("region".to_string(), region)?;
48    df.add_column("sales".to_string(), sales)?;
49
50    println!("DataFrame Info:");
51    println!("  Number of columns: {}", df.column_count());
52    println!("  Number of rows: {}", df.row_count());
53    println!("  Column names: {:?}", df.column_names());
54
55    // Grouping and aggregation
56    println!("\n=== Grouping by Category ===");
57    let category_group = df.groupby("category")?;
58
59    println!("Sum by category (in progress):");
60    let _category_sum = category_group.sum(&["sales"])?;
61
62    // Pivot table (in progress)
63    println!("\n=== Pivot Table ===");
64    println!("Sum of sales by category and region (in progress):");
65    let _pivot_result = df.pivot_table("category", "region", "sales", AggFunction::Sum)?;
66
67    // Note: Pivot table and grouping features are still under development,
68    // so actual results are not displayed
69
70    println!("\n=== Aggregation Function Examples ===");
71    let functions = [
72        AggFunction::Sum,
73        AggFunction::Mean,
74        AggFunction::Min,
75        AggFunction::Max,
76        AggFunction::Count,
77    ];
78
79    for func in &functions {
80        println!(
81            "Aggregation Function: {} ({})",
82            func.name(),
83            match func {
84                AggFunction::Sum => "Sum",
85                AggFunction::Mean => "Mean",
86                AggFunction::Min => "Min",
87                AggFunction::Max => "Max",
88                AggFunction::Count => "Count",
89            }
90        );
91    }
92
93    println!("\n=== Pivot Table Example (Complete) ===");
94    Ok(())
95}
Source

pub fn groupby(&self, by: &str) -> Result<GroupBy<'_>>

Group by specified column

Examples found in repository?
examples/pivot_example.rs (line 57)
5fn main() -> Result<()> {
6    println!("=== Pivot Table and Grouping Example ===");
7
8    // Create sample data
9    let mut df = DataFrame::new();
10
11    // Create column data
12    let category = Series::new(
13        vec![
14            "A".to_string(),
15            "B".to_string(),
16            "A".to_string(),
17            "C".to_string(),
18            "B".to_string(),
19            "A".to_string(),
20            "C".to_string(),
21            "B".to_string(),
22        ],
23        Some("category".to_string()),
24    )?;
25
26    let region = Series::new(
27        vec![
28            "East".to_string(),
29            "West".to_string(),
30            "West".to_string(),
31            "East".to_string(),
32            "East".to_string(),
33            "West".to_string(),
34            "West".to_string(),
35            "East".to_string(),
36        ],
37        Some("region".to_string()),
38    )?;
39
40    let sales = Series::new(
41        vec![100, 150, 200, 120, 180, 90, 250, 160],
42        Some("sales".to_string()),
43    )?;
44
45    // Add columns to DataFrame
46    df.add_column("category".to_string(), category)?;
47    df.add_column("region".to_string(), region)?;
48    df.add_column("sales".to_string(), sales)?;
49
50    println!("DataFrame Info:");
51    println!("  Number of columns: {}", df.column_count());
52    println!("  Number of rows: {}", df.row_count());
53    println!("  Column names: {:?}", df.column_names());
54
55    // Grouping and aggregation
56    println!("\n=== Grouping by Category ===");
57    let category_group = df.groupby("category")?;
58
59    println!("Sum by category (in progress):");
60    let _category_sum = category_group.sum(&["sales"])?;
61
62    // Pivot table (in progress)
63    println!("\n=== Pivot Table ===");
64    println!("Sum of sales by category and region (in progress):");
65    let _pivot_result = df.pivot_table("category", "region", "sales", AggFunction::Sum)?;
66
67    // Note: Pivot table and grouping features are still under development,
68    // so actual results are not displayed
69
70    println!("\n=== Aggregation Function Examples ===");
71    let functions = [
72        AggFunction::Sum,
73        AggFunction::Mean,
74        AggFunction::Min,
75        AggFunction::Max,
76        AggFunction::Count,
77    ];
78
79    for func in &functions {
80        println!(
81            "Aggregation Function: {} ({})",
82            func.name(),
83            match func {
84                AggFunction::Sum => "Sum",
85                AggFunction::Mean => "Mean",
86                AggFunction::Min => "Min",
87                AggFunction::Max => "Max",
88                AggFunction::Count => "Count",
89            }
90        );
91    }
92
93    println!("\n=== Pivot Table Example (Complete) ===");
94    Ok(())
95}
Source§

impl DataFrame

Source

pub fn plot_xy<P: AsRef<Path>>( &self, x_col: &str, y_col: &str, path: P, config: PlotConfig, ) -> Result<()>

👎Deprecated since 0.1.0-alpha.2: Use DataFrame.scatter_xy() instead

Plot two columns as XY coordinates

Note: This implementation is kept for backward compatibility. New code should use the scatter_xy method instead.

Source

pub fn plot_lines<P: AsRef<Path>>( &self, columns: &[&str], path: P, config: PlotConfig, ) -> Result<()>

👎Deprecated since 0.1.0-alpha.2: Use DataFrame.multi_line_plot() instead

Draw line graphs for multiple columns

Note: This implementation is kept for backward compatibility. New code should use the multi_line_plot method instead.

Trait Implementations§

Source§

impl ApplyExt for DataFrame

Implementation of ApplyExt for DataFrame

Source§

fn apply<F, R>( &self, f: F, axis: Axis, result_name: Option<String>, ) -> Result<Series<R>>
where F: Fn(&Series<String>) -> R, R: Debug + Clone,

Apply a function to each column or row
Source§

fn applymap<F, R>(&self, f: F) -> Result<Self>
where F: Fn(&str) -> R, R: Debug + Clone + ToString,

Apply a function to each element
Source§

fn mask<F>(&self, condition: F, other: &str) -> Result<Self>
where F: Fn(&str) -> bool,

Replace values based on a condition
Source§

fn where_func<F>(&self, condition: F, other: &str) -> Result<Self>
where F: Fn(&str) -> bool,

Replace values based on a condition (inverse of mask)
Source§

fn replace(&self, replace_map: &HashMap<String, String>) -> Result<Self>

Replace values using the mappings in replace_map
Source§

fn duplicated( &self, subset: Option<&[String]>, keep: Option<&str>, ) -> Result<Series<bool>>

Detect duplicate rows
Source§

fn drop_duplicates( &self, subset: Option<&[String]>, keep: Option<&str>, ) -> Result<Self>

Drop duplicate rows
Source§

fn rolling( &self, window_size: usize, column_name: &str, operation: &str, result_column: Option<&str>, ) -> Result<Self>

Apply a fixed-length window (rolling window) operation
Source§

fn expanding( &self, min_periods: usize, column_name: &str, operation: &str, result_column: Option<&str>, ) -> Result<Self>

Apply an expanding window operation
Source§

fn ewm( &self, column_name: &str, operation: &str, span: Option<usize>, alpha: Option<f64>, result_column: Option<&str>, ) -> Result<Self>

Apply an exponentially weighted window operation
Source§

impl Clone for DataFrame

Source§

fn clone(&self) -> DataFrame

Returns a copy of the value. Read more
1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
Source§

impl DataFramePlotExt for DataFrame

Source§

fn plot_column<P: AsRef<Path>>( &self, _column: &str, _path: P, _title: Option<&str>, ) -> Result<()>

Plot a column from this DataFrame with minimal configuration
Source§

fn line_plot<P: AsRef<Path>>( &self, _column: &str, _path: P, _title: Option<&str>, ) -> Result<()>

Create a line plot for a column
Source§

fn scatter_plot<P: AsRef<Path>>( &self, _column: &str, _path: P, _title: Option<&str>, ) -> Result<()>

Create a scatter plot for a column
Source§

fn bar_plot<P: AsRef<Path>>( &self, _column: &str, _path: P, _title: Option<&str>, ) -> Result<()>

Create a bar plot for a column
Source§

fn area_plot<P: AsRef<Path>>( &self, _column: &str, _path: P, _title: Option<&str>, ) -> Result<()>

Create an area plot for a column
Source§

fn box_plot<P: AsRef<Path>>( &self, _value_column: &str, _category_column: &str, _path: P, _title: Option<&str>, ) -> Result<()>

Create a box plot for a column grouped by a category
Source§

fn scatter_xy<P: AsRef<Path>>( &self, _x_column: &str, _y_column: &str, _path: P, _title: Option<&str>, ) -> Result<()>

Create a scatter plot between two columns
Source§

fn multi_line_plot<P: AsRef<Path>>( &self, _columns: &[&str], _path: P, _title: Option<&str>, ) -> Result<()>

Create a line plot for multiple columns
Source§

fn plot_svg<P: AsRef<Path>>( &self, _column: &str, _path: P, _plot_kind: PlotKind, _title: Option<&str>, ) -> Result<()>

Save the plot in SVG format
Source§

impl Debug for DataFrame

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
Source§

impl JoinExt for DataFrame

Implementation of JoinExt for DataFrame

Source§

fn join(&self, other: &Self, on: &str, join_type: JoinType) -> Result<Self>

Join two DataFrames
Source§

fn inner_join(&self, other: &Self, on: &str) -> Result<Self>

Perform inner join
Source§

fn left_join(&self, other: &Self, on: &str) -> Result<Self>

Perform left join
Source§

fn right_join(&self, other: &Self, on: &str) -> Result<Self>

Perform right join
Source§

fn outer_join(&self, other: &Self, on: &str) -> Result<Self>

Perform outer join
Source§

impl SerializeExt for DataFrame

Source§

fn to_csv<P: AsRef<Path>>(&self, _path: P) -> Result<()>

Save DataFrame to a CSV file
Source§

fn from_csv<P: AsRef<Path>>(_path: P, has_header: bool) -> Result<Self>

Load DataFrame from a CSV file
Source§

fn to_json(&self) -> Result<String>

Convert DataFrame to JSON string
Source§

fn from_json(json: &str) -> Result<Self>

Create DataFrame from JSON string
Source§

fn to_parquet<P: AsRef<Path>>(&self, path: P) -> Result<()>

Save DataFrame to a Parquet file
Source§

fn from_parquet<P: AsRef<Path>>(path: P) -> Result<Self>

Load DataFrame from a Parquet file
Source§

impl TransformExt for DataFrame

Implementation of TransformExt for DataFrame

Source§

fn melt(&self, options: &MeltOptions) -> Result<Self>

Transform DataFrame to long format (wide to long)
Source§

fn stack(&self, options: &StackOptions) -> Result<Self>

Stack DataFrame (columns to rows)
Source§

fn unstack(&self, options: &UnstackOptions) -> Result<Self>

Unstack DataFrame (rows to columns)
Source§

fn conditional_aggregate<F, G>( &self, group_by: &str, agg_column: &str, filter_fn: F, agg_fn: G, ) -> Result<Self>
where F: Fn(&HashMap<String, String>) -> bool, G: Fn(&[String]) -> String,

Aggregate values based on conditions (combination of pivot and filtering)
Source§

fn concat(dfs: &[&Self], ignore_index: bool) -> Result<Self>

Concatenate multiple DataFrames along rows
Source§

impl ViewExt for DataFrame

Source§

fn get_column_view<'a>(&'a self, column_name: &str) -> Result<ColumnView<'a>>

Get a column view
Source§

fn head(&self, n: usize) -> Result<Self>

Get a head view (first n rows)
Source§

fn tail(&self, n: usize) -> Result<Self>

Get a tail view (last n rows)

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> CloneToUninit for T
where T: Clone,

Source§

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dest. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> IntoEither for T

Source§

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

impl<T> Pointable for T

Source§

const ALIGN: usize

The alignment of the pointer.
Source§

type Init = T

The type for initializers.
Source§

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes a value with the given initializer. Read more
Source§

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more
Source§

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more
Source§

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more
Source§

impl<T> ToOwned for T
where T: Clone,

Source§

type Owned = T

The resulting type after obtaining ownership.
Source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
Source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source§

fn vzip(self) -> V