pub struct DataFrame { /* private fields */ }
DataFrame struct: a column-oriented 2D data structure.
Implementations
impl DataFrame
pub fn new() -> Self
Create a new empty DataFrame.
Examples found in repository:
81fn create_sample_dataframe() -> Result<DataFrame> {
82 let mut df = DataFrame::new();
83
84 // Add date column
85 let dates = vec![
86 "2023-01-01",
87 "2023-01-02",
88 "2023-01-03",
89 "2023-01-04",
90 "2023-01-05",
91 "2023-01-06",
92 "2023-01-07",
93 "2023-01-08",
94 "2023-01-09",
95 "2023-01-10",
96 ];
97 let date_series = Series::new(dates, Some("Date".to_string()))?;
98 df.add_column("Date".to_string(), date_series)?;
99
100 // Add product column
101 let products = vec![
102 "ProductA", "ProductB", "ProductA", "ProductC", "ProductB", "ProductA", "ProductC",
103 "ProductA", "ProductB", "ProductC",
104 ];
105 let product_series = Series::new(products, Some("Product".to_string()))?;
106 df.add_column("Product".to_string(), product_series)?;
107
108 // Add price column
109 let prices = vec![
110 "100", "150", "110", "200", "160", "120", "210", "115", "165", "220",
111 ];
112 let price_series = Series::new(prices, Some("Price".to_string()))?;
113 df.add_column("Price".to_string(), price_series)?;
114
115 // Add quantity column
116 let quantities = vec!["5", "3", "6", "2", "4", "7", "3", "8", "5", "4"];
117 let quantity_series = Series::new(quantities, Some("Quantity".to_string()))?;
118 df.add_column("Quantity".to_string(), quantity_series)?;
119
120 Ok(df)
121}
More examples
96fn create_sample_dataframe(size: usize) -> Result<DataFrame> {
97 // Create data for columns
98 let mut x1 = Vec::with_capacity(size);
99 let mut x2 = Vec::with_capacity(size);
100 let mut x3 = Vec::with_capacity(size);
101 let mut x4 = Vec::with_capacity(size);
102 let mut y = Vec::with_capacity(size);
103
104 for i in 0..size {
105 // Create features with some correlation to the target
106 let x1_val = (i % 100) as f64 / 100.0;
107 let x2_val = ((i * 2) % 100) as f64 / 100.0;
108 let x3_val = ((i * 3) % 100) as f64 / 100.0;
109 let x4_val = ((i * 5) % 100) as f64 / 100.0;
110
111 // Create a target variable that depends on the features
112 let y_val = 2.0 * x1_val + 1.5 * x2_val - 0.5 * x3_val
113 + 3.0 * x4_val
114 + 0.1 * (rand::random::<f64>() - 0.5); // Add some noise
115
116 x1.push(x1_val);
117 x2.push(x2_val);
118 x3.push(x3_val);
119 x4.push(x4_val);
120 y.push(y_val);
121 }
122
123 // Create DataFrame
124 let mut df = DataFrame::new();
125 df.add_column("x1".to_string(), Series::new(x1, Some("x1".to_string()))?)?;
126 df.add_column("x2".to_string(), Series::new(x2, Some("x2".to_string()))?)?;
127 df.add_column("x3".to_string(), Series::new(x3, Some("x3".to_string()))?)?;
128 df.add_column("x4".to_string(), Series::new(x4, Some("x4".to_string()))?)?;
129 df.add_column("y".to_string(), Series::new(y, Some("y".to_string()))?)?;
130
131 Ok(df)
132}
22fn descriptive_stats_example() -> Result<()> {
23 println!("1. Descriptive Statistics Sample");
24 println!("-----------------");
25
26 // Create dataset
27 let mut df = DataFrame::new();
28 let values = Series::new(
29 vec![10.5, 12.3, 15.2, 9.8, 11.5, 13.7, 14.3, 12.9, 8.5, 10.2],
30 Some("Values".to_string()),
31 )?;
32
33 df.add_column("Values".to_string(), values)?;
34
35 // Descriptive statistics
36 let stats = pandrs::stats::describe(
37 df.get_column("Values")
38 .unwrap()
39 .values()
40 .iter()
41 .map(|v: &String| v.parse::<f64>().unwrap_or(0.0))
42 .collect::<Vec<f64>>(),
43 )?;
44
45 // Display results
46 println!("Count: {}", stats.count);
47 println!("Mean: {:.2}", stats.mean);
48 println!("Standard Deviation: {:.2}", stats.std);
49 println!("Min: {:.2}", stats.min);
50 println!("First Quartile: {:.2}", stats.q1);
51 println!("Median: {:.2}", stats.median);
52 println!("Third Quartile: {:.2}", stats.q3);
53 println!("Max: {:.2}", stats.max);
54
55 // Covariance and correlation coefficient
56 let data1 = vec![1.0, 2.0, 3.0, 4.0, 5.0];
57 let data2 = vec![1.5, 3.1, 4.2, 5.8, 7.1];
58
59 let cov = pandrs::stats::covariance(&data1, &data2)?;
60 let corr = pandrs::stats::correlation(&data1, &data2)?;
61
62 println!("\nCovariance and Correlation Coefficient:");
63 println!("Covariance: {:.4}", cov);
64 println!("Correlation Coefficient: {:.4}", corr);
65
66 println!();
67 Ok(())
68}
69
70fn ttest_example() -> Result<()> {
71 println!("2. t-test Sample");
72 println!("--------------");
73
74 // Create sample data
75 let group1 = vec![5.2, 5.8, 6.1, 5.5, 5.9, 6.2, 5.7, 6.0, 5.6, 5.8];
76 let group2 = vec![4.8, 5.1, 5.3, 4.9, 5.0, 5.2, 4.7, 5.1, 4.9, 5.0];
77
78 // Perform t-test with significance level 0.05 (5%)
79 let alpha = 0.05;
80
81 // t-test assuming equal variances
82 let result_equal = pandrs::stats::ttest(&group1, &group2, alpha, true)?;
83
84 println!("t-test result assuming equal variances:");
85 print_ttest_result(&result_equal);
86
87 // Welch's t-test (not assuming equal variances)
88 let result_welch = pandrs::stats::ttest(&group1, &group2, alpha, false)?;
89
90 println!("\nWelch's t-test result (not assuming equal variances):");
91 print_ttest_result(&result_welch);
92
93 println!();
94 Ok(())
95}
96
97fn print_ttest_result(result: &TTestResult) {
98 println!("t-statistic: {:.4}", result.statistic);
99 println!("p-value: {:.4}", result.pvalue);
100 println!("Degrees of Freedom: {}", result.df);
101 println!(
102 "Significant: {}",
103 if result.significant { "Yes" } else { "No" }
104 );
105}
106
107fn regression_example() -> Result<()> {
108 println!("3. Regression Analysis Sample");
109 println!("-----------------");
110
111 // Create dataset
112 let mut df = DataFrame::new();
113
114 // Explanatory variables
115 let x1 = Series::new(
116 vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0],
117 Some("x1".to_string()),
118 )?;
119 let x2 = Series::new(
120 vec![5.0, 8.0, 11.0, 14.0, 17.0, 20.0, 23.0, 26.0, 29.0, 32.0],
121 Some("x2".to_string()),
122 )?;
123
124 // Dependent variable (y = 2*x1 + 1.5*x2 + 3 + noise)
125 let mut y_values = Vec::with_capacity(10);
126 let mut rng = rand::rng();
127
128 for i in 0..10 {
129 let noise = rng.random_range(-1.0..1.0);
130 let y_val = 2.0 * (i as f64 + 1.0) + 1.5 * (5.0 + 3.0 * i as f64) + 3.0 + noise;
131 y_values.push(y_val);
132 }
133
134 let y = Series::new(y_values, Some("y".to_string()))?;
135
136 // Add to DataFrame
137 df.add_column("x1".to_string(), x1)?;
138 df.add_column("x2".to_string(), x2)?;
139 df.add_column("y".to_string(), y)?;
140
141 // Perform regression analysis
142 let model = pandrs::stats::linear_regression(&df, "y", &["x1", "x2"])?;
143
144 // Display results
145 println!(
146 "Linear Regression Model: y = {:.4} + {:.4} × x1 + {:.4} × x2",
147 model.intercept, model.coefficients[0], model.coefficients[1]
148 );
149 println!("R²: {:.4}", model.r_squared);
150 println!("Adjusted R²: {:.4}", model.adj_r_squared);
151 println!("p-values of regression coefficients: {:?}", model.p_values);
152
153 // Simple regression example
154 println!("\nSimple Regression Model (x1 only):");
155 let model_simple = pandrs::stats::linear_regression(&df, "y", &["x1"])?;
156 println!(
157 "Linear Regression Model: y = {:.4} + {:.4} × x1",
158 model_simple.intercept, model_simple.coefficients[0]
159 );
160 println!("R²: {:.4}", model_simple.r_squared);
161
162 Ok(())
163}
4fn main() -> Result<()> {
5 println!("=== PandRS Basic Usage Example ===");
6
7 // Creating Series
8 let ages = Series::new(vec![30, 25, 40], Some("age".to_string()))?;
9 let heights = Series::new(vec![180, 175, 182], Some("height".to_string()))?;
10 let names = Series::new(
11 vec![
12 "Alice".to_string(),
13 "Bob".to_string(),
14 "Charlie".to_string(),
15 ],
16 Some("name".to_string()),
17 )?;
18
19 println!("Age Series: {:?}", ages);
20 println!("Height Series: {:?}", heights);
21 println!("Name Series: {:?}", names);
22
23 // Statistics for numeric series
24 println!("\n=== Statistics for Age Series ===");
25 println!("Sum: {}", ages.sum());
26 println!("Mean: {}", ages.mean()?);
27 println!("Min: {}", ages.min()?);
28 println!("Max: {}", ages.max()?);
29
30 // Creating a DataFrame
31 println!("\n=== Creating a DataFrame ===");
32 let mut df = DataFrame::new();
33 df.add_column("name".to_string(), names)?;
34 df.add_column("age".to_string(), ages)?;
35 df.add_column("height".to_string(), heights)?;
36
37 println!("DataFrame: {:?}", df);
38 println!("Number of Columns: {}", df.column_count());
39 println!("Number of Rows: {}", df.row_count());
40 println!("Column Names: {:?}", df.column_names());
41
42 // Testing saving to and loading from CSV
43 let file_path = "example_data.csv";
44 df.to_csv(file_path)?;
45 println!("\nSaved to CSV file: {}", file_path);
46
47 // Testing loading from CSV (may not be fully implemented yet)
48 match DataFrame::from_csv(file_path, true) {
49 Ok(loaded_df) => {
50 println!("DataFrame loaded from CSV: {:?}", loaded_df);
51 println!("Number of Columns: {}", loaded_df.column_count());
52 println!("Number of Rows: {}", loaded_df.row_count());
53 println!("Column Names: {:?}", loaded_df.column_names());
54 }
55 Err(e) => {
56 println!("Failed to load CSV: {:?}", e);
57 }
58 }
59
60 println!("\n=== Sample Complete ===");
61 Ok(())
62}
4fn main() -> Result<(), Box<dyn Error>> {
5 // Create a sample DataFrame
6 let mut df = DataFrame::new();
7
8 // Add an integer column
9 let int_data = Series::new(vec![1, 2, 3, 4, 5], Some("id".to_string()))?;
10 df.add_column("id".to_string(), int_data)?;
11
12 // Add a floating-point column
13 let float_data = Series::new(vec![1.1, 2.2, 3.3, 4.4, 5.5], Some("value".to_string()))?;
14 df.add_column("value".to_string(), float_data)?;
15
16 // Add a string column
17 let string_data = Series::new(
18 vec![
19 "A".to_string(),
20 "B".to_string(),
21 "C".to_string(),
22 "D".to_string(),
23 "E".to_string(),
24 ],
25 Some("category".to_string()),
26 )?;
27 df.add_column("category".to_string(), string_data)?;
28
29 println!("Original DataFrame:");
30 println!("{:?}", df);
31
32 // Parquet support is still under development
33 println!("\nNote: Parquet support is currently under development.");
34 println!("It is planned to be available in a future release.");
35
36 /*
37 // Although Parquet functionality is not yet implemented, dependencies have been introduced.
38 // The following code is expected to work in a future version.
39
40 // Write the DataFrame to a Parquet file
41 let path = "example.parquet";
42 match write_parquet(&df, path, Some(ParquetCompression::Snappy)) {
43 Ok(_) => {
44 println!("DataFrame written to {}", path);
45
46 // Read the DataFrame from the Parquet file
47 match read_parquet(path) {
48 Ok(df_read) => {
49 println!("\nDataFrame read from Parquet file:");
50 println!("{:?}", df_read);
51
52 // Verify the results
53 assert_eq!(df.row_count(), df_read.row_count());
54 assert_eq!(df.column_count(), df_read.column_count());
55
56 println!("\nVerification successful: Data matches");
57 },
58 Err(e) => println!("Error reading Parquet file: {}", e),
59 }
60 },
61 Err(e) => println!("Error writing Parquet file: {}", e),
62 }
63 */
64
65 Ok(())
66}
6fn main() -> Result<()> {
7 println!("=== Benchmark with One Million Rows ===\n");
8
9 // Benchmark function
10 fn bench<F>(name: &str, f: F) -> Duration
11 where
12 F: FnOnce() -> (),
13 {
14 println!("Running: {}", name);
15 let start = Instant::now();
16 f();
17 let duration = start.elapsed();
18 println!(" Completed: {:?}\n", duration);
19 duration
20 }
21
22 // Benchmark for creating a DataFrame with one million rows
23 println!("--- DataFrame with One Million Rows ---");
24
25 bench("Creating Series x3 (One Million Rows)", || {
26 let _ = Series::new((0..1_000_000).collect::<Vec<_>>(), Some("A".to_string())).unwrap();
27 let _ = Series::new(
28 (0..1_000_000).map(|i| i as f64 * 0.5).collect::<Vec<_>>(),
29 Some("B".to_string()),
30 )
31 .unwrap();
32 let _ = Series::new(
33 (0..1_000_000)
34 .map(|i| format!("val_{}", i))
35 .collect::<Vec<_>>(),
36 Some("C".to_string()),
37 )
38 .unwrap();
39 });
40
41 let large_duration = bench("Creating DataFrame (3 Columns x One Million Rows)", || {
42 let col_a = Series::new((0..1_000_000).collect::<Vec<_>>(), Some("A".to_string())).unwrap();
43 let col_b = Series::new(
44 (0..1_000_000).map(|i| i as f64 * 0.5).collect::<Vec<_>>(),
45 Some("B".to_string()),
46 )
47 .unwrap();
48 let col_c = Series::new(
49 (0..1_000_000)
50 .map(|i| format!("val_{}", i))
51 .collect::<Vec<_>>(),
52 Some("C".to_string()),
53 )
54 .unwrap();
55
56 let mut df = DataFrame::new();
57 df.add_column("A".to_string(), col_a).unwrap();
58 df.add_column("B".to_string(), col_b).unwrap();
59 df.add_column("C".to_string(), col_c).unwrap();
60 });
61
62 bench("DataFrame from_map (3 Columns x One Million Rows)", || {
63 let mut data = HashMap::new();
64 data.insert(
65 "A".to_string(),
66 (0..1_000_000).map(|n| n.to_string()).collect(),
67 );
68 data.insert(
69 "B".to_string(),
70 (0..1_000_000)
71 .map(|n| format!("{:.1}", n as f64 * 0.5))
72 .collect(),
73 );
74 data.insert(
75 "C".to_string(),
76 (0..1_000_000).map(|i| format!("val_{}", i)).collect(),
77 );
78
79 let _ = DataFrame::from_map(data, None).unwrap();
80 });
81
82 println!(
83 "Time to create DataFrame with one million rows in pure Rust: {:?}",
84 large_duration
85 );
86
87 Ok(())
88}
pub fn with_index(index: Index<String>) -> Self
Create a new DataFrame with the given simple (single-level) index.
Examples found in repository:
4fn main() -> Result<()> {
5 println!("=== Example of Using MultiIndex ===\n");
6
7 // =========================================
8 // Creating a MultiIndex
9 // =========================================
10
11 println!("--- Creating MultiIndex from Tuples ---");
12
13 // Create MultiIndex from tuples (vector of vectors)
14 let tuples = vec![
15 vec!["A".to_string(), "a".to_string()],
16 vec!["A".to_string(), "b".to_string()],
17 vec!["B".to_string(), "a".to_string()],
18 vec!["B".to_string(), "b".to_string()],
19 ];
20
21 let names = Some(vec![Some("first".to_string()), Some("second".to_string())]);
22 let multi_idx = MultiIndex::from_tuples(tuples, names)?;
23
24 println!("MultiIndex: {:?}\n", multi_idx);
25 println!("Number of Levels: {}", multi_idx.n_levels());
26 println!("Number of Rows: {}\n", multi_idx.len());
27
28 // =========================================
29 // Operations on MultiIndex
30 // =========================================
31
32 println!("--- Retrieving Level Values ---");
33 let level0_values = multi_idx.get_level_values(0)?;
34 println!("Values in Level 0: {:?}", level0_values);
35
36 let level1_values = multi_idx.get_level_values(1)?;
37 println!("Values in Level 1: {:?}", level1_values);
38
39 println!("--- Swapping Levels ---");
40 let swapped = multi_idx.swaplevel(0, 1)?;
41 println!("After Swapping Levels: {:?}\n", swapped);
42
43 // =========================================
44 // DataFrame with MultiIndex
45 // =========================================
46
47 println!("--- DataFrame with MultiIndex ---");
48
49 // Create DataFrame
50 let mut df = DataFrame::with_multi_index(multi_idx.clone());
51
52 // Add data
53 let data = vec![
54 "data1".to_string(),
55 "data2".to_string(),
56 "data3".to_string(),
57 "data4".to_string(),
58 ];
59 df.add_column(
60 "data".to_string(),
61 pandrs::Series::new(data, Some("data".to_string()))?,
62 )?;
63
64 println!("DataFrame: {:?}\n", df);
65 println!("Number of Rows: {}", df.row_count());
66 println!("Number of Columns: {}", df.column_count());
67
68 // =========================================
69 // Conversion Between Simple Index and MultiIndex
70 // =========================================
71
72 println!("\n--- Example of Index Conversion ---");
73
74 // Create DataFrame from simple index
75 let simple_idx = Index::new(vec!["X".to_string(), "Y".to_string(), "Z".to_string()])?;
76 let mut simple_df = DataFrame::with_index(simple_idx);
77
78 // Add data
79 let values = vec![100, 200, 300];
80 let str_values: Vec<String> = values.iter().map(|v| v.to_string()).collect();
81 simple_df.add_column(
82 "values".to_string(),
83 pandrs::Series::new(str_values, Some("values".to_string()))?,
84 )?;
85
86 println!("Simple Index DataFrame: {:?}", simple_df);
87
88 // Prepare for conversion to MultiIndex
89 let tuples = vec![
90 vec!["Category".to_string(), "X".to_string()],
91 vec!["Category".to_string(), "Y".to_string()],
92 vec!["Category".to_string(), "Z".to_string()],
93 ];
94
95 // Create and set MultiIndex
96 let new_multi_idx = MultiIndex::from_tuples(tuples, None)?;
97 simple_df.set_multi_index(new_multi_idx)?;
98
99 println!("After Conversion to MultiIndex: {:?}", simple_df);
100
101 println!("\n=== Sample Complete ===");
102 Ok(())
103}
pub fn with_multi_index(multi_index: MultiIndex<String>) -> Self
Create a new DataFrame with the given MultiIndex.
Examples found in repository:
4fn main() -> Result<()> {
5 println!("=== Example of Using MultiIndex ===\n");
6
7 // =========================================
8 // Creating a MultiIndex
9 // =========================================
10
11 println!("--- Creating MultiIndex from Tuples ---");
12
13 // Create MultiIndex from tuples (vector of vectors)
14 let tuples = vec![
15 vec!["A".to_string(), "a".to_string()],
16 vec!["A".to_string(), "b".to_string()],
17 vec!["B".to_string(), "a".to_string()],
18 vec!["B".to_string(), "b".to_string()],
19 ];
20
21 let names = Some(vec![Some("first".to_string()), Some("second".to_string())]);
22 let multi_idx = MultiIndex::from_tuples(tuples, names)?;
23
24 println!("MultiIndex: {:?}\n", multi_idx);
25 println!("Number of Levels: {}", multi_idx.n_levels());
26 println!("Number of Rows: {}\n", multi_idx.len());
27
28 // =========================================
29 // Operations on MultiIndex
30 // =========================================
31
32 println!("--- Retrieving Level Values ---");
33 let level0_values = multi_idx.get_level_values(0)?;
34 println!("Values in Level 0: {:?}", level0_values);
35
36 let level1_values = multi_idx.get_level_values(1)?;
37 println!("Values in Level 1: {:?}", level1_values);
38
39 println!("--- Swapping Levels ---");
40 let swapped = multi_idx.swaplevel(0, 1)?;
41 println!("After Swapping Levels: {:?}\n", swapped);
42
43 // =========================================
44 // DataFrame with MultiIndex
45 // =========================================
46
47 println!("--- DataFrame with MultiIndex ---");
48
49 // Create DataFrame
50 let mut df = DataFrame::with_multi_index(multi_idx.clone());
51
52 // Add data
53 let data = vec![
54 "data1".to_string(),
55 "data2".to_string(),
56 "data3".to_string(),
57 "data4".to_string(),
58 ];
59 df.add_column(
60 "data".to_string(),
61 pandrs::Series::new(data, Some("data".to_string()))?,
62 )?;
63
64 println!("DataFrame: {:?}\n", df);
65 println!("Number of Rows: {}", df.row_count());
66 println!("Number of Columns: {}", df.column_count());
67
68 // =========================================
69 // Conversion Between Simple Index and MultiIndex
70 // =========================================
71
72 println!("\n--- Example of Index Conversion ---");
73
74 // Create DataFrame from simple index
75 let simple_idx = Index::new(vec!["X".to_string(), "Y".to_string(), "Z".to_string()])?;
76 let mut simple_df = DataFrame::with_index(simple_idx);
77
78 // Add data
79 let values = vec![100, 200, 300];
80 let str_values: Vec<String> = values.iter().map(|v| v.to_string()).collect();
81 simple_df.add_column(
82 "values".to_string(),
83 pandrs::Series::new(str_values, Some("values".to_string()))?,
84 )?;
85
86 println!("Simple Index DataFrame: {:?}", simple_df);
87
88 // Prepare for conversion to MultiIndex
89 let tuples = vec![
90 vec!["Category".to_string(), "X".to_string()],
91 vec!["Category".to_string(), "Y".to_string()],
92 vec!["Category".to_string(), "Z".to_string()],
93 ];
94
95 // Create and set MultiIndex
96 let new_multi_idx = MultiIndex::from_tuples(tuples, None)?;
97 simple_df.set_multi_index(new_multi_idx)?;
98
99 println!("After Conversion to MultiIndex: {:?}", simple_df);
100
101 println!("\n=== Sample Complete ===");
102 Ok(())
103}
pub fn contains_column(&self, column_name: &str) -> bool
Check whether the DataFrame contains a column with the given name.
pub fn row_count(&self) -> usize
Get the number of rows in the DataFrame.
Examples found in repository:
4fn main() -> Result<()> {
5 println!("=== PandRS Basic Usage Example ===");
6
7 // Creating Series
8 let ages = Series::new(vec![30, 25, 40], Some("age".to_string()))?;
9 let heights = Series::new(vec![180, 175, 182], Some("height".to_string()))?;
10 let names = Series::new(
11 vec![
12 "Alice".to_string(),
13 "Bob".to_string(),
14 "Charlie".to_string(),
15 ],
16 Some("name".to_string()),
17 )?;
18
19 println!("Age Series: {:?}", ages);
20 println!("Height Series: {:?}", heights);
21 println!("Name Series: {:?}", names);
22
23 // Statistics for numeric series
24 println!("\n=== Statistics for Age Series ===");
25 println!("Sum: {}", ages.sum());
26 println!("Mean: {}", ages.mean()?);
27 println!("Min: {}", ages.min()?);
28 println!("Max: {}", ages.max()?);
29
30 // Creating a DataFrame
31 println!("\n=== Creating a DataFrame ===");
32 let mut df = DataFrame::new();
33 df.add_column("name".to_string(), names)?;
34 df.add_column("age".to_string(), ages)?;
35 df.add_column("height".to_string(), heights)?;
36
37 println!("DataFrame: {:?}", df);
38 println!("Number of Columns: {}", df.column_count());
39 println!("Number of Rows: {}", df.row_count());
40 println!("Column Names: {:?}", df.column_names());
41
42 // Testing saving to and loading from CSV
43 let file_path = "example_data.csv";
44 df.to_csv(file_path)?;
45 println!("\nSaved to CSV file: {}", file_path);
46
47 // Testing loading from CSV (may not be fully implemented yet)
48 match DataFrame::from_csv(file_path, true) {
49 Ok(loaded_df) => {
50 println!("DataFrame loaded from CSV: {:?}", loaded_df);
51 println!("Number of Columns: {}", loaded_df.column_count());
52 println!("Number of Rows: {}", loaded_df.row_count());
53 println!("Column Names: {:?}", loaded_df.column_names());
54 }
55 Err(e) => {
56 println!("Failed to load CSV: {:?}", e);
57 }
58 }
59
60 println!("\n=== Sample Complete ===");
61 Ok(())
62}
More examples
5fn main() -> Result<()> {
6 println!("=== Pivot Table and Grouping Example ===");
7
8 // Create sample data
9 let mut df = DataFrame::new();
10
11 // Create column data
12 let category = Series::new(
13 vec![
14 "A".to_string(),
15 "B".to_string(),
16 "A".to_string(),
17 "C".to_string(),
18 "B".to_string(),
19 "A".to_string(),
20 "C".to_string(),
21 "B".to_string(),
22 ],
23 Some("category".to_string()),
24 )?;
25
26 let region = Series::new(
27 vec![
28 "East".to_string(),
29 "West".to_string(),
30 "West".to_string(),
31 "East".to_string(),
32 "East".to_string(),
33 "West".to_string(),
34 "West".to_string(),
35 "East".to_string(),
36 ],
37 Some("region".to_string()),
38 )?;
39
40 let sales = Series::new(
41 vec![100, 150, 200, 120, 180, 90, 250, 160],
42 Some("sales".to_string()),
43 )?;
44
45 // Add columns to DataFrame
46 df.add_column("category".to_string(), category)?;
47 df.add_column("region".to_string(), region)?;
48 df.add_column("sales".to_string(), sales)?;
49
50 println!("DataFrame Info:");
51 println!(" Number of columns: {}", df.column_count());
52 println!(" Number of rows: {}", df.row_count());
53 println!(" Column names: {:?}", df.column_names());
54
55 // Grouping and aggregation
56 println!("\n=== Grouping by Category ===");
57 let category_group = df.groupby("category")?;
58
59 println!("Sum by category (in progress):");
60 let _category_sum = category_group.sum(&["sales"])?;
61
62 // Pivot table (in progress)
63 println!("\n=== Pivot Table ===");
64 println!("Sum of sales by category and region (in progress):");
65 let _pivot_result = df.pivot_table("category", "region", "sales", AggFunction::Sum)?;
66
67 // Note: Pivot table and grouping features are still under development,
68 // so actual results are not displayed
69
70 println!("\n=== Aggregation Function Examples ===");
71 let functions = [
72 AggFunction::Sum,
73 AggFunction::Mean,
74 AggFunction::Min,
75 AggFunction::Max,
76 AggFunction::Count,
77 ];
78
79 for func in &functions {
80 println!(
81 "Aggregation Function: {} ({})",
82 func.name(),
83 match func {
84 AggFunction::Sum => "Sum",
85 AggFunction::Mean => "Mean",
86 AggFunction::Min => "Min",
87 AggFunction::Max => "Max",
88 AggFunction::Count => "Count",
89 }
90 );
91 }
92
93 println!("\n=== Pivot Table Example (Complete) ===");
94 Ok(())
95}
4fn main() -> Result<()> {
5 println!("=== Example of Using MultiIndex ===\n");
6
7 // =========================================
8 // Creating a MultiIndex
9 // =========================================
10
11 println!("--- Creating MultiIndex from Tuples ---");
12
13 // Create MultiIndex from tuples (vector of vectors)
14 let tuples = vec![
15 vec!["A".to_string(), "a".to_string()],
16 vec!["A".to_string(), "b".to_string()],
17 vec!["B".to_string(), "a".to_string()],
18 vec!["B".to_string(), "b".to_string()],
19 ];
20
21 let names = Some(vec![Some("first".to_string()), Some("second".to_string())]);
22 let multi_idx = MultiIndex::from_tuples(tuples, names)?;
23
24 println!("MultiIndex: {:?}\n", multi_idx);
25 println!("Number of Levels: {}", multi_idx.n_levels());
26 println!("Number of Rows: {}\n", multi_idx.len());
27
28 // =========================================
29 // Operations on MultiIndex
30 // =========================================
31
32 println!("--- Retrieving Level Values ---");
33 let level0_values = multi_idx.get_level_values(0)?;
34 println!("Values in Level 0: {:?}", level0_values);
35
36 let level1_values = multi_idx.get_level_values(1)?;
37 println!("Values in Level 1: {:?}", level1_values);
38
39 println!("--- Swapping Levels ---");
40 let swapped = multi_idx.swaplevel(0, 1)?;
41 println!("After Swapping Levels: {:?}\n", swapped);
42
43 // =========================================
44 // DataFrame with MultiIndex
45 // =========================================
46
47 println!("--- DataFrame with MultiIndex ---");
48
49 // Create DataFrame
50 let mut df = DataFrame::with_multi_index(multi_idx.clone());
51
52 // Add data
53 let data = vec![
54 "data1".to_string(),
55 "data2".to_string(),
56 "data3".to_string(),
57 "data4".to_string(),
58 ];
59 df.add_column(
60 "data".to_string(),
61 pandrs::Series::new(data, Some("data".to_string()))?,
62 )?;
63
64 println!("DataFrame: {:?}\n", df);
65 println!("Number of Rows: {}", df.row_count());
66 println!("Number of Columns: {}", df.column_count());
67
68 // =========================================
69 // Conversion Between Simple Index and MultiIndex
70 // =========================================
71
72 println!("\n--- Example of Index Conversion ---");
73
74 // Create DataFrame from simple index
75 let simple_idx = Index::new(vec!["X".to_string(), "Y".to_string(), "Z".to_string()])?;
76 let mut simple_df = DataFrame::with_index(simple_idx);
77
78 // Add data
79 let values = vec![100, 200, 300];
80 let str_values: Vec<String> = values.iter().map(|v| v.to_string()).collect();
81 simple_df.add_column(
82 "values".to_string(),
83 pandrs::Series::new(str_values, Some("values".to_string()))?,
84 )?;
85
86 println!("Simple Index DataFrame: {:?}", simple_df);
87
88 // Prepare for conversion to MultiIndex
89 let tuples = vec![
90 vec!["Category".to_string(), "X".to_string()],
91 vec!["Category".to_string(), "Y".to_string()],
92 vec!["Category".to_string(), "Z".to_string()],
93 ];
94
95 // Create and set MultiIndex
96 let new_multi_idx = MultiIndex::from_tuples(tuples, None)?;
97 simple_df.set_multi_index(new_multi_idx)?;
98
99 println!("After Conversion to MultiIndex: {:?}", simple_df);
100
101 println!("\n=== Sample Complete ===");
102 Ok(())
103}
6fn main() -> Result<()> {
7 // Path to a large CSV file (replace with actual path)
8 let file_path = "examples/data/large_dataset.csv";
9
10 println!("Working with large datasets example");
11 println!("----------------------------------");
12
13 // Create a disk-based DataFrame with custom configuration
14 let config = DiskConfig {
15 memory_limit: 500 * 1024 * 1024, // 500MB memory limit
16 chunk_size: 50_000, // Process in chunks of 50,000 rows
17 use_memory_mapping: true, // Use memory mapping for efficiency
18 temp_dir: None, // Use system temp directory
19 };
20
21 let disk_df = DiskBasedDataFrame::new(file_path, Some(config))?;
22
23 // Get schema information
24 println!("DataFrame Schema:");
25 for column in disk_df.schema().column_names() {
26 println!(" - {}", column);
27 }
28
29 // Process in chunks for counting rows
30 let mut chunked_df = disk_df.chunked()?;
31 let mut total_rows = 0;
32
33 println!("\nProcessing in chunks:");
34 while let Some(chunk) = chunked_df.next_chunk()? {
35 let chunk_rows = chunk.row_count();
36 total_rows += chunk_rows;
37 println!(" - Processed chunk with {} rows", chunk_rows);
38 }
39
40 println!("\nTotal rows in dataset: {}", total_rows);
41
42 // Example of filtering data
43 println!("\nFiltering data:");
44 let filtered = disk_df.filter(|value, _| {
45 // Example filter: keep only values starting with 'A'
46 value.starts_with('A')
47 })?;
48
49 println!("Filtered result has {} rows", filtered.len());
50
51 // Example of selecting columns
52 println!("\nSelecting columns:");
53 let columns_to_select = vec!["column1", "column2"]; // Replace with actual column names
54 let selected = disk_df.select(&columns_to_select)?;
55
56 println!("Selected result has {} rows and columns:", selected.len());
57 // Since the result is a Vec<HashMap<String, String>>, we need to check the keys of the first element
58 if !selected.is_empty() {
59 for column in selected[0].keys() {
60 println!(" - {}", column);
61 }
62 }
63
64 // Example of grouping and aggregation
65 println!("\nGrouping and aggregation:");
66 let grouped = disk_df.group_by("category_column", "value_column", |values| {
67 // Example aggregation: calculate average
68 let sum: f64 = values.iter().filter_map(|v| v.parse::<f64>().ok()).sum();
69 let count = values.len();
70
71 if count > 0 {
72 Ok(format!("{:.2}", sum / count as f64))
73 } else {
74 Ok("0.0".to_string())
75 }
76 })?;
77
78 println!("Grouped result has {} groups", grouped.len());
79
80 // Example of parallel processing
81 println!("\nParallel processing example:");
82 let chunk_results = chunked_df.parallel_process(
83 // Process each chunk
84 |chunk| {
85 let mut counts = HashMap::new();
86
87 // Example: count occurrences of values in a column
88 for row_idx in 0..chunk.row_count() {
89 if let Ok(value) = chunk.get_string_value("category_column", row_idx) {
90 *counts.entry(value.to_string()).or_insert(0) += 1;
91 }
92 }
93
94 Ok(counts)
95 },
96 // Combine results
97 |chunk_maps| {
98 let mut result_map = HashMap::new();
99
100 // Merge all maps
101 for chunk_map in chunk_maps {
102 for (key, count) in chunk_map {
103 *result_map.entry(key).or_insert(0) += count;
104 }
105 }
106
107 Ok(result_map)
108 },
109 )?;
110
111 println!("Category counts from parallel processing:");
112 for (category, count) in chunk_results.iter().take(5) {
113 println!(" - {}: {}", category, count);
114 }
115
116 Ok(())
117}
4fn main() -> Result<(), Box<dyn Error>> {
5 println!("=== Example of Parallel Processing Features ===\n");
6
7 // Create sample data
8 let numbers = Series::new(
9 vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
10 Some("numbers".to_string()),
11 )?;
12
13 // Parallel map: square each number
14 println!("Example of parallel map processing:");
15 let squared = numbers.par_map(|x| x * x);
16 println!("Original values: {:?}", numbers.values());
17 println!("Squared values: {:?}", squared.values());
18
19 // Parallel filter: keep only even numbers
20 println!("\nExample of parallel filtering:");
21 let even_numbers = numbers.par_filter(|x| x % 2 == 0);
22 println!("Even Numbers: {:?}", even_numbers.values());
23
24 // Processing data containing NA
25 let na_data = vec![
26 NA::Value(10),
27 NA::Value(20),
28 NA::NA,
29 NA::Value(40),
30 NA::NA,
31 NA::Value(60),
32 ];
33 let na_series = NASeries::new(na_data, Some("na_numbers".to_string()))?;
34
35 println!("\nParallel processing of data containing NA:");
36 let na_tripled = na_series.par_map(|x| x * 3);
37 println!("Original values: {:?}", na_series.values());
38 println!("Tripled values: {:?}", na_tripled.values());
39
40 // Parallel processing of DataFrame
41 println!("\nParallel processing of DataFrame:");
42
43 // Creating a sample DataFrame
44 let mut df = DataFrame::new();
45 let names = Series::new(
46 vec!["Alice", "Bob", "Charlie", "David", "Eve"],
47 Some("name".to_string()),
48 )?;
49 let ages = Series::new(vec![25, 30, 35, 40, 45], Some("age".to_string()))?;
50 let scores = Series::new(vec![85, 90, 78, 92, 88], Some("score".to_string()))?;
51
52 df.add_column("name".to_string(), names)?;
53 df.add_column("age".to_string(), ages)?;
54 df.add_column("score".to_string(), scores)?;
55
56 // Parallel transformation of DataFrame
57 println!("Example of DataFrame.par_apply:");
58 let transformed_df = df.par_apply(|col, _row, val| {
59 match col {
60 "age" => {
61 // Add 1 to age
62 let age: i32 = val.parse().unwrap_or(0);
63 (age + 1).to_string()
64 }
65 "score" => {
66 // Add 5 to score
67 let score: i32 = val.parse().unwrap_or(0);
68 (score + 5).to_string()
69 }
70 _ => val.to_string(),
71 }
72 })?;
73
74 println!(
75 "Original DF row count: {}, column count: {}",
76 df.row_count(),
77 df.column_count()
78 );
79 println!(
80 "Transformed DF row count: {}, column count: {}",
81 transformed_df.row_count(),
82 transformed_df.column_count()
83 );
84
85 // Filtering rows
86 println!("\nExample of DataFrame.par_filter_rows:");
87 let filtered_df = df.par_filter_rows(|row| {
88 // Keep only rows where score > 85
89 if let Ok(values) = df.get_column_numeric_values("score") {
90 if row < values.len() {
91 return values[row] > 85.0;
92 }
93 }
94 false
95 })?;
96
97 println!("Row count after filtering: {}", filtered_df.row_count());
98
99 // Example of using ParallelUtils
100 println!("\nExample of ParallelUtils features:");
101
102 let unsorted = vec![5, 3, 8, 1, 9, 4, 7, 2, 6];
103 let sorted = ParallelUtils::par_sort(unsorted.clone());
104 println!("Before sorting: {:?}", unsorted);
105 println!("After sorting: {:?}", sorted);
106
107 let numbers_vec = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
108 let sum = ParallelUtils::par_sum(&numbers_vec);
109 let mean = ParallelUtils::par_mean(&numbers_vec);
110 let min = ParallelUtils::par_min(&numbers_vec);
111 let max = ParallelUtils::par_max(&numbers_vec);
112
113 println!("Sum: {}", sum);
114 println!("Mean: {}", mean.unwrap());
115 println!("Min: {}", min.unwrap());
116 println!("Max: {}", max.unwrap());
117
118 println!("\n=== Example of Parallel Processing Features Complete ===");
119 Ok(())
120}
6fn main() -> Result<()> {
7 println!("=== Example of Categorical Data with Missing Values ===\n");
8
9 // 1. Create categorical data
10 println!("1. Create categorical data");
11
12 // Create a vector with NA values
13 let values = vec![
14 NA::Value("Red".to_string()),
15 NA::Value("Blue".to_string()),
16 NA::NA, // Missing value
17 NA::Value("Green".to_string()),
18 NA::Value("Red".to_string()), // Duplicate value
19 ];
20
21 // Create categorical data type from vector
22 // Create as unordered category
23 let cat = StringCategorical::from_na_vec(
24 values.clone(),
25 None, // Auto-detect categories
26 Some(CategoricalOrder::Unordered), // Unordered
27 )?;
28
29 println!("Categories: {:?}", cat.categories());
30 println!("Number of categories: {}", cat.categories().len());
31 println!("Number of data: {}", cat.len());
32
33 // Display category codes
34 println!("Internal codes: {:?}", cat.codes());
35 println!();
36
37 // 2. Create ordered categorical data
38 println!("2. Create ordered categorical data");
39
40 // Explicitly ordered category list
41 let ordered_categories = vec!["Low".to_string(), "Medium".to_string(), "High".to_string()];
42
43 // Create a vector with NA values
44 let values = vec![
45 NA::Value("Medium".to_string()),
46 NA::Value("Low".to_string()),
47 NA::NA, // Missing value
48 NA::Value("High".to_string()),
49 NA::Value("Medium".to_string()), // Duplicate value
50 ];
51
52 // Create as ordered category
53 let ordered_cat = StringCategorical::from_na_vec(
54 values.clone(),
55 Some(ordered_categories), // Explicit category list
56 Some(CategoricalOrder::Ordered), // Ordered
57 )?;
58
59 println!("Ordered categories: {:?}", ordered_cat.categories());
60 println!("Number of categories: {}", ordered_cat.categories().len());
61 println!("Number of data: {}", ordered_cat.len());
62
63 // Display category codes
64 println!("Internal codes: {:?}", ordered_cat.codes());
65 println!();
66
67 // 3. Operations on categorical data
68 println!("3. Operations on categorical data");
69
70 // Create two categorical data
71 let values1 = vec![
72 NA::Value("A".to_string()),
73 NA::Value("B".to_string()),
74 NA::NA,
75 NA::Value("C".to_string()),
76 ];
77
78 let values2 = vec![
79 NA::Value("B".to_string()),
80 NA::Value("C".to_string()),
81 NA::Value("D".to_string()),
82 NA::NA,
83 ];
84
85 let cat1 = StringCategorical::from_na_vec(values1, None, None)?;
86 let cat2 = StringCategorical::from_na_vec(values2, None, None)?;
87
88 // Set operations
89 let union = cat1.union(&cat2)?; // Union
90 let intersection = cat1.intersection(&cat2)?; // Intersection
91 let difference = cat1.difference(&cat2)?; // Difference
92
93 println!("Categories of set 1: {:?}", cat1.categories());
94 println!("Categories of set 2: {:?}", cat2.categories());
95 println!("Union: {:?}", union.categories());
96 println!("Intersection: {:?}", intersection.categories());
97 println!("Difference (set 1 - set 2): {:?}", difference.categories());
98 println!();
99
100 // 4. Using categorical columns in DataFrame
101 println!("4. Using categorical columns in DataFrame");
102
103 // Create a vector with NA values (first create for categorical)
104 let values = vec![
105 NA::Value("High".to_string()),
106 NA::Value("Medium".to_string()),
107 NA::NA,
108 NA::Value("Low".to_string()),
109 ];
110
111 // Simplified for sample code
112 let order_cats = vec!["Low".to_string(), "Medium".to_string(), "High".to_string()];
113
114 // Create categorical data
115 let cat_eval = StringCategorical::from_na_vec(
116 values.clone(), // Clone it
117 Some(order_cats),
118 Some(CategoricalOrder::Ordered),
119 )?;
120
121 // Output the size of the created categorical data
122 println!("Size of created categorical data: {}", cat_eval.len());
123
124 // Add as categorical column
125 let categoricals = vec![("Evaluation".to_string(), cat_eval)];
126 let mut df = DataFrame::from_categoricals(categoricals)?;
127
128 // Check the number of rows in the data and match it
129 println!("Number of rows in DataFrame: {}", df.row_count());
130 println!("Note: NA values are excluded when creating DataFrame");
131
132 // Add numeric column (match the number of rows)
133 let scores = vec![95, 80, 0]; // Match the number of rows in DataFrame
134 println!("Size of scores: {}", scores.len());
135
136 df.add_column(
137 "Score".to_string(),
138 Series::new(scores, Some("Score".to_string()))?,
139 )?;
140
141 println!("DataFrame: ");
142 println!("{:#?}", df);
143
144 // Retrieve and verify categorical data
145 println!(
146 "Is 'Evaluation' column categorical: {}",
147 df.is_categorical("Evaluation")
148 );
149
150 // Explicitly handle errors
151 match df.get_categorical::<String>("Evaluation") {
152 Ok(cat_col) => println!(
153 "Categories of 'Evaluation' column: {:?}",
154 cat_col.categories()
155 ),
156 Err(_) => println!("Failed to retrieve categories of 'Evaluation' column"),
157 }
158 println!();
159
160 // 5. Input and output with CSV file
161 println!("5. Input and output with CSV file");
162
163 // Save to temporary file
164 let temp_path = Path::new("/tmp/categorical_example.csv");
165 df.to_csv(temp_path)?;
166
167 println!("Saved to CSV file: {}", temp_path.display());
168
169 // Load from file
170 let df_loaded = DataFrame::from_csv(temp_path, true)?;
171
172 // After loading from CSV, categorical information is lost (loaded as regular string column)
173 println!("Data loaded from CSV:");
174 println!("{:#?}", df_loaded);
175
176 // Check data loaded from CSV
177
178 // Note that data loaded from CSV is in a special format
179 println!("Example of data format loaded from CSV:");
180 println!(
181 "First value of 'Evaluation' column: {:?}",
182 df_loaded
183 .get_column::<String>("Evaluation")
184 .unwrap()
185 .values()[0]
186 );
187
188 // To reconstruct categorical data from this CSV loaded data,
189 // more complex processing is required, so the following is a simple example
190
191 // Create new categorical data as an example
192 let new_values = vec![
193 NA::Value("High".to_string()),
194 NA::Value("Medium".to_string()),
195 NA::NA,
196 NA::Value("Low".to_string()),
197 ];
198
199 let new_cat =
200 StringCategorical::from_na_vec(new_values, None, Some(CategoricalOrder::Ordered))?;
201
202 println!("Example of newly created categorical data:");
203 println!("Categories: {:?}", new_cat.categories());
204 println!("Order: {:?}", new_cat.ordered());
205
206 println!("\nTo actually convert data loaded from CSV to categorical data,");
207 println!("parsing processing according to the format and string escaping method of the CSV is required.");
208
209 println!("\n=== Sample End ===");
210 Ok(())
211}
Source
pub fn get_string_value(&self, column_name: &str, row_idx: usize) -> Result<&str>
Get a string value from the DataFrame.
Examples found in repository:
6fn main() -> Result<()> {
7 // Path to a large CSV file (replace with actual path)
8 let file_path = "examples/data/large_dataset.csv";
9
10 println!("Working with large datasets example");
11 println!("----------------------------------");
12
13 // Create a disk-based DataFrame with custom configuration
14 let config = DiskConfig {
15 memory_limit: 500 * 1024 * 1024, // 500MB memory limit
16 chunk_size: 50_000, // Process in chunks of 50,000 rows
17 use_memory_mapping: true, // Use memory mapping for efficiency
18 temp_dir: None, // Use system temp directory
19 };
20
21 let disk_df = DiskBasedDataFrame::new(file_path, Some(config))?;
22
23 // Get schema information
24 println!("DataFrame Schema:");
25 for column in disk_df.schema().column_names() {
26 println!(" - {}", column);
27 }
28
29 // Process in chunks for counting rows
30 let mut chunked_df = disk_df.chunked()?;
31 let mut total_rows = 0;
32
33 println!("\nProcessing in chunks:");
34 while let Some(chunk) = chunked_df.next_chunk()? {
35 let chunk_rows = chunk.row_count();
36 total_rows += chunk_rows;
37 println!(" - Processed chunk with {} rows", chunk_rows);
38 }
39
40 println!("\nTotal rows in dataset: {}", total_rows);
41
42 // Example of filtering data
43 println!("\nFiltering data:");
44 let filtered = disk_df.filter(|value, _| {
45 // Example filter: keep only values starting with 'A'
46 value.starts_with('A')
47 })?;
48
49 println!("Filtered result has {} rows", filtered.len());
50
51 // Example of selecting columns
52 println!("\nSelecting columns:");
53 let columns_to_select = vec!["column1", "column2"]; // Replace with actual column names
54 let selected = disk_df.select(&columns_to_select)?;
55
56 println!("Selected result has {} rows and columns:", selected.len());
57 // Since the result is a Vec<HashMap<String, String>>, we need to check the keys of the first element
58 if !selected.is_empty() {
59 for column in selected[0].keys() {
60 println!(" - {}", column);
61 }
62 }
63
64 // Example of grouping and aggregation
65 println!("\nGrouping and aggregation:");
66 let grouped = disk_df.group_by("category_column", "value_column", |values| {
67 // Example aggregation: calculate average
68 let sum: f64 = values.iter().filter_map(|v| v.parse::<f64>().ok()).sum();
69 let count = values.len();
70
71 if count > 0 {
72 Ok(format!("{:.2}", sum / count as f64))
73 } else {
74 Ok("0.0".to_string())
75 }
76 })?;
77
78 println!("Grouped result has {} groups", grouped.len());
79
80 // Example of parallel processing
81 println!("\nParallel processing example:");
82 let chunk_results = chunked_df.parallel_process(
83 // Process each chunk
84 |chunk| {
85 let mut counts = HashMap::new();
86
87 // Example: count occurrences of values in a column
88 for row_idx in 0..chunk.row_count() {
89 if let Ok(value) = chunk.get_string_value("category_column", row_idx) {
90 *counts.entry(value.to_string()).or_insert(0) += 1;
91 }
92 }
93
94 Ok(counts)
95 },
96 // Combine results
97 |chunk_maps| {
98 let mut result_map = HashMap::new();
99
100 // Merge all maps
101 for chunk_map in chunk_maps {
102 for (key, count) in chunk_map {
103 *result_map.entry(key).or_insert(0) += count;
104 }
105 }
106
107 Ok(result_map)
108 },
109 )?;
110
111 println!("Category counts from parallel processing:");
112 for (category, count) in chunk_results.iter().take(5) {
113 println!(" - {}: {}", category, count);
114 }
115
116 Ok(())
117}
Source
pub fn add_column<T: 'static + Debug + Clone + Send + Sync>(&mut self, column_name: String, series: Series<T>) -> Result<()>
Add a column to the DataFrame.
Examples found in repository:
81fn create_sample_dataframe() -> Result<DataFrame> {
82 let mut df = DataFrame::new();
83
84 // Add date column
85 let dates = vec![
86 "2023-01-01",
87 "2023-01-02",
88 "2023-01-03",
89 "2023-01-04",
90 "2023-01-05",
91 "2023-01-06",
92 "2023-01-07",
93 "2023-01-08",
94 "2023-01-09",
95 "2023-01-10",
96 ];
97 let date_series = Series::new(dates, Some("Date".to_string()))?;
98 df.add_column("Date".to_string(), date_series)?;
99
100 // Add product column
101 let products = vec![
102 "ProductA", "ProductB", "ProductA", "ProductC", "ProductB", "ProductA", "ProductC",
103 "ProductA", "ProductB", "ProductC",
104 ];
105 let product_series = Series::new(products, Some("Product".to_string()))?;
106 df.add_column("Product".to_string(), product_series)?;
107
108 // Add price column
109 let prices = vec![
110 "100", "150", "110", "200", "160", "120", "210", "115", "165", "220",
111 ];
112 let price_series = Series::new(prices, Some("Price".to_string()))?;
113 df.add_column("Price".to_string(), price_series)?;
114
115 // Add quantity column
116 let quantities = vec!["5", "3", "6", "2", "4", "7", "3", "8", "5", "4"];
117 let quantity_series = Series::new(quantities, Some("Quantity".to_string()))?;
118 df.add_column("Quantity".to_string(), quantity_series)?;
119
120 Ok(df)
121}
More examples
96fn create_sample_dataframe(size: usize) -> Result<DataFrame> {
97 // Create data for columns
98 let mut x1 = Vec::with_capacity(size);
99 let mut x2 = Vec::with_capacity(size);
100 let mut x3 = Vec::with_capacity(size);
101 let mut x4 = Vec::with_capacity(size);
102 let mut y = Vec::with_capacity(size);
103
104 for i in 0..size {
105 // Create features with some correlation to the target
106 let x1_val = (i % 100) as f64 / 100.0;
107 let x2_val = ((i * 2) % 100) as f64 / 100.0;
108 let x3_val = ((i * 3) % 100) as f64 / 100.0;
109 let x4_val = ((i * 5) % 100) as f64 / 100.0;
110
111 // Create a target variable that depends on the features
112 let y_val = 2.0 * x1_val + 1.5 * x2_val - 0.5 * x3_val
113 + 3.0 * x4_val
114 + 0.1 * (rand::random::<f64>() - 0.5); // Add some noise
115
116 x1.push(x1_val);
117 x2.push(x2_val);
118 x3.push(x3_val);
119 x4.push(x4_val);
120 y.push(y_val);
121 }
122
123 // Create DataFrame
124 let mut df = DataFrame::new();
125 df.add_column("x1".to_string(), Series::new(x1, Some("x1".to_string()))?)?;
126 df.add_column("x2".to_string(), Series::new(x2, Some("x2".to_string()))?)?;
127 df.add_column("x3".to_string(), Series::new(x3, Some("x3".to_string()))?)?;
128 df.add_column("x4".to_string(), Series::new(x4, Some("x4".to_string()))?)?;
129 df.add_column("y".to_string(), Series::new(y, Some("y".to_string()))?)?;
130
131 Ok(df)
132}
22fn descriptive_stats_example() -> Result<()> {
23 println!("1. Descriptive Statistics Sample");
24 println!("-----------------");
25
26 // Create dataset
27 let mut df = DataFrame::new();
28 let values = Series::new(
29 vec![10.5, 12.3, 15.2, 9.8, 11.5, 13.7, 14.3, 12.9, 8.5, 10.2],
30 Some("Values".to_string()),
31 )?;
32
33 df.add_column("Values".to_string(), values)?;
34
35 // Descriptive statistics
36 let stats = pandrs::stats::describe(
37 df.get_column("Values")
38 .unwrap()
39 .values()
40 .iter()
41 .map(|v: &String| v.parse::<f64>().unwrap_or(0.0))
42 .collect::<Vec<f64>>(),
43 )?;
44
45 // Display results
46 println!("Count: {}", stats.count);
47 println!("Mean: {:.2}", stats.mean);
48 println!("Standard Deviation: {:.2}", stats.std);
49 println!("Min: {:.2}", stats.min);
50 println!("First Quartile: {:.2}", stats.q1);
51 println!("Median: {:.2}", stats.median);
52 println!("Third Quartile: {:.2}", stats.q3);
53 println!("Max: {:.2}", stats.max);
54
55 // Covariance and correlation coefficient
56 let data1 = vec![1.0, 2.0, 3.0, 4.0, 5.0];
57 let data2 = vec![1.5, 3.1, 4.2, 5.8, 7.1];
58
59 let cov = pandrs::stats::covariance(&data1, &data2)?;
60 let corr = pandrs::stats::correlation(&data1, &data2)?;
61
62 println!("\nCovariance and Correlation Coefficient:");
63 println!("Covariance: {:.4}", cov);
64 println!("Correlation Coefficient: {:.4}", corr);
65
66 println!();
67 Ok(())
68}
69
70fn ttest_example() -> Result<()> {
71 println!("2. t-test Sample");
72 println!("--------------");
73
74 // Create sample data
75 let group1 = vec![5.2, 5.8, 6.1, 5.5, 5.9, 6.2, 5.7, 6.0, 5.6, 5.8];
76 let group2 = vec![4.8, 5.1, 5.3, 4.9, 5.0, 5.2, 4.7, 5.1, 4.9, 5.0];
77
78 // Perform t-test with significance level 0.05 (5%)
79 let alpha = 0.05;
80
81 // t-test assuming equal variances
82 let result_equal = pandrs::stats::ttest(&group1, &group2, alpha, true)?;
83
84 println!("t-test result assuming equal variances:");
85 print_ttest_result(&result_equal);
86
87 // Welch's t-test (not assuming equal variances)
88 let result_welch = pandrs::stats::ttest(&group1, &group2, alpha, false)?;
89
90 println!("\nWelch's t-test result (not assuming equal variances):");
91 print_ttest_result(&result_welch);
92
93 println!();
94 Ok(())
95}
96
97fn print_ttest_result(result: &TTestResult) {
98 println!("t-statistic: {:.4}", result.statistic);
99 println!("p-value: {:.4}", result.pvalue);
100 println!("Degrees of Freedom: {}", result.df);
101 println!(
102 "Significant: {}",
103 if result.significant { "Yes" } else { "No" }
104 );
105}
106
107fn regression_example() -> Result<()> {
108 println!("3. Regression Analysis Sample");
109 println!("-----------------");
110
111 // Create dataset
112 let mut df = DataFrame::new();
113
114 // Explanatory variables
115 let x1 = Series::new(
116 vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0],
117 Some("x1".to_string()),
118 )?;
119 let x2 = Series::new(
120 vec![5.0, 8.0, 11.0, 14.0, 17.0, 20.0, 23.0, 26.0, 29.0, 32.0],
121 Some("x2".to_string()),
122 )?;
123
124 // Dependent variable (y = 2*x1 + 1.5*x2 + 3 + noise)
125 let mut y_values = Vec::with_capacity(10);
126 let mut rng = rand::rng();
127
128 for i in 0..10 {
129 let noise = rng.random_range(-1.0..1.0);
130 let y_val = 2.0 * (i as f64 + 1.0) + 1.5 * (5.0 + 3.0 * i as f64) + 3.0 + noise;
131 y_values.push(y_val);
132 }
133
134 let y = Series::new(y_values, Some("y".to_string()))?;
135
136 // Add to DataFrame
137 df.add_column("x1".to_string(), x1)?;
138 df.add_column("x2".to_string(), x2)?;
139 df.add_column("y".to_string(), y)?;
140
141 // Perform regression analysis
142 let model = pandrs::stats::linear_regression(&df, "y", &["x1", "x2"])?;
143
144 // Display results
145 println!(
146 "Linear Regression Model: y = {:.4} + {:.4} × x1 + {:.4} × x2",
147 model.intercept, model.coefficients[0], model.coefficients[1]
148 );
149 println!("R²: {:.4}", model.r_squared);
150 println!("Adjusted R²: {:.4}", model.adj_r_squared);
151 println!("p-values of regression coefficients: {:?}", model.p_values);
152
153 // Simple regression example
154 println!("\nSimple Regression Model (x1 only):");
155 let model_simple = pandrs::stats::linear_regression(&df, "y", &["x1"])?;
156 println!(
157 "Linear Regression Model: y = {:.4} + {:.4} × x1",
158 model_simple.intercept, model_simple.coefficients[0]
159 );
160 println!("R²: {:.4}", model_simple.r_squared);
161
162 Ok(())
163}
4fn main() -> Result<()> {
5 println!("=== PandRS Basic Usage Example ===");
6
7 // Creating Series
8 let ages = Series::new(vec![30, 25, 40], Some("age".to_string()))?;
9 let heights = Series::new(vec![180, 175, 182], Some("height".to_string()))?;
10 let names = Series::new(
11 vec![
12 "Alice".to_string(),
13 "Bob".to_string(),
14 "Charlie".to_string(),
15 ],
16 Some("name".to_string()),
17 )?;
18
19 println!("Age Series: {:?}", ages);
20 println!("Height Series: {:?}", heights);
21 println!("Name Series: {:?}", names);
22
23 // Statistics for numeric series
24 println!("\n=== Statistics for Age Series ===");
25 println!("Sum: {}", ages.sum());
26 println!("Mean: {}", ages.mean()?);
27 println!("Min: {}", ages.min()?);
28 println!("Max: {}", ages.max()?);
29
30 // Creating a DataFrame
31 println!("\n=== Creating a DataFrame ===");
32 let mut df = DataFrame::new();
33 df.add_column("name".to_string(), names)?;
34 df.add_column("age".to_string(), ages)?;
35 df.add_column("height".to_string(), heights)?;
36
37 println!("DataFrame: {:?}", df);
38 println!("Number of Columns: {}", df.column_count());
39 println!("Number of Rows: {}", df.row_count());
40 println!("Column Names: {:?}", df.column_names());
41
42 // Testing saving to and loading from CSV
43 let file_path = "example_data.csv";
44 df.to_csv(file_path)?;
45 println!("\nSaved to CSV file: {}", file_path);
46
47 // Testing loading from CSV (may not be fully implemented yet)
48 match DataFrame::from_csv(file_path, true) {
49 Ok(loaded_df) => {
50 println!("DataFrame loaded from CSV: {:?}", loaded_df);
51 println!("Number of Columns: {}", loaded_df.column_count());
52 println!("Number of Rows: {}", loaded_df.row_count());
53 println!("Column Names: {:?}", loaded_df.column_names());
54 }
55 Err(e) => {
56 println!("Failed to load CSV: {:?}", e);
57 }
58 }
59
60 println!("\n=== Sample Complete ===");
61 Ok(())
62}
4fn main() -> Result<(), Box<dyn Error>> {
5 // Create a sample DataFrame
6 let mut df = DataFrame::new();
7
8 // Add an integer column
9 let int_data = Series::new(vec![1, 2, 3, 4, 5], Some("id".to_string()))?;
10 df.add_column("id".to_string(), int_data)?;
11
12 // Add a floating-point column
13 let float_data = Series::new(vec![1.1, 2.2, 3.3, 4.4, 5.5], Some("value".to_string()))?;
14 df.add_column("value".to_string(), float_data)?;
15
16 // Add a string column
17 let string_data = Series::new(
18 vec![
19 "A".to_string(),
20 "B".to_string(),
21 "C".to_string(),
22 "D".to_string(),
23 "E".to_string(),
24 ],
25 Some("category".to_string()),
26 )?;
27 df.add_column("category".to_string(), string_data)?;
28
29 println!("Original DataFrame:");
30 println!("{:?}", df);
31
32 // Parquet support is still under development
33 println!("\nNote: Parquet support is currently under development.");
34 println!("It is planned to be available in a future release.");
35
36 /*
37 // Although Parquet functionality is not yet implemented, dependencies have been introduced.
38 // The following code is expected to work in a future version.
39
40 // Write the DataFrame to a Parquet file
41 let path = "example.parquet";
42 match write_parquet(&df, path, Some(ParquetCompression::Snappy)) {
43 Ok(_) => {
44 println!("DataFrame written to {}", path);
45
46 // Read the DataFrame from the Parquet file
47 match read_parquet(path) {
48 Ok(df_read) => {
49 println!("\nDataFrame read from Parquet file:");
50 println!("{:?}", df_read);
51
52 // Verify the results
53 assert_eq!(df.row_count(), df_read.row_count());
54 assert_eq!(df.column_count(), df_read.column_count());
55
56 println!("\nVerification successful: Data matches");
57 },
58 Err(e) => println!("Error reading Parquet file: {}", e),
59 }
60 },
61 Err(e) => println!("Error writing Parquet file: {}", e),
62 }
63 */
64
65 Ok(())
66}
6fn main() -> Result<()> {
7 println!("=== Benchmark with One Million Rows ===\n");
8
9 // Benchmark function
10 fn bench<F>(name: &str, f: F) -> Duration
11 where
12 F: FnOnce() -> (),
13 {
14 println!("Running: {}", name);
15 let start = Instant::now();
16 f();
17 let duration = start.elapsed();
18 println!(" Completed: {:?}\n", duration);
19 duration
20 }
21
22 // Benchmark for creating a DataFrame with one million rows
23 println!("--- DataFrame with One Million Rows ---");
24
25 bench("Creating Series x3 (One Million Rows)", || {
26 let _ = Series::new((0..1_000_000).collect::<Vec<_>>(), Some("A".to_string())).unwrap();
27 let _ = Series::new(
28 (0..1_000_000).map(|i| i as f64 * 0.5).collect::<Vec<_>>(),
29 Some("B".to_string()),
30 )
31 .unwrap();
32 let _ = Series::new(
33 (0..1_000_000)
34 .map(|i| format!("val_{}", i))
35 .collect::<Vec<_>>(),
36 Some("C".to_string()),
37 )
38 .unwrap();
39 });
40
41 let large_duration = bench("Creating DataFrame (3 Columns x One Million Rows)", || {
42 let col_a = Series::new((0..1_000_000).collect::<Vec<_>>(), Some("A".to_string())).unwrap();
43 let col_b = Series::new(
44 (0..1_000_000).map(|i| i as f64 * 0.5).collect::<Vec<_>>(),
45 Some("B".to_string()),
46 )
47 .unwrap();
48 let col_c = Series::new(
49 (0..1_000_000)
50 .map(|i| format!("val_{}", i))
51 .collect::<Vec<_>>(),
52 Some("C".to_string()),
53 )
54 .unwrap();
55
56 let mut df = DataFrame::new();
57 df.add_column("A".to_string(), col_a).unwrap();
58 df.add_column("B".to_string(), col_b).unwrap();
59 df.add_column("C".to_string(), col_c).unwrap();
60 });
61
62 bench("DataFrame from_map (3 Columns x One Million Rows)", || {
63 let mut data = HashMap::new();
64 data.insert(
65 "A".to_string(),
66 (0..1_000_000).map(|n| n.to_string()).collect(),
67 );
68 data.insert(
69 "B".to_string(),
70 (0..1_000_000)
71 .map(|n| format!("{:.1}", n as f64 * 0.5))
72 .collect(),
73 );
74 data.insert(
75 "C".to_string(),
76 (0..1_000_000).map(|i| format!("val_{}", i)).collect(),
77 );
78
79 let _ = DataFrame::from_map(data, None).unwrap();
80 });
81
82 println!(
83 "Time to create DataFrame with one million rows in pure Rust: {:?}",
84 large_duration
85 );
86
87 Ok(())
88}
Source
pub fn column_names(&self) -> Vec<String>
Get the names of the columns in the DataFrame.
Examples found in repository:
4fn main() -> Result<()> {
5 println!("=== PandRS Basic Usage Example ===");
6
7 // Creating Series
8 let ages = Series::new(vec![30, 25, 40], Some("age".to_string()))?;
9 let heights = Series::new(vec![180, 175, 182], Some("height".to_string()))?;
10 let names = Series::new(
11 vec![
12 "Alice".to_string(),
13 "Bob".to_string(),
14 "Charlie".to_string(),
15 ],
16 Some("name".to_string()),
17 )?;
18
19 println!("Age Series: {:?}", ages);
20 println!("Height Series: {:?}", heights);
21 println!("Name Series: {:?}", names);
22
23 // Statistics for numeric series
24 println!("\n=== Statistics for Age Series ===");
25 println!("Sum: {}", ages.sum());
26 println!("Mean: {}", ages.mean()?);
27 println!("Min: {}", ages.min()?);
28 println!("Max: {}", ages.max()?);
29
30 // Creating a DataFrame
31 println!("\n=== Creating a DataFrame ===");
32 let mut df = DataFrame::new();
33 df.add_column("name".to_string(), names)?;
34 df.add_column("age".to_string(), ages)?;
35 df.add_column("height".to_string(), heights)?;
36
37 println!("DataFrame: {:?}", df);
38 println!("Number of Columns: {}", df.column_count());
39 println!("Number of Rows: {}", df.row_count());
40 println!("Column Names: {:?}", df.column_names());
41
42 // Testing saving to and loading from CSV
43 let file_path = "example_data.csv";
44 df.to_csv(file_path)?;
45 println!("\nSaved to CSV file: {}", file_path);
46
47 // Testing loading from CSV (may not be fully implemented yet)
48 match DataFrame::from_csv(file_path, true) {
49 Ok(loaded_df) => {
50 println!("DataFrame loaded from CSV: {:?}", loaded_df);
51 println!("Number of Columns: {}", loaded_df.column_count());
52 println!("Number of Rows: {}", loaded_df.row_count());
53 println!("Column Names: {:?}", loaded_df.column_names());
54 }
55 Err(e) => {
56 println!("Failed to load CSV: {:?}", e);
57 }
58 }
59
60 println!("\n=== Sample Complete ===");
61 Ok(())
62}
More examples
5fn main() -> Result<()> {
6 println!("=== Pivot Table and Grouping Example ===");
7
8 // Create sample data
9 let mut df = DataFrame::new();
10
11 // Create column data
12 let category = Series::new(
13 vec![
14 "A".to_string(),
15 "B".to_string(),
16 "A".to_string(),
17 "C".to_string(),
18 "B".to_string(),
19 "A".to_string(),
20 "C".to_string(),
21 "B".to_string(),
22 ],
23 Some("category".to_string()),
24 )?;
25
26 let region = Series::new(
27 vec![
28 "East".to_string(),
29 "West".to_string(),
30 "West".to_string(),
31 "East".to_string(),
32 "East".to_string(),
33 "West".to_string(),
34 "West".to_string(),
35 "East".to_string(),
36 ],
37 Some("region".to_string()),
38 )?;
39
40 let sales = Series::new(
41 vec![100, 150, 200, 120, 180, 90, 250, 160],
42 Some("sales".to_string()),
43 )?;
44
45 // Add columns to DataFrame
46 df.add_column("category".to_string(), category)?;
47 df.add_column("region".to_string(), region)?;
48 df.add_column("sales".to_string(), sales)?;
49
50 println!("DataFrame Info:");
51 println!(" Number of columns: {}", df.column_count());
52 println!(" Number of rows: {}", df.row_count());
53 println!(" Column names: {:?}", df.column_names());
54
55 // Grouping and aggregation
56 println!("\n=== Grouping by Category ===");
57 let category_group = df.groupby("category")?;
58
59 println!("Sum by category (in progress):");
60 let _category_sum = category_group.sum(&["sales"])?;
61
62 // Pivot table (in progress)
63 println!("\n=== Pivot Table ===");
64 println!("Sum of sales by category and region (in progress):");
65 let _pivot_result = df.pivot_table("category", "region", "sales", AggFunction::Sum)?;
66
67 // Note: Pivot table and grouping features are still under development,
68 // so actual results are not displayed
69
70 println!("\n=== Aggregation Function Examples ===");
71 let functions = [
72 AggFunction::Sum,
73 AggFunction::Mean,
74 AggFunction::Min,
75 AggFunction::Max,
76 AggFunction::Count,
77 ];
78
79 for func in &functions {
80 println!(
81 "Aggregation Function: {} ({})",
82 func.name(),
83 match func {
84 AggFunction::Sum => "Sum",
85 AggFunction::Mean => "Mean",
86 AggFunction::Min => "Min",
87 AggFunction::Max => "Max",
88 AggFunction::Count => "Count",
89 }
90 );
91 }
92
93 println!("\n=== Pivot Table Example (Complete) ===");
94 Ok(())
95}
6fn main() -> Result<()> {
7 // Path to a large CSV file (replace with actual path)
8 let file_path = "examples/data/large_dataset.csv";
9
10 println!("Working with large datasets example");
11 println!("----------------------------------");
12
13 // Create a disk-based DataFrame with custom configuration
14 let config = DiskConfig {
15 memory_limit: 500 * 1024 * 1024, // 500MB memory limit
16 chunk_size: 50_000, // Process in chunks of 50,000 rows
17 use_memory_mapping: true, // Use memory mapping for efficiency
18 temp_dir: None, // Use system temp directory
19 };
20
21 let disk_df = DiskBasedDataFrame::new(file_path, Some(config))?;
22
23 // Get schema information
24 println!("DataFrame Schema:");
25 for column in disk_df.schema().column_names() {
26 println!(" - {}", column);
27 }
28
29 // Process in chunks for counting rows
30 let mut chunked_df = disk_df.chunked()?;
31 let mut total_rows = 0;
32
33 println!("\nProcessing in chunks:");
34 while let Some(chunk) = chunked_df.next_chunk()? {
35 let chunk_rows = chunk.row_count();
36 total_rows += chunk_rows;
37 println!(" - Processed chunk with {} rows", chunk_rows);
38 }
39
40 println!("\nTotal rows in dataset: {}", total_rows);
41
42 // Example of filtering data
43 println!("\nFiltering data:");
44 let filtered = disk_df.filter(|value, _| {
45 // Example filter: keep only values starting with 'A'
46 value.starts_with('A')
47 })?;
48
49 println!("Filtered result has {} rows", filtered.len());
50
51 // Example of selecting columns
52 println!("\nSelecting columns:");
53 let columns_to_select = vec!["column1", "column2"]; // Replace with actual column names
54 let selected = disk_df.select(&columns_to_select)?;
55
56 println!("Selected result has {} rows and columns:", selected.len());
57 // Since the result is a Vec<HashMap<String, String>>, we need to check the keys of the first element
58 if !selected.is_empty() {
59 for column in selected[0].keys() {
60 println!(" - {}", column);
61 }
62 }
63
64 // Example of grouping and aggregation
65 println!("\nGrouping and aggregation:");
66 let grouped = disk_df.group_by("category_column", "value_column", |values| {
67 // Example aggregation: calculate average
68 let sum: f64 = values.iter().filter_map(|v| v.parse::<f64>().ok()).sum();
69 let count = values.len();
70
71 if count > 0 {
72 Ok(format!("{:.2}", sum / count as f64))
73 } else {
74 Ok("0.0".to_string())
75 }
76 })?;
77
78 println!("Grouped result has {} groups", grouped.len());
79
80 // Example of parallel processing
81 println!("\nParallel processing example:");
82 let chunk_results = chunked_df.parallel_process(
83 // Process each chunk
84 |chunk| {
85 let mut counts = HashMap::new();
86
87 // Example: count occurrences of values in a column
88 for row_idx in 0..chunk.row_count() {
89 if let Ok(value) = chunk.get_string_value("category_column", row_idx) {
90 *counts.entry(value.to_string()).or_insert(0) += 1;
91 }
92 }
93
94 Ok(counts)
95 },
96 // Combine results
97 |chunk_maps| {
98 let mut result_map = HashMap::new();
99
100 // Merge all maps
101 for chunk_map in chunk_maps {
102 for (key, count) in chunk_map {
103 *result_map.entry(key).or_insert(0) += count;
104 }
105 }
106
107 Ok(result_map)
108 },
109 )?;
110
111 println!("Category counts from parallel processing:");
112 for (category, count) in chunk_results.iter().take(5) {
113 println!(" - {}: {}", category, count);
114 }
115
116 Ok(())
117}
Sourcepub fn get_column<T: 'static + Debug + Clone + Send + Sync>(
&self,
column_name: &str,
) -> Result<&Series<T>>
pub fn get_column<T: 'static + Debug + Clone + Send + Sync>( &self, column_name: &str, ) -> Result<&Series<T>>
Get a reference to the column named `column_name` as a `Series<T>`, where `T` is the element type requested by the caller
Examples found in repository?
22fn descriptive_stats_example() -> Result<()> {
23 println!("1. Descriptive Statistics Sample");
24 println!("-----------------");
25
26 // Create dataset
27 let mut df = DataFrame::new();
28 let values = Series::new(
29 vec![10.5, 12.3, 15.2, 9.8, 11.5, 13.7, 14.3, 12.9, 8.5, 10.2],
30 Some("Values".to_string()),
31 )?;
32
33 df.add_column("Values".to_string(), values)?;
34
35 // Descriptive statistics
36 let stats = pandrs::stats::describe(
37 df.get_column("Values")
38 .unwrap()
39 .values()
40 .iter()
41 .map(|v: &String| v.parse::<f64>().unwrap_or(0.0))
42 .collect::<Vec<f64>>(),
43 )?;
44
45 // Display results
46 println!("Count: {}", stats.count);
47 println!("Mean: {:.2}", stats.mean);
48 println!("Standard Deviation: {:.2}", stats.std);
49 println!("Min: {:.2}", stats.min);
50 println!("First Quartile: {:.2}", stats.q1);
51 println!("Median: {:.2}", stats.median);
52 println!("Third Quartile: {:.2}", stats.q3);
53 println!("Max: {:.2}", stats.max);
54
55 // Covariance and correlation coefficient
56 let data1 = vec![1.0, 2.0, 3.0, 4.0, 5.0];
57 let data2 = vec![1.5, 3.1, 4.2, 5.8, 7.1];
58
59 let cov = pandrs::stats::covariance(&data1, &data2)?;
60 let corr = pandrs::stats::correlation(&data1, &data2)?;
61
62 println!("\nCovariance and Correlation Coefficient:");
63 println!("Covariance: {:.4}", cov);
64 println!("Correlation Coefficient: {:.4}", corr);
65
66 println!();
67 Ok(())
68}
More examples
6fn main() -> Result<()> {
7 println!("=== Example of Categorical Data with Missing Values ===\n");
8
9 // 1. Create categorical data
10 println!("1. Create categorical data");
11
12 // Create a vector with NA values
13 let values = vec![
14 NA::Value("Red".to_string()),
15 NA::Value("Blue".to_string()),
16 NA::NA, // Missing value
17 NA::Value("Green".to_string()),
18 NA::Value("Red".to_string()), // Duplicate value
19 ];
20
21 // Create categorical data type from vector
22 // Create as unordered category
23 let cat = StringCategorical::from_na_vec(
24 values.clone(),
25 None, // Auto-detect categories
26 Some(CategoricalOrder::Unordered), // Unordered
27 )?;
28
29 println!("Categories: {:?}", cat.categories());
30 println!("Number of categories: {}", cat.categories().len());
31 println!("Number of data: {}", cat.len());
32
33 // Display category codes
34 println!("Internal codes: {:?}", cat.codes());
35 println!();
36
37 // 2. Create ordered categorical data
38 println!("2. Create ordered categorical data");
39
40 // Explicitly ordered category list
41 let ordered_categories = vec!["Low".to_string(), "Medium".to_string(), "High".to_string()];
42
43 // Create a vector with NA values
44 let values = vec![
45 NA::Value("Medium".to_string()),
46 NA::Value("Low".to_string()),
47 NA::NA, // Missing value
48 NA::Value("High".to_string()),
49 NA::Value("Medium".to_string()), // Duplicate value
50 ];
51
52 // Create as ordered category
53 let ordered_cat = StringCategorical::from_na_vec(
54 values.clone(),
55 Some(ordered_categories), // Explicit category list
56 Some(CategoricalOrder::Ordered), // Ordered
57 )?;
58
59 println!("Ordered categories: {:?}", ordered_cat.categories());
60 println!("Number of categories: {}", ordered_cat.categories().len());
61 println!("Number of data: {}", ordered_cat.len());
62
63 // Display category codes
64 println!("Internal codes: {:?}", ordered_cat.codes());
65 println!();
66
67 // 3. Operations on categorical data
68 println!("3. Operations on categorical data");
69
70 // Create two categorical data
71 let values1 = vec![
72 NA::Value("A".to_string()),
73 NA::Value("B".to_string()),
74 NA::NA,
75 NA::Value("C".to_string()),
76 ];
77
78 let values2 = vec![
79 NA::Value("B".to_string()),
80 NA::Value("C".to_string()),
81 NA::Value("D".to_string()),
82 NA::NA,
83 ];
84
85 let cat1 = StringCategorical::from_na_vec(values1, None, None)?;
86 let cat2 = StringCategorical::from_na_vec(values2, None, None)?;
87
88 // Set operations
89 let union = cat1.union(&cat2)?; // Union
90 let intersection = cat1.intersection(&cat2)?; // Intersection
91 let difference = cat1.difference(&cat2)?; // Difference
92
93 println!("Categories of set 1: {:?}", cat1.categories());
94 println!("Categories of set 2: {:?}", cat2.categories());
95 println!("Union: {:?}", union.categories());
96 println!("Intersection: {:?}", intersection.categories());
97 println!("Difference (set 1 - set 2): {:?}", difference.categories());
98 println!();
99
100 // 4. Using categorical columns in DataFrame
101 println!("4. Using categorical columns in DataFrame");
102
103 // Create a vector with NA values (first create for categorical)
104 let values = vec![
105 NA::Value("High".to_string()),
106 NA::Value("Medium".to_string()),
107 NA::NA,
108 NA::Value("Low".to_string()),
109 ];
110
111 // Simplified for sample code
112 let order_cats = vec!["Low".to_string(), "Medium".to_string(), "High".to_string()];
113
114 // Create categorical data
115 let cat_eval = StringCategorical::from_na_vec(
116 values.clone(), // Clone it
117 Some(order_cats),
118 Some(CategoricalOrder::Ordered),
119 )?;
120
121 // Output the size of the created categorical data
122 println!("Size of created categorical data: {}", cat_eval.len());
123
124 // Add as categorical column
125 let categoricals = vec![("Evaluation".to_string(), cat_eval)];
126 let mut df = DataFrame::from_categoricals(categoricals)?;
127
128 // Check the number of rows in the data and match it
129 println!("Number of rows in DataFrame: {}", df.row_count());
130 println!("Note: NA values are excluded when creating DataFrame");
131
132 // Add numeric column (match the number of rows)
133 let scores = vec![95, 80, 0]; // Match the number of rows in DataFrame
134 println!("Size of scores: {}", scores.len());
135
136 df.add_column(
137 "Score".to_string(),
138 Series::new(scores, Some("Score".to_string()))?,
139 )?;
140
141 println!("DataFrame: ");
142 println!("{:#?}", df);
143
144 // Retrieve and verify categorical data
145 println!(
146 "Is 'Evaluation' column categorical: {}",
147 df.is_categorical("Evaluation")
148 );
149
150 // Explicitly handle errors
151 match df.get_categorical::<String>("Evaluation") {
152 Ok(cat_col) => println!(
153 "Categories of 'Evaluation' column: {:?}",
154 cat_col.categories()
155 ),
156 Err(_) => println!("Failed to retrieve categories of 'Evaluation' column"),
157 }
158 println!();
159
160 // 5. Input and output with CSV file
161 println!("5. Input and output with CSV file");
162
163 // Save to temporary file
164 let temp_path = Path::new("/tmp/categorical_example.csv");
165 df.to_csv(temp_path)?;
166
167 println!("Saved to CSV file: {}", temp_path.display());
168
169 // Load from file
170 let df_loaded = DataFrame::from_csv(temp_path, true)?;
171
172 // After loading from CSV, categorical information is lost (loaded as regular string column)
173 println!("Data loaded from CSV:");
174 println!("{:#?}", df_loaded);
175
176 // Check data loaded from CSV
177
178 // Note that data loaded from CSV is in a special format
179 println!("Example of data format loaded from CSV:");
180 println!(
181 "First value of 'Evaluation' column: {:?}",
182 df_loaded
183 .get_column::<String>("Evaluation")
184 .unwrap()
185 .values()[0]
186 );
187
188 // To reconstruct categorical data from this CSV loaded data,
189 // more complex processing is required, so the following is a simple example
190
191 // Create new categorical data as an example
192 let new_values = vec![
193 NA::Value("High".to_string()),
194 NA::Value("Medium".to_string()),
195 NA::NA,
196 NA::Value("Low".to_string()),
197 ];
198
199 let new_cat =
200 StringCategorical::from_na_vec(new_values, None, Some(CategoricalOrder::Ordered))?;
201
202 println!("Example of newly created categorical data:");
203 println!("Categories: {:?}", new_cat.categories());
204 println!("Order: {:?}", new_cat.ordered());
205
206 println!("\nTo actually convert data loaded from CSV to categorical data,");
207 println!("parsing processing according to the format and string escaping method of the CSV is required.");
208
209 println!("\n=== Sample End ===");
210 Ok(())
211}
Sourcepub fn get_column_string_values(&self, column_name: &str) -> Result<Vec<String>>
pub fn get_column_string_values(&self, column_name: &str) -> Result<Vec<String>>
Get the values of the column named `column_name` as a `Vec<String>` (stub implementation for tests)
Examples found in repository?
38fn run_benchmark_suite() {
39 // Header
40 println!("\n=== PandRS Performance Optimization Benchmark ===\n");
41
42 // Benchmark data sizes
43 let sizes = [1000, 10_000, 100_000, 1_000_000];
44
45 for &size in &sizes {
46 println!("\n## Data Size: {} rows ##", size);
47
48 // Data preparation
49 let int_data: Vec<i32> = (0..size).collect();
50 let float_data: Vec<f64> = (0..size).map(|i| i as f64 * 0.5).collect();
51 let string_data: Vec<String> = (0..size).map(|i| format!("val_{}", i % 100)).collect();
52
53 // Legacy implementation: Series creation
54 let (legacy_series_time, (legacy_int_series, legacy_float_series, legacy_string_series)) =
55 bench("Legacy Implementation - Series Creation", || {
56 let int_series =
57 Series::new(int_data.clone(), Some("int_col".to_string())).unwrap();
58 let float_series =
59 Series::new(float_data.clone(), Some("float_col".to_string())).unwrap();
60 let string_series =
61 Series::new(string_data.clone(), Some("string_col".to_string())).unwrap();
62 (int_series, float_series, string_series)
63 });
64
65 // Optimized implementation: Column creation
66 let (optimized_series_time, (opt_int_col, opt_float_col, opt_string_col)) =
67 bench("Optimized Implementation - Column Creation", || {
68 let int_col =
69 prototype::Int64Column::new(int_data.iter().map(|&i| i as i64).collect())
70 .with_name("int_col");
71 let float_col =
72 prototype::Float64Column::new(float_data.clone()).with_name("float_col");
73 let string_col =
74 prototype::StringColumn::new(string_data.clone()).with_name("string_col");
75 (int_col, float_col, string_col)
76 });
77
78 // Legacy implementation: DataFrame creation
79 let (legacy_df_time, legacy_df) =
80 bench("Legacy Implementation - DataFrame Creation", || {
81 let mut df = DataFrame::new();
82 df.add_column("int_col".to_string(), legacy_int_series.clone())
83 .unwrap();
84 df.add_column("float_col".to_string(), legacy_float_series.clone())
85 .unwrap();
86 df.add_column("string_col".to_string(), legacy_string_series.clone())
87 .unwrap();
88 df
89 });
90
91 // Optimized implementation: OptimizedDataFrame creation
92 let (optimized_df_time, optimized_df) =
93 bench("Optimized Implementation - DataFrame Creation", || {
94 let mut df = prototype::OptimizedDataFrame::new();
95 df.add_column("int_col", opt_int_col.clone()).unwrap();
96 df.add_column("float_col", opt_float_col.clone()).unwrap();
97 df.add_column("string_col", opt_string_col.clone()).unwrap();
98 df
99 });
100
101 // Legacy implementation: DataFrame aggregation operations
102 let (legacy_agg_time, _) = bench("Legacy Implementation - Aggregation Operations", || {
103 // Legacy implementation has low efficiency due to numerical operations via DataBox
104 // Legacy implementation requires string conversion for numerical operations
105 let int_values = legacy_df.get_column_string_values("int_col").unwrap();
106 let float_values = legacy_df.get_column_string_values("float_col").unwrap();
107
108 // Conversion from string to numeric
109 let int_numeric: Vec<i32> = int_values
110 .iter()
111 .filter_map(|s| s.parse::<i32>().ok())
112 .collect();
113
114 let float_numeric: Vec<f64> = float_values
115 .iter()
116 .filter_map(|s| s.parse::<f64>().ok())
117 .collect();
118
119 // Aggregation calculations
120 let int_sum: i32 = int_numeric.iter().sum();
121 let int_mean = int_sum as f64 / int_numeric.len() as f64;
122
123 let float_sum: f64 = float_numeric.iter().sum();
124 let float_mean = float_sum / float_numeric.len() as f64;
125
126 (int_sum, int_mean, float_sum, float_mean)
127 });
128
129 // Optimized implementation: DataFrame aggregation operations
130 let (optimized_agg_time, _) =
131 bench("Optimized Implementation - Aggregation Operations", || {
132 // Optimized implementation has type-safe access and direct numerical operations
133 let int_col = optimized_df.get_int64_column("int_col").unwrap();
134 let float_col = optimized_df.get_float64_column("float_col").unwrap();
135
136 // Direct aggregation calculations
137 let int_sum = int_col.sum();
138 let int_mean = int_col.mean().unwrap();
139
140 let float_sum = float_col.sum();
141 let float_mean = float_col.mean().unwrap();
142
143 (int_sum, int_mean, float_sum, float_mean)
144 });
145
146 // Result summary
147 println!("\nResult Summary ({} rows):", size);
148 println!(
149 " Series Creation: {:.2}x speedup ({} → {})",
150 legacy_series_time.as_secs_f64() / optimized_series_time.as_secs_f64(),
151 format_duration(legacy_series_time),
152 format_duration(optimized_series_time)
153 );
154
155 println!(
156 " DataFrame Creation: {:.2}x speedup ({} → {})",
157 legacy_df_time.as_secs_f64() / optimized_df_time.as_secs_f64(),
158 format_duration(legacy_df_time),
159 format_duration(optimized_df_time)
160 );
161
162 println!(
163 " Aggregation Operations: {:.2}x speedup ({} → {})",
164 legacy_agg_time.as_secs_f64() / optimized_agg_time.as_secs_f64(),
165 format_duration(legacy_agg_time),
166 format_duration(optimized_agg_time)
167 );
168 }
169}
Sourcepub fn column_name(&self, idx: usize) -> Option<&String>
pub fn column_name(&self, idx: usize) -> Option<&String>
Get the name of the column at index `idx`, returned as an `Option<&String>` (compatibility method)
Sourcepub fn concat_rows(&self, _other: &DataFrame) -> Result<DataFrame>
pub fn concat_rows(&self, _other: &DataFrame) -> Result<DataFrame>
Concatenate the rows of another DataFrame with this one, returning the result as a new DataFrame
Sourcepub fn to_csv<P: AsRef<Path>>(&self, _path: P) -> Result<()>
pub fn to_csv<P: AsRef<Path>>(&self, _path: P) -> Result<()>
Save the DataFrame as a CSV file at the given path
Examples found in repository?
4fn main() -> Result<()> {
5 println!("=== PandRS Basic Usage Example ===");
6
7 // Creating Series
8 let ages = Series::new(vec![30, 25, 40], Some("age".to_string()))?;
9 let heights = Series::new(vec![180, 175, 182], Some("height".to_string()))?;
10 let names = Series::new(
11 vec![
12 "Alice".to_string(),
13 "Bob".to_string(),
14 "Charlie".to_string(),
15 ],
16 Some("name".to_string()),
17 )?;
18
19 println!("Age Series: {:?}", ages);
20 println!("Height Series: {:?}", heights);
21 println!("Name Series: {:?}", names);
22
23 // Statistics for numeric series
24 println!("\n=== Statistics for Age Series ===");
25 println!("Sum: {}", ages.sum());
26 println!("Mean: {}", ages.mean()?);
27 println!("Min: {}", ages.min()?);
28 println!("Max: {}", ages.max()?);
29
30 // Creating a DataFrame
31 println!("\n=== Creating a DataFrame ===");
32 let mut df = DataFrame::new();
33 df.add_column("name".to_string(), names)?;
34 df.add_column("age".to_string(), ages)?;
35 df.add_column("height".to_string(), heights)?;
36
37 println!("DataFrame: {:?}", df);
38 println!("Number of Columns: {}", df.column_count());
39 println!("Number of Rows: {}", df.row_count());
40 println!("Column Names: {:?}", df.column_names());
41
42 // Testing saving to and loading from CSV
43 let file_path = "example_data.csv";
44 df.to_csv(file_path)?;
45 println!("\nSaved to CSV file: {}", file_path);
46
47 // Testing loading from CSV (may not be fully implemented yet)
48 match DataFrame::from_csv(file_path, true) {
49 Ok(loaded_df) => {
50 println!("DataFrame loaded from CSV: {:?}", loaded_df);
51 println!("Number of Columns: {}", loaded_df.column_count());
52 println!("Number of Rows: {}", loaded_df.row_count());
53 println!("Column Names: {:?}", loaded_df.column_names());
54 }
55 Err(e) => {
56 println!("Failed to load CSV: {:?}", e);
57 }
58 }
59
60 println!("\n=== Sample Complete ===");
61 Ok(())
62}
More examples
6fn main() -> Result<()> {
7 println!("=== Example of Categorical Data with Missing Values ===\n");
8
9 // 1. Create categorical data
10 println!("1. Create categorical data");
11
12 // Create a vector with NA values
13 let values = vec![
14 NA::Value("Red".to_string()),
15 NA::Value("Blue".to_string()),
16 NA::NA, // Missing value
17 NA::Value("Green".to_string()),
18 NA::Value("Red".to_string()), // Duplicate value
19 ];
20
21 // Create categorical data type from vector
22 // Create as unordered category
23 let cat = StringCategorical::from_na_vec(
24 values.clone(),
25 None, // Auto-detect categories
26 Some(CategoricalOrder::Unordered), // Unordered
27 )?;
28
29 println!("Categories: {:?}", cat.categories());
30 println!("Number of categories: {}", cat.categories().len());
31 println!("Number of data: {}", cat.len());
32
33 // Display category codes
34 println!("Internal codes: {:?}", cat.codes());
35 println!();
36
37 // 2. Create ordered categorical data
38 println!("2. Create ordered categorical data");
39
40 // Explicitly ordered category list
41 let ordered_categories = vec!["Low".to_string(), "Medium".to_string(), "High".to_string()];
42
43 // Create a vector with NA values
44 let values = vec![
45 NA::Value("Medium".to_string()),
46 NA::Value("Low".to_string()),
47 NA::NA, // Missing value
48 NA::Value("High".to_string()),
49 NA::Value("Medium".to_string()), // Duplicate value
50 ];
51
52 // Create as ordered category
53 let ordered_cat = StringCategorical::from_na_vec(
54 values.clone(),
55 Some(ordered_categories), // Explicit category list
56 Some(CategoricalOrder::Ordered), // Ordered
57 )?;
58
59 println!("Ordered categories: {:?}", ordered_cat.categories());
60 println!("Number of categories: {}", ordered_cat.categories().len());
61 println!("Number of data: {}", ordered_cat.len());
62
63 // Display category codes
64 println!("Internal codes: {:?}", ordered_cat.codes());
65 println!();
66
67 // 3. Operations on categorical data
68 println!("3. Operations on categorical data");
69
70 // Create two categorical data
71 let values1 = vec![
72 NA::Value("A".to_string()),
73 NA::Value("B".to_string()),
74 NA::NA,
75 NA::Value("C".to_string()),
76 ];
77
78 let values2 = vec![
79 NA::Value("B".to_string()),
80 NA::Value("C".to_string()),
81 NA::Value("D".to_string()),
82 NA::NA,
83 ];
84
85 let cat1 = StringCategorical::from_na_vec(values1, None, None)?;
86 let cat2 = StringCategorical::from_na_vec(values2, None, None)?;
87
88 // Set operations
89 let union = cat1.union(&cat2)?; // Union
90 let intersection = cat1.intersection(&cat2)?; // Intersection
91 let difference = cat1.difference(&cat2)?; // Difference
92
93 println!("Categories of set 1: {:?}", cat1.categories());
94 println!("Categories of set 2: {:?}", cat2.categories());
95 println!("Union: {:?}", union.categories());
96 println!("Intersection: {:?}", intersection.categories());
97 println!("Difference (set 1 - set 2): {:?}", difference.categories());
98 println!();
99
100 // 4. Using categorical columns in DataFrame
101 println!("4. Using categorical columns in DataFrame");
102
103 // Create a vector with NA values (first create for categorical)
104 let values = vec![
105 NA::Value("High".to_string()),
106 NA::Value("Medium".to_string()),
107 NA::NA,
108 NA::Value("Low".to_string()),
109 ];
110
111 // Simplified for sample code
112 let order_cats = vec!["Low".to_string(), "Medium".to_string(), "High".to_string()];
113
114 // Create categorical data
115 let cat_eval = StringCategorical::from_na_vec(
116 values.clone(), // Clone it
117 Some(order_cats),
118 Some(CategoricalOrder::Ordered),
119 )?;
120
121 // Output the size of the created categorical data
122 println!("Size of created categorical data: {}", cat_eval.len());
123
124 // Add as categorical column
125 let categoricals = vec![("Evaluation".to_string(), cat_eval)];
126 let mut df = DataFrame::from_categoricals(categoricals)?;
127
128 // Check the number of rows in the data and match it
129 println!("Number of rows in DataFrame: {}", df.row_count());
130 println!("Note: NA values are excluded when creating DataFrame");
131
132 // Add numeric column (match the number of rows)
133 let scores = vec![95, 80, 0]; // Match the number of rows in DataFrame
134 println!("Size of scores: {}", scores.len());
135
136 df.add_column(
137 "Score".to_string(),
138 Series::new(scores, Some("Score".to_string()))?,
139 )?;
140
141 println!("DataFrame: ");
142 println!("{:#?}", df);
143
144 // Retrieve and verify categorical data
145 println!(
146 "Is 'Evaluation' column categorical: {}",
147 df.is_categorical("Evaluation")
148 );
149
150 // Explicitly handle errors
151 match df.get_categorical::<String>("Evaluation") {
152 Ok(cat_col) => println!(
153 "Categories of 'Evaluation' column: {:?}",
154 cat_col.categories()
155 ),
156 Err(_) => println!("Failed to retrieve categories of 'Evaluation' column"),
157 }
158 println!();
159
160 // 5. Input and output with CSV file
161 println!("5. Input and output with CSV file");
162
163 // Save to temporary file
164 let temp_path = Path::new("/tmp/categorical_example.csv");
165 df.to_csv(temp_path)?;
166
167 println!("Saved to CSV file: {}", temp_path.display());
168
169 // Load from file
170 let df_loaded = DataFrame::from_csv(temp_path, true)?;
171
172 // After loading from CSV, categorical information is lost (loaded as regular string column)
173 println!("Data loaded from CSV:");
174 println!("{:#?}", df_loaded);
175
176 // Check data loaded from CSV
177
178 // Note that data loaded from CSV is in a special format
179 println!("Example of data format loaded from CSV:");
180 println!(
181 "First value of 'Evaluation' column: {:?}",
182 df_loaded
183 .get_column::<String>("Evaluation")
184 .unwrap()
185 .values()[0]
186 );
187
188 // To reconstruct categorical data from this CSV loaded data,
189 // more complex processing is required, so the following is a simple example
190
191 // Create new categorical data as an example
192 let new_values = vec![
193 NA::Value("High".to_string()),
194 NA::Value("Medium".to_string()),
195 NA::NA,
196 NA::Value("Low".to_string()),
197 ];
198
199 let new_cat =
200 StringCategorical::from_na_vec(new_values, None, Some(CategoricalOrder::Ordered))?;
201
202 println!("Example of newly created categorical data:");
203 println!("Categories: {:?}", new_cat.categories());
204 println!("Order: {:?}", new_cat.ordered());
205
206 println!("\nTo actually convert data loaded from CSV to categorical data,");
207 println!("parsing processing according to the format and string escaping method of the CSV is required.");
208
209 println!("\n=== Sample End ===");
210 Ok(())
211}
Sourcepub fn from_csv<P: AsRef<Path>>(_path: P, _has_header: bool) -> Result<Self>
pub fn from_csv<P: AsRef<Path>>(_path: P, _has_header: bool) -> Result<Self>
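Create a DataFrame by loading the CSV file at the given path; the `_has_header` flag indicates whether the file has a header row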
Examples found in repository?
4fn main() -> Result<()> {
5 println!("=== PandRS Basic Usage Example ===");
6
7 // Creating Series
8 let ages = Series::new(vec![30, 25, 40], Some("age".to_string()))?;
9 let heights = Series::new(vec![180, 175, 182], Some("height".to_string()))?;
10 let names = Series::new(
11 vec![
12 "Alice".to_string(),
13 "Bob".to_string(),
14 "Charlie".to_string(),
15 ],
16 Some("name".to_string()),
17 )?;
18
19 println!("Age Series: {:?}", ages);
20 println!("Height Series: {:?}", heights);
21 println!("Name Series: {:?}", names);
22
23 // Statistics for numeric series
24 println!("\n=== Statistics for Age Series ===");
25 println!("Sum: {}", ages.sum());
26 println!("Mean: {}", ages.mean()?);
27 println!("Min: {}", ages.min()?);
28 println!("Max: {}", ages.max()?);
29
30 // Creating a DataFrame
31 println!("\n=== Creating a DataFrame ===");
32 let mut df = DataFrame::new();
33 df.add_column("name".to_string(), names)?;
34 df.add_column("age".to_string(), ages)?;
35 df.add_column("height".to_string(), heights)?;
36
37 println!("DataFrame: {:?}", df);
38 println!("Number of Columns: {}", df.column_count());
39 println!("Number of Rows: {}", df.row_count());
40 println!("Column Names: {:?}", df.column_names());
41
42 // Testing saving to and loading from CSV
43 let file_path = "example_data.csv";
44 df.to_csv(file_path)?;
45 println!("\nSaved to CSV file: {}", file_path);
46
47 // Testing loading from CSV (may not be fully implemented yet)
48 match DataFrame::from_csv(file_path, true) {
49 Ok(loaded_df) => {
50 println!("DataFrame loaded from CSV: {:?}", loaded_df);
51 println!("Number of Columns: {}", loaded_df.column_count());
52 println!("Number of Rows: {}", loaded_df.row_count());
53 println!("Column Names: {:?}", loaded_df.column_names());
54 }
55 Err(e) => {
56 println!("Failed to load CSV: {:?}", e);
57 }
58 }
59
60 println!("\n=== Sample Complete ===");
61 Ok(())
62}
More examples
6fn main() -> Result<()> {
7 println!("=== Example of Categorical Data with Missing Values ===\n");
8
9 // 1. Create categorical data
10 println!("1. Create categorical data");
11
12 // Create a vector with NA values
13 let values = vec![
14 NA::Value("Red".to_string()),
15 NA::Value("Blue".to_string()),
16 NA::NA, // Missing value
17 NA::Value("Green".to_string()),
18 NA::Value("Red".to_string()), // Duplicate value
19 ];
20
21 // Create categorical data type from vector
22 // Create as unordered category
23 let cat = StringCategorical::from_na_vec(
24 values.clone(),
25 None, // Auto-detect categories
26 Some(CategoricalOrder::Unordered), // Unordered
27 )?;
28
29 println!("Categories: {:?}", cat.categories());
30 println!("Number of categories: {}", cat.categories().len());
31 println!("Number of data: {}", cat.len());
32
33 // Display category codes
34 println!("Internal codes: {:?}", cat.codes());
35 println!();
36
37 // 2. Create ordered categorical data
38 println!("2. Create ordered categorical data");
39
40 // Explicitly ordered category list
41 let ordered_categories = vec!["Low".to_string(), "Medium".to_string(), "High".to_string()];
42
43 // Create a vector with NA values
44 let values = vec![
45 NA::Value("Medium".to_string()),
46 NA::Value("Low".to_string()),
47 NA::NA, // Missing value
48 NA::Value("High".to_string()),
49 NA::Value("Medium".to_string()), // Duplicate value
50 ];
51
52 // Create as ordered category
53 let ordered_cat = StringCategorical::from_na_vec(
54 values.clone(),
55 Some(ordered_categories), // Explicit category list
56 Some(CategoricalOrder::Ordered), // Ordered
57 )?;
58
59 println!("Ordered categories: {:?}", ordered_cat.categories());
60 println!("Number of categories: {}", ordered_cat.categories().len());
61 println!("Number of data: {}", ordered_cat.len());
62
63 // Display category codes
64 println!("Internal codes: {:?}", ordered_cat.codes());
65 println!();
66
67 // 3. Operations on categorical data
68 println!("3. Operations on categorical data");
69
70 // Create two categorical data
71 let values1 = vec![
72 NA::Value("A".to_string()),
73 NA::Value("B".to_string()),
74 NA::NA,
75 NA::Value("C".to_string()),
76 ];
77
78 let values2 = vec![
79 NA::Value("B".to_string()),
80 NA::Value("C".to_string()),
81 NA::Value("D".to_string()),
82 NA::NA,
83 ];
84
85 let cat1 = StringCategorical::from_na_vec(values1, None, None)?;
86 let cat2 = StringCategorical::from_na_vec(values2, None, None)?;
87
88 // Set operations
89 let union = cat1.union(&cat2)?; // Union
90 let intersection = cat1.intersection(&cat2)?; // Intersection
91 let difference = cat1.difference(&cat2)?; // Difference
92
93 println!("Categories of set 1: {:?}", cat1.categories());
94 println!("Categories of set 2: {:?}", cat2.categories());
95 println!("Union: {:?}", union.categories());
96 println!("Intersection: {:?}", intersection.categories());
97 println!("Difference (set 1 - set 2): {:?}", difference.categories());
98 println!();
99
100 // 4. Using categorical columns in DataFrame
101 println!("4. Using categorical columns in DataFrame");
102
103 // Create a vector with NA values (first create for categorical)
104 let values = vec![
105 NA::Value("High".to_string()),
106 NA::Value("Medium".to_string()),
107 NA::NA,
108 NA::Value("Low".to_string()),
109 ];
110
111 // Simplified for sample code
112 let order_cats = vec!["Low".to_string(), "Medium".to_string(), "High".to_string()];
113
114 // Create categorical data
115 let cat_eval = StringCategorical::from_na_vec(
116 values.clone(), // Clone it
117 Some(order_cats),
118 Some(CategoricalOrder::Ordered),
119 )?;
120
121 // Output the size of the created categorical data
122 println!("Size of created categorical data: {}", cat_eval.len());
123
124 // Add as categorical column
125 let categoricals = vec![("Evaluation".to_string(), cat_eval)];
126 let mut df = DataFrame::from_categoricals(categoricals)?;
127
128 // Check the number of rows in the data and match it
129 println!("Number of rows in DataFrame: {}", df.row_count());
130 println!("Note: NA values are excluded when creating DataFrame");
131
132 // Add numeric column (match the number of rows)
133 let scores = vec![95, 80, 0]; // Match the number of rows in DataFrame
134 println!("Size of scores: {}", scores.len());
135
136 df.add_column(
137 "Score".to_string(),
138 Series::new(scores, Some("Score".to_string()))?,
139 )?;
140
141 println!("DataFrame: ");
142 println!("{:#?}", df);
143
144 // Retrieve and verify categorical data
145 println!(
146 "Is 'Evaluation' column categorical: {}",
147 df.is_categorical("Evaluation")
148 );
149
150 // Explicitly handle errors
151 match df.get_categorical::<String>("Evaluation") {
152 Ok(cat_col) => println!(
153 "Categories of 'Evaluation' column: {:?}",
154 cat_col.categories()
155 ),
156 Err(_) => println!("Failed to retrieve categories of 'Evaluation' column"),
157 }
158 println!();
159
160 // 5. Input and output with CSV file
161 println!("5. Input and output with CSV file");
162
163 // Save to temporary file
164 let temp_path = Path::new("/tmp/categorical_example.csv");
165 df.to_csv(temp_path)?;
166
167 println!("Saved to CSV file: {}", temp_path.display());
168
169 // Load from file
170 let df_loaded = DataFrame::from_csv(temp_path, true)?;
171
172 // After loading from CSV, categorical information is lost (loaded as regular string column)
173 println!("Data loaded from CSV:");
174 println!("{:#?}", df_loaded);
175
176 // Check data loaded from CSV
177
178 // Note that data loaded from CSV is in a special format
179 println!("Example of data format loaded from CSV:");
180 println!(
181 "First value of 'Evaluation' column: {:?}",
182 df_loaded
183 .get_column::<String>("Evaluation")
184 .unwrap()
185 .values()[0]
186 );
187
188 // To reconstruct categorical data from this CSV loaded data,
189 // more complex processing is required, so the following is a simple example
190
191 // Create new categorical data as an example
192 let new_values = vec![
193 NA::Value("High".to_string()),
194 NA::Value("Medium".to_string()),
195 NA::NA,
196 NA::Value("Low".to_string()),
197 ];
198
199 let new_cat =
200 StringCategorical::from_na_vec(new_values, None, Some(CategoricalOrder::Ordered))?;
201
202 println!("Example of newly created categorical data:");
203 println!("Categories: {:?}", new_cat.categories());
204 println!("Order: {:?}", new_cat.ordered());
205
206 println!("\nTo actually convert data loaded from CSV to categorical data,");
207 println!("parsing processing according to the format and string escaping method of the CSV is required.");
208
209 println!("\n=== Sample End ===");
210 Ok(())
211}
pub fn from_csv_reader<R: Read>(
    _reader: &mut Reader<R>,
    _has_header: bool,
) -> Result<Self>
Create a DataFrame from a CSV reader
pub fn column_count(&self) -> usize
Get the number of columns in the DataFrame
Examples found in repository?
4fn main() -> Result<()> {
5 println!("=== PandRS Basic Usage Example ===");
6
7 // Creating Series
8 let ages = Series::new(vec![30, 25, 40], Some("age".to_string()))?;
9 let heights = Series::new(vec![180, 175, 182], Some("height".to_string()))?;
10 let names = Series::new(
11 vec![
12 "Alice".to_string(),
13 "Bob".to_string(),
14 "Charlie".to_string(),
15 ],
16 Some("name".to_string()),
17 )?;
18
19 println!("Age Series: {:?}", ages);
20 println!("Height Series: {:?}", heights);
21 println!("Name Series: {:?}", names);
22
23 // Statistics for numeric series
24 println!("\n=== Statistics for Age Series ===");
25 println!("Sum: {}", ages.sum());
26 println!("Mean: {}", ages.mean()?);
27 println!("Min: {}", ages.min()?);
28 println!("Max: {}", ages.max()?);
29
30 // Creating a DataFrame
31 println!("\n=== Creating a DataFrame ===");
32 let mut df = DataFrame::new();
33 df.add_column("name".to_string(), names)?;
34 df.add_column("age".to_string(), ages)?;
35 df.add_column("height".to_string(), heights)?;
36
37 println!("DataFrame: {:?}", df);
38 println!("Number of Columns: {}", df.column_count());
39 println!("Number of Rows: {}", df.row_count());
40 println!("Column Names: {:?}", df.column_names());
41
42 // Testing saving to and loading from CSV
43 let file_path = "example_data.csv";
44 df.to_csv(file_path)?;
45 println!("\nSaved to CSV file: {}", file_path);
46
47 // Testing loading from CSV (may not be fully implemented yet)
48 match DataFrame::from_csv(file_path, true) {
49 Ok(loaded_df) => {
50 println!("DataFrame loaded from CSV: {:?}", loaded_df);
51 println!("Number of Columns: {}", loaded_df.column_count());
52 println!("Number of Rows: {}", loaded_df.row_count());
53 println!("Column Names: {:?}", loaded_df.column_names());
54 }
55 Err(e) => {
56 println!("Failed to load CSV: {:?}", e);
57 }
58 }
59
60 println!("\n=== Sample Complete ===");
61 Ok(())
62}
More examples
5fn main() -> Result<()> {
6 println!("=== Pivot Table and Grouping Example ===");
7
8 // Create sample data
9 let mut df = DataFrame::new();
10
11 // Create column data
12 let category = Series::new(
13 vec![
14 "A".to_string(),
15 "B".to_string(),
16 "A".to_string(),
17 "C".to_string(),
18 "B".to_string(),
19 "A".to_string(),
20 "C".to_string(),
21 "B".to_string(),
22 ],
23 Some("category".to_string()),
24 )?;
25
26 let region = Series::new(
27 vec![
28 "East".to_string(),
29 "West".to_string(),
30 "West".to_string(),
31 "East".to_string(),
32 "East".to_string(),
33 "West".to_string(),
34 "West".to_string(),
35 "East".to_string(),
36 ],
37 Some("region".to_string()),
38 )?;
39
40 let sales = Series::new(
41 vec![100, 150, 200, 120, 180, 90, 250, 160],
42 Some("sales".to_string()),
43 )?;
44
45 // Add columns to DataFrame
46 df.add_column("category".to_string(), category)?;
47 df.add_column("region".to_string(), region)?;
48 df.add_column("sales".to_string(), sales)?;
49
50 println!("DataFrame Info:");
51 println!(" Number of columns: {}", df.column_count());
52 println!(" Number of rows: {}", df.row_count());
53 println!(" Column names: {:?}", df.column_names());
54
55 // Grouping and aggregation
56 println!("\n=== Grouping by Category ===");
57 let category_group = df.groupby("category")?;
58
59 println!("Sum by category (in progress):");
60 let _category_sum = category_group.sum(&["sales"])?;
61
62 // Pivot table (in progress)
63 println!("\n=== Pivot Table ===");
64 println!("Sum of sales by category and region (in progress):");
65 let _pivot_result = df.pivot_table("category", "region", "sales", AggFunction::Sum)?;
66
67 // Note: Pivot table and grouping features are still under development,
68 // so actual results are not displayed
69
70 println!("\n=== Aggregation Function Examples ===");
71 let functions = [
72 AggFunction::Sum,
73 AggFunction::Mean,
74 AggFunction::Min,
75 AggFunction::Max,
76 AggFunction::Count,
77 ];
78
79 for func in &functions {
80 println!(
81 "Aggregation Function: {} ({})",
82 func.name(),
83 match func {
84 AggFunction::Sum => "Sum",
85 AggFunction::Mean => "Mean",
86 AggFunction::Min => "Min",
87 AggFunction::Max => "Max",
88 AggFunction::Count => "Count",
89 }
90 );
91 }
92
93 println!("\n=== Pivot Table Example (Complete) ===");
94 Ok(())
95}
4fn main() -> Result<()> {
5 println!("=== Example of Using MultiIndex ===\n");
6
7 // =========================================
8 // Creating a MultiIndex
9 // =========================================
10
11 println!("--- Creating MultiIndex from Tuples ---");
12
13 // Create MultiIndex from tuples (vector of vectors)
14 let tuples = vec![
15 vec!["A".to_string(), "a".to_string()],
16 vec!["A".to_string(), "b".to_string()],
17 vec!["B".to_string(), "a".to_string()],
18 vec!["B".to_string(), "b".to_string()],
19 ];
20
21 let names = Some(vec![Some("first".to_string()), Some("second".to_string())]);
22 let multi_idx = MultiIndex::from_tuples(tuples, names)?;
23
24 println!("MultiIndex: {:?}\n", multi_idx);
25 println!("Number of Levels: {}", multi_idx.n_levels());
26 println!("Number of Rows: {}\n", multi_idx.len());
27
28 // =========================================
29 // Operations on MultiIndex
30 // =========================================
31
32 println!("--- Retrieving Level Values ---");
33 let level0_values = multi_idx.get_level_values(0)?;
34 println!("Values in Level 0: {:?}", level0_values);
35
36 let level1_values = multi_idx.get_level_values(1)?;
37 println!("Values in Level 1: {:?}", level1_values);
38
39 println!("--- Swapping Levels ---");
40 let swapped = multi_idx.swaplevel(0, 1)?;
41 println!("After Swapping Levels: {:?}\n", swapped);
42
43 // =========================================
44 // DataFrame with MultiIndex
45 // =========================================
46
47 println!("--- DataFrame with MultiIndex ---");
48
49 // Create DataFrame
50 let mut df = DataFrame::with_multi_index(multi_idx.clone());
51
52 // Add data
53 let data = vec![
54 "data1".to_string(),
55 "data2".to_string(),
56 "data3".to_string(),
57 "data4".to_string(),
58 ];
59 df.add_column(
60 "data".to_string(),
61 pandrs::Series::new(data, Some("data".to_string()))?,
62 )?;
63
64 println!("DataFrame: {:?}\n", df);
65 println!("Number of Rows: {}", df.row_count());
66 println!("Number of Columns: {}", df.column_count());
67
68 // =========================================
69 // Conversion Between Simple Index and MultiIndex
70 // =========================================
71
72 println!("\n--- Example of Index Conversion ---");
73
74 // Create DataFrame from simple index
75 let simple_idx = Index::new(vec!["X".to_string(), "Y".to_string(), "Z".to_string()])?;
76 let mut simple_df = DataFrame::with_index(simple_idx);
77
78 // Add data
79 let values = vec![100, 200, 300];
80 let str_values: Vec<String> = values.iter().map(|v| v.to_string()).collect();
81 simple_df.add_column(
82 "values".to_string(),
83 pandrs::Series::new(str_values, Some("values".to_string()))?,
84 )?;
85
86 println!("Simple Index DataFrame: {:?}", simple_df);
87
88 // Prepare for conversion to MultiIndex
89 let tuples = vec![
90 vec!["Category".to_string(), "X".to_string()],
91 vec!["Category".to_string(), "Y".to_string()],
92 vec!["Category".to_string(), "Z".to_string()],
93 ];
94
95 // Create and set MultiIndex
96 let new_multi_idx = MultiIndex::from_tuples(tuples, None)?;
97 simple_df.set_multi_index(new_multi_idx)?;
98
99 println!("After Conversion to MultiIndex: {:?}", simple_df);
100
101 println!("\n=== Sample Complete ===");
102 Ok(())
103}
4fn main() -> Result<(), Box<dyn Error>> {
5 println!("=== Example of Parallel Processing Features ===\n");
6
7 // Create sample data
8 let numbers = Series::new(
9 vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
10 Some("numbers".to_string()),
11 )?;
12
13 // Parallel map: square each number
14 println!("Example of parallel map processing:");
15 let squared = numbers.par_map(|x| x * x);
16 println!("Original values: {:?}", numbers.values());
17 println!("Squared values: {:?}", squared.values());
18
19 // Parallel filter: keep only even numbers
20 println!("\nExample of parallel filtering:");
21 let even_numbers = numbers.par_filter(|x| x % 2 == 0);
22 println!("Even Numbers: {:?}", even_numbers.values());
23
24 // Processing data containing NA
25 let na_data = vec![
26 NA::Value(10),
27 NA::Value(20),
28 NA::NA,
29 NA::Value(40),
30 NA::NA,
31 NA::Value(60),
32 ];
33 let na_series = NASeries::new(na_data, Some("na_numbers".to_string()))?;
34
35 println!("\nParallel processing of data containing NA:");
36 let na_tripled = na_series.par_map(|x| x * 3);
37 println!("Original values: {:?}", na_series.values());
38 println!("Tripled values: {:?}", na_tripled.values());
39
40 // Parallel processing of DataFrame
41 println!("\nParallel processing of DataFrame:");
42
43 // Creating a sample DataFrame
44 let mut df = DataFrame::new();
45 let names = Series::new(
46 vec!["Alice", "Bob", "Charlie", "David", "Eve"],
47 Some("name".to_string()),
48 )?;
49 let ages = Series::new(vec![25, 30, 35, 40, 45], Some("age".to_string()))?;
50 let scores = Series::new(vec![85, 90, 78, 92, 88], Some("score".to_string()))?;
51
52 df.add_column("name".to_string(), names)?;
53 df.add_column("age".to_string(), ages)?;
54 df.add_column("score".to_string(), scores)?;
55
56 // Parallel transformation of DataFrame
57 println!("Example of DataFrame.par_apply:");
58 let transformed_df = df.par_apply(|col, _row, val| {
59 match col {
60 "age" => {
61 // Add 1 to age
62 let age: i32 = val.parse().unwrap_or(0);
63 (age + 1).to_string()
64 }
65 "score" => {
66 // Add 5 to score
67 let score: i32 = val.parse().unwrap_or(0);
68 (score + 5).to_string()
69 }
70 _ => val.to_string(),
71 }
72 })?;
73
74 println!(
75 "Original DF row count: {}, column count: {}",
76 df.row_count(),
77 df.column_count()
78 );
79 println!(
80 "Transformed DF row count: {}, column count: {}",
81 transformed_df.row_count(),
82 transformed_df.column_count()
83 );
84
85 // Filtering rows
86 println!("\nExample of DataFrame.par_filter_rows:");
87 let filtered_df = df.par_filter_rows(|row| {
88 // Keep only rows where score > 85
89 if let Ok(values) = df.get_column_numeric_values("score") {
90 if row < values.len() {
91 return values[row] > 85.0;
92 }
93 }
94 false
95 })?;
96
97 println!("Row count after filtering: {}", filtered_df.row_count());
98
99 // Example of using ParallelUtils
100 println!("\nExample of ParallelUtils features:");
101
102 let unsorted = vec![5, 3, 8, 1, 9, 4, 7, 2, 6];
103 let sorted = ParallelUtils::par_sort(unsorted.clone());
104 println!("Before sorting: {:?}", unsorted);
105 println!("After sorting: {:?}", sorted);
106
107 let numbers_vec = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
108 let sum = ParallelUtils::par_sum(&numbers_vec);
109 let mean = ParallelUtils::par_mean(&numbers_vec);
110 let min = ParallelUtils::par_min(&numbers_vec);
111 let max = ParallelUtils::par_max(&numbers_vec);
112
113 println!("Sum: {}", sum);
114 println!("Mean: {}", mean.unwrap());
115 println!("Min: {}", min.unwrap());
116 println!("Max: {}", max.unwrap());
117
118 println!("\n=== Example of Parallel Processing Features Complete ===");
119 Ok(())
120}
pub fn select_columns(&self, columns: &[&str]) -> Result<Self>
Create a new DataFrame with only the specified columns
pub fn from_map(
    data: HashMap<String, Vec<String>>,
    index: Option<Index<String>>,
) -> Result<Self>
Create a new DataFrame from a HashMap of column names to string vectors
Examples found in repository?
6fn main() -> Result<()> {
7 println!("=== Benchmark with One Million Rows ===\n");
8
9 // Benchmark function
10 fn bench<F>(name: &str, f: F) -> Duration
11 where
12 F: FnOnce() -> (),
13 {
14 println!("Running: {}", name);
15 let start = Instant::now();
16 f();
17 let duration = start.elapsed();
18 println!(" Completed: {:?}\n", duration);
19 duration
20 }
21
22 // Benchmark for creating a DataFrame with one million rows
23 println!("--- DataFrame with One Million Rows ---");
24
25 bench("Creating Series x3 (One Million Rows)", || {
26 let _ = Series::new((0..1_000_000).collect::<Vec<_>>(), Some("A".to_string())).unwrap();
27 let _ = Series::new(
28 (0..1_000_000).map(|i| i as f64 * 0.5).collect::<Vec<_>>(),
29 Some("B".to_string()),
30 )
31 .unwrap();
32 let _ = Series::new(
33 (0..1_000_000)
34 .map(|i| format!("val_{}", i))
35 .collect::<Vec<_>>(),
36 Some("C".to_string()),
37 )
38 .unwrap();
39 });
40
41 let large_duration = bench("Creating DataFrame (3 Columns x One Million Rows)", || {
42 let col_a = Series::new((0..1_000_000).collect::<Vec<_>>(), Some("A".to_string())).unwrap();
43 let col_b = Series::new(
44 (0..1_000_000).map(|i| i as f64 * 0.5).collect::<Vec<_>>(),
45 Some("B".to_string()),
46 )
47 .unwrap();
48 let col_c = Series::new(
49 (0..1_000_000)
50 .map(|i| format!("val_{}", i))
51 .collect::<Vec<_>>(),
52 Some("C".to_string()),
53 )
54 .unwrap();
55
56 let mut df = DataFrame::new();
57 df.add_column("A".to_string(), col_a).unwrap();
58 df.add_column("B".to_string(), col_b).unwrap();
59 df.add_column("C".to_string(), col_c).unwrap();
60 });
61
62 bench("DataFrame from_map (3 Columns x One Million Rows)", || {
63 let mut data = HashMap::new();
64 data.insert(
65 "A".to_string(),
66 (0..1_000_000).map(|n| n.to_string()).collect(),
67 );
68 data.insert(
69 "B".to_string(),
70 (0..1_000_000)
71 .map(|n| format!("{:.1}", n as f64 * 0.5))
72 .collect(),
73 );
74 data.insert(
75 "C".to_string(),
76 (0..1_000_000).map(|i| format!("val_{}", i)).collect(),
77 );
78
79 let _ = DataFrame::from_map(data, None).unwrap();
80 });
81
82 println!(
83 "Time to create DataFrame with one million rows in pure Rust: {:?}",
84 large_duration
85 );
86
87 Ok(())
88}
More examples
5fn main() -> Result<(), PandRSError> {
6 println!("=== PandRS Performance Benchmark ===\n");
7
8 // Benchmark function
9 fn bench<F>(name: &str, f: F) -> Duration
10 where
11 F: FnOnce() -> (),
12 {
13 println!("Running: {}", name);
14 let start = Instant::now();
15 f();
16 let duration = start.elapsed();
17 println!(" Completed: {:?}\n", duration);
18 duration
19 }
20
21 // Benchmark for creating a small DataFrame
22 println!("--- Small DataFrame (10 rows) ---");
23
24 bench("Create Series x3", || {
25 let _ = Series::new(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10], Some("A".to_string())).unwrap();
26 let _ = Series::new(
27 vec![1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9, 10.10],
28 Some("B".to_string()),
29 )
30 .unwrap();
31 let _ = Series::new(
32 vec!["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"]
33 .into_iter()
34 .map(|s| s.to_string())
35 .collect::<Vec<_>>(),
36 Some("C".to_string()),
37 )
38 .unwrap();
39 });
40
41 bench("Create DataFrame (3 columns x 10 rows)", || {
42 let col_a =
43 Series::new(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10], Some("A".to_string())).unwrap();
44 let col_b = Series::new(
45 vec![1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9, 10.10],
46 Some("B".to_string()),
47 )
48 .unwrap();
49 let col_c = Series::new(
50 vec!["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"]
51 .into_iter()
52 .map(|s| s.to_string())
53 .collect::<Vec<_>>(),
54 Some("C".to_string()),
55 )
56 .unwrap();
57
58 let mut df = DataFrame::new();
59 df.add_column("A".to_string(), col_a).unwrap();
60 df.add_column("B".to_string(), col_b).unwrap();
61 df.add_column("C".to_string(), col_c).unwrap();
62 });
63
64 bench("DataFrame from_map (3 columns x 10 rows)", || {
65 let mut data = HashMap::new();
66 data.insert("A".to_string(), (0..10).map(|n| n.to_string()).collect());
67 data.insert(
68 "B".to_string(),
69 (0..10).map(|n| format!("{:.1}", n as f64 + 0.1)).collect(),
70 );
71 data.insert(
72 "C".to_string(),
73 vec!["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"]
74 .into_iter()
75 .map(|s| s.to_string())
76 .collect(),
77 );
78
79 let _ = DataFrame::from_map(data, None).unwrap();
80 });
81
82 // Benchmark for creating a medium DataFrame
83 println!("\n--- Medium DataFrame (1,000 rows) ---");
84
85 bench("Create Series x3 (1000 rows)", || {
86 let _ = Series::new((0..1000).collect::<Vec<_>>(), Some("A".to_string())).unwrap();
87 let _ = Series::new(
88 (0..1000).map(|i| i as f64 * 0.5).collect::<Vec<_>>(),
89 Some("B".to_string()),
90 )
91 .unwrap();
92 let _ = Series::new(
93 (0..1000).map(|i| format!("val_{}", i)).collect::<Vec<_>>(),
94 Some("C".to_string()),
95 )
96 .unwrap();
97 });
98
99 bench("Create DataFrame (3 columns x 1000 rows)", || {
100 let col_a = Series::new((0..1000).collect::<Vec<_>>(), Some("A".to_string())).unwrap();
101 let col_b = Series::new(
102 (0..1000).map(|i| i as f64 * 0.5).collect::<Vec<_>>(),
103 Some("B".to_string()),
104 )
105 .unwrap();
106 let col_c = Series::new(
107 (0..1000).map(|i| format!("val_{}", i)).collect::<Vec<_>>(),
108 Some("C".to_string()),
109 )
110 .unwrap();
111
112 let mut df = DataFrame::new();
113 df.add_column("A".to_string(), col_a).unwrap();
114 df.add_column("B".to_string(), col_b).unwrap();
115 df.add_column("C".to_string(), col_c).unwrap();
116 });
117
118 bench("DataFrame from_map (3 columns x 1000 rows)", || {
119 let mut data = HashMap::new();
120 data.insert("A".to_string(), (0..1000).map(|n| n.to_string()).collect());
121 data.insert(
122 "B".to_string(),
123 (0..1000)
124 .map(|n| format!("{:.1}", n as f64 * 0.5))
125 .collect(),
126 );
127 data.insert(
128 "C".to_string(),
129 (0..1000).map(|i| format!("val_{}", i)).collect(),
130 );
131
132 let _ = DataFrame::from_map(data, None).unwrap();
133 });
134
135 // Benchmark for creating a large DataFrame
136 println!("\n--- Large DataFrame (100,000 rows) ---");
137
138 bench("Create Series x3 (100,000 rows)", || {
139 let _ = Series::new((0..100_000).collect::<Vec<_>>(), Some("A".to_string())).unwrap();
140 let _ = Series::new(
141 (0..100_000).map(|i| i as f64 * 0.5).collect::<Vec<_>>(),
142 Some("B".to_string()),
143 )
144 .unwrap();
145 let _ = Series::new(
146 (0..100_000)
147 .map(|i| format!("val_{}", i))
148 .collect::<Vec<_>>(),
149 Some("C".to_string()),
150 )
151 .unwrap();
152 });
153
154 let large_duration = bench("Create DataFrame (3 columns x 100,000 rows)", || {
155 let col_a = Series::new((0..100_000).collect::<Vec<_>>(), Some("A".to_string())).unwrap();
156 let col_b = Series::new(
157 (0..100_000).map(|i| i as f64 * 0.5).collect::<Vec<_>>(),
158 Some("B".to_string()),
159 )
160 .unwrap();
161 let col_c = Series::new(
162 (0..100_000)
163 .map(|i| format!("val_{}", i))
164 .collect::<Vec<_>>(),
165 Some("C".to_string()),
166 )
167 .unwrap();
168
169 let mut df = DataFrame::new();
170 df.add_column("A".to_string(), col_a).unwrap();
171 df.add_column("B".to_string(), col_b).unwrap();
172 df.add_column("C".to_string(), col_c).unwrap();
173 });
174
175 bench("DataFrame from_map (3 columns x 100,000 rows)", || {
176 let mut data = HashMap::new();
177 data.insert(
178 "A".to_string(),
179 (0..100_000).map(|n| n.to_string()).collect(),
180 );
181 data.insert(
182 "B".to_string(),
183 (0..100_000)
184 .map(|n| format!("{:.1}", n as f64 * 0.5))
185 .collect(),
186 );
187 data.insert(
188 "C".to_string(),
189 (0..100_000).map(|i| format!("val_{}", i)).collect(),
190 );
191
192 let _ = DataFrame::from_map(data, None).unwrap();
193 });
194
195 println!(
196 "Pure Rust code DataFrame creation time for 100,000 rows: {:?}",
197 large_duration
198 );
199 println!("(Equivalent operation in Python: approximately 0.35 seconds)");
200
201 Ok(())
202}
pub fn has_column(&self, column_name: &str) -> bool
Check whether the DataFrame has the specified column (alias for `contains_column`)
pub fn get_index(&self) -> DataFrameIndex<String>
Get the DataFrame’s index
pub fn set_index(&mut self, index: Index<String>) -> Result<()>
Set the DataFrame’s index from an Index
pub fn set_multi_index(&mut self, multi_index: MultiIndex<String>) -> Result<()>
Set a multi-index for the DataFrame
Examples found in repository?
4fn main() -> Result<()> {
5 println!("=== Example of Using MultiIndex ===\n");
6
7 // =========================================
8 // Creating a MultiIndex
9 // =========================================
10
11 println!("--- Creating MultiIndex from Tuples ---");
12
13 // Create MultiIndex from tuples (vector of vectors)
14 let tuples = vec![
15 vec!["A".to_string(), "a".to_string()],
16 vec!["A".to_string(), "b".to_string()],
17 vec!["B".to_string(), "a".to_string()],
18 vec!["B".to_string(), "b".to_string()],
19 ];
20
21 let names = Some(vec![Some("first".to_string()), Some("second".to_string())]);
22 let multi_idx = MultiIndex::from_tuples(tuples, names)?;
23
24 println!("MultiIndex: {:?}\n", multi_idx);
25 println!("Number of Levels: {}", multi_idx.n_levels());
26 println!("Number of Rows: {}\n", multi_idx.len());
27
28 // =========================================
29 // Operations on MultiIndex
30 // =========================================
31
32 println!("--- Retrieving Level Values ---");
33 let level0_values = multi_idx.get_level_values(0)?;
34 println!("Values in Level 0: {:?}", level0_values);
35
36 let level1_values = multi_idx.get_level_values(1)?;
37 println!("Values in Level 1: {:?}", level1_values);
38
39 println!("--- Swapping Levels ---");
40 let swapped = multi_idx.swaplevel(0, 1)?;
41 println!("After Swapping Levels: {:?}\n", swapped);
42
43 // =========================================
44 // DataFrame with MultiIndex
45 // =========================================
46
47 println!("--- DataFrame with MultiIndex ---");
48
49 // Create DataFrame
50 let mut df = DataFrame::with_multi_index(multi_idx.clone());
51
52 // Add data
53 let data = vec![
54 "data1".to_string(),
55 "data2".to_string(),
56 "data3".to_string(),
57 "data4".to_string(),
58 ];
59 df.add_column(
60 "data".to_string(),
61 pandrs::Series::new(data, Some("data".to_string()))?,
62 )?;
63
64 println!("DataFrame: {:?}\n", df);
65 println!("Number of Rows: {}", df.row_count());
66 println!("Number of Columns: {}", df.column_count());
67
68 // =========================================
69 // Conversion Between Simple Index and MultiIndex
70 // =========================================
71
72 println!("\n--- Example of Index Conversion ---");
73
74 // Create DataFrame from simple index
75 let simple_idx = Index::new(vec!["X".to_string(), "Y".to_string(), "Z".to_string()])?;
76 let mut simple_df = DataFrame::with_index(simple_idx);
77
78 // Add data
79 let values = vec![100, 200, 300];
80 let str_values: Vec<String> = values.iter().map(|v| v.to_string()).collect();
81 simple_df.add_column(
82 "values".to_string(),
83 pandrs::Series::new(str_values, Some("values".to_string()))?,
84 )?;
85
86 println!("Simple Index DataFrame: {:?}", simple_df);
87
88 // Prepare for conversion to MultiIndex
89 let tuples = vec![
90 vec!["Category".to_string(), "X".to_string()],
91 vec!["Category".to_string(), "Y".to_string()],
92 vec!["Category".to_string(), "Z".to_string()],
93 ];
94
95 // Create and set MultiIndex
96 let new_multi_idx = MultiIndex::from_tuples(tuples, None)?;
97 simple_df.set_multi_index(new_multi_idx)?;
98
99 println!("After Conversion to MultiIndex: {:?}", simple_df);
100
101 println!("\n=== Sample Complete ===");
102 Ok(())
103}
pub fn get_column_numeric_values(&self, column_name: &str) -> Result<Vec<f64>>
Get numeric values from a column
Examples found in repository?
4fn main() -> Result<(), Box<dyn Error>> {
5 println!("=== Example of Parallel Processing Features ===\n");
6
7 // Create sample data
8 let numbers = Series::new(
9 vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
10 Some("numbers".to_string()),
11 )?;
12
13 // Parallel map: square each number
14 println!("Example of parallel map processing:");
15 let squared = numbers.par_map(|x| x * x);
16 println!("Original values: {:?}", numbers.values());
17 println!("Squared values: {:?}", squared.values());
18
19 // Parallel filter: keep only even numbers
20 println!("\nExample of parallel filtering:");
21 let even_numbers = numbers.par_filter(|x| x % 2 == 0);
22 println!("Even Numbers: {:?}", even_numbers.values());
23
24 // Processing data containing NA
25 let na_data = vec![
26 NA::Value(10),
27 NA::Value(20),
28 NA::NA,
29 NA::Value(40),
30 NA::NA,
31 NA::Value(60),
32 ];
33 let na_series = NASeries::new(na_data, Some("na_numbers".to_string()))?;
34
35 println!("\nParallel processing of data containing NA:");
36 let na_tripled = na_series.par_map(|x| x * 3);
37 println!("Original values: {:?}", na_series.values());
38 println!("Tripled values: {:?}", na_tripled.values());
39
40 // Parallel processing of DataFrame
41 println!("\nParallel processing of DataFrame:");
42
43 // Creating a sample DataFrame
44 let mut df = DataFrame::new();
45 let names = Series::new(
46 vec!["Alice", "Bob", "Charlie", "David", "Eve"],
47 Some("name".to_string()),
48 )?;
49 let ages = Series::new(vec![25, 30, 35, 40, 45], Some("age".to_string()))?;
50 let scores = Series::new(vec![85, 90, 78, 92, 88], Some("score".to_string()))?;
51
52 df.add_column("name".to_string(), names)?;
53 df.add_column("age".to_string(), ages)?;
54 df.add_column("score".to_string(), scores)?;
55
56 // Parallel transformation of DataFrame
57 println!("Example of DataFrame.par_apply:");
58 let transformed_df = df.par_apply(|col, _row, val| {
59 match col {
60 "age" => {
61 // Add 1 to age
62 let age: i32 = val.parse().unwrap_or(0);
63 (age + 1).to_string()
64 }
65 "score" => {
66 // Add 5 to score
67 let score: i32 = val.parse().unwrap_or(0);
68 (score + 5).to_string()
69 }
70 _ => val.to_string(),
71 }
72 })?;
73
74 println!(
75 "Original DF row count: {}, column count: {}",
76 df.row_count(),
77 df.column_count()
78 );
79 println!(
80 "Transformed DF row count: {}, column count: {}",
81 transformed_df.row_count(),
82 transformed_df.column_count()
83 );
84
85 // Filtering rows
86 println!("\nExample of DataFrame.par_filter_rows:");
87 let filtered_df = df.par_filter_rows(|row| {
88 // Keep only rows where score > 85
89 if let Ok(values) = df.get_column_numeric_values("score") {
90 if row < values.len() {
91 return values[row] > 85.0;
92 }
93 }
94 false
95 })?;
96
97 println!("Row count after filtering: {}", filtered_df.row_count());
98
99 // Example of using ParallelUtils
100 println!("\nExample of ParallelUtils features:");
101
102 let unsorted = vec![5, 3, 8, 1, 9, 4, 7, 2, 6];
103 let sorted = ParallelUtils::par_sort(unsorted.clone());
104 println!("Before sorting: {:?}", unsorted);
105 println!("After sorting: {:?}", sorted);
106
107 let numbers_vec = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
108 let sum = ParallelUtils::par_sum(&numbers_vec);
109 let mean = ParallelUtils::par_mean(&numbers_vec);
110 let min = ParallelUtils::par_min(&numbers_vec);
111 let max = ParallelUtils::par_max(&numbers_vec);
112
113 println!("Sum: {}", sum);
114 println!("Mean: {}", mean.unwrap());
115 println!("Min: {}", min.unwrap());
116 println!("Max: {}", max.unwrap());
117
118 println!("\n=== Example of Parallel Processing Features Complete ===");
119 Ok(())
120}
pub fn add_row_data(&mut self, row_data: Vec<Box<dyn DValue>>) -> Result<()>
Add a row to the DataFrame
pub fn filter<F>(&self, column_name: &str, predicate: F) -> Result<Self>
Filter rows based on a predicate
pub fn gpu_accelerate(&self) -> Result<Self>
Enable GPU acceleration for a DataFrame
pub fn corr_matrix(&self, _columns: &[&str]) -> Result<()>
Calculate a correlation matrix
pub fn add_row_data_from_hashmap(
    &mut self,
    row_data: HashMap<String, String>,
) -> Result<()>
Add a row to the DataFrame using a HashMap of column names to values
pub fn is_categorical(&self, column_name: &str) -> bool
Check if a column is categorical
Examples found in repository?
6fn main() -> Result<()> {
7 println!("=== Example of Categorical Data with Missing Values ===\n");
8
9 // 1. Create categorical data
10 println!("1. Create categorical data");
11
12 // Create a vector with NA values
13 let values = vec![
14 NA::Value("Red".to_string()),
15 NA::Value("Blue".to_string()),
16 NA::NA, // Missing value
17 NA::Value("Green".to_string()),
18 NA::Value("Red".to_string()), // Duplicate value
19 ];
20
21 // Create categorical data type from vector
22 // Create as unordered category
23 let cat = StringCategorical::from_na_vec(
24 values.clone(),
25 None, // Auto-detect categories
26 Some(CategoricalOrder::Unordered), // Unordered
27 )?;
28
29 println!("Categories: {:?}", cat.categories());
30 println!("Number of categories: {}", cat.categories().len());
31 println!("Number of data: {}", cat.len());
32
33 // Display category codes
34 println!("Internal codes: {:?}", cat.codes());
35 println!();
36
37 // 2. Create ordered categorical data
38 println!("2. Create ordered categorical data");
39
40 // Explicitly ordered category list
41 let ordered_categories = vec!["Low".to_string(), "Medium".to_string(), "High".to_string()];
42
43 // Create a vector with NA values
44 let values = vec![
45 NA::Value("Medium".to_string()),
46 NA::Value("Low".to_string()),
47 NA::NA, // Missing value
48 NA::Value("High".to_string()),
49 NA::Value("Medium".to_string()), // Duplicate value
50 ];
51
52 // Create as ordered category
53 let ordered_cat = StringCategorical::from_na_vec(
54 values.clone(),
55 Some(ordered_categories), // Explicit category list
56 Some(CategoricalOrder::Ordered), // Ordered
57 )?;
58
59 println!("Ordered categories: {:?}", ordered_cat.categories());
60 println!("Number of categories: {}", ordered_cat.categories().len());
61 println!("Number of data: {}", ordered_cat.len());
62
63 // Display category codes
64 println!("Internal codes: {:?}", ordered_cat.codes());
65 println!();
66
67 // 3. Operations on categorical data
68 println!("3. Operations on categorical data");
69
70 // Create two categorical data
71 let values1 = vec![
72 NA::Value("A".to_string()),
73 NA::Value("B".to_string()),
74 NA::NA,
75 NA::Value("C".to_string()),
76 ];
77
78 let values2 = vec![
79 NA::Value("B".to_string()),
80 NA::Value("C".to_string()),
81 NA::Value("D".to_string()),
82 NA::NA,
83 ];
84
85 let cat1 = StringCategorical::from_na_vec(values1, None, None)?;
86 let cat2 = StringCategorical::from_na_vec(values2, None, None)?;
87
88 // Set operations
89 let union = cat1.union(&cat2)?; // Union
90 let intersection = cat1.intersection(&cat2)?; // Intersection
91 let difference = cat1.difference(&cat2)?; // Difference
92
93 println!("Categories of set 1: {:?}", cat1.categories());
94 println!("Categories of set 2: {:?}", cat2.categories());
95 println!("Union: {:?}", union.categories());
96 println!("Intersection: {:?}", intersection.categories());
97 println!("Difference (set 1 - set 2): {:?}", difference.categories());
98 println!();
99
100 // 4. Using categorical columns in DataFrame
101 println!("4. Using categorical columns in DataFrame");
102
103 // Create a vector with NA values (first create for categorical)
104 let values = vec![
105 NA::Value("High".to_string()),
106 NA::Value("Medium".to_string()),
107 NA::NA,
108 NA::Value("Low".to_string()),
109 ];
110
111 // Simplified for sample code
112 let order_cats = vec!["Low".to_string(), "Medium".to_string(), "High".to_string()];
113
114 // Create categorical data
115 let cat_eval = StringCategorical::from_na_vec(
116 values.clone(), // Clone it
117 Some(order_cats),
118 Some(CategoricalOrder::Ordered),
119 )?;
120
121 // Output the size of the created categorical data
122 println!("Size of created categorical data: {}", cat_eval.len());
123
124 // Add as categorical column
125 let categoricals = vec![("Evaluation".to_string(), cat_eval)];
126 let mut df = DataFrame::from_categoricals(categoricals)?;
127
128 // Check the number of rows in the data and match it
129 println!("Number of rows in DataFrame: {}", df.row_count());
130 println!("Note: NA values are excluded when creating DataFrame");
131
132 // Add numeric column (match the number of rows)
133 let scores = vec![95, 80, 0]; // Match the number of rows in DataFrame
134 println!("Size of scores: {}", scores.len());
135
136 df.add_column(
137 "Score".to_string(),
138 Series::new(scores, Some("Score".to_string()))?,
139 )?;
140
141 println!("DataFrame: ");
142 println!("{:#?}", df);
143
144 // Retrieve and verify categorical data
145 println!(
146 "Is 'Evaluation' column categorical: {}",
147 df.is_categorical("Evaluation")
148 );
149
150 // Explicitly handle errors
151 match df.get_categorical::<String>("Evaluation") {
152 Ok(cat_col) => println!(
153 "Categories of 'Evaluation' column: {:?}",
154 cat_col.categories()
155 ),
156 Err(_) => println!("Failed to retrieve categories of 'Evaluation' column"),
157 }
158 println!();
159
160 // 5. Input and output with CSV file
161 println!("5. Input and output with CSV file");
162
163 // Save to temporary file
164 let temp_path = Path::new("/tmp/categorical_example.csv");
165 df.to_csv(temp_path)?;
166
167 println!("Saved to CSV file: {}", temp_path.display());
168
169 // Load from file
170 let df_loaded = DataFrame::from_csv(temp_path, true)?;
171
172 // After loading from CSV, categorical information is lost (loaded as regular string column)
173 println!("Data loaded from CSV:");
174 println!("{:#?}", df_loaded);
175
176 // Check data loaded from CSV
177
178 // Note that data loaded from CSV is in a special format
179 println!("Example of data format loaded from CSV:");
180 println!(
181 "First value of 'Evaluation' column: {:?}",
182 df_loaded
183 .get_column::<String>("Evaluation")
184 .unwrap()
185 .values()[0]
186 );
187
188 // To reconstruct categorical data from this CSV loaded data,
189 // more complex processing is required, so the following is a simple example
190
191 // Create new categorical data as an example
192 let new_values = vec![
193 NA::Value("High".to_string()),
194 NA::Value("Medium".to_string()),
195 NA::NA,
196 NA::Value("Low".to_string()),
197 ];
198
199 let new_cat =
200 StringCategorical::from_na_vec(new_values, None, Some(CategoricalOrder::Ordered))?;
201
202 println!("Example of newly created categorical data:");
203 println!("Categories: {:?}", new_cat.categories());
204 println!("Order: {:?}", new_cat.ordered());
205
206 println!("\nTo actually convert data loaded from CSV to categorical data,");
207 println!("parsing processing according to the format and string escaping method of the CSV is required.");
208
209 println!("\n=== Sample End ===");
210 Ok(())
211}
More examples
8fn main() -> Result<()> {
9 println!("=== Example of Using Categorical Data Type ===\n");
10
11 // ===========================================================
12 // Creating Basic Categorical Data
13 // ===========================================================
14
15 println!("--- Creating Basic Categorical Data ---");
16 let values = vec!["Tokyo", "Osaka", "Tokyo", "Nagoya", "Osaka", "Tokyo"];
17 let values_str: Vec<String> = values.iter().map(|s| s.to_string()).collect();
18
19 // Create categorical data (unique values are automatically extracted)
20 // Changed: Now using boolean instead of Some(CategoricalOrder::Unordered)
21 let cat = StringCategorical::new(
22 values_str, None, // Automatically detect categories
23 false, // Unordered
24 )?;
25
26 println!("Original Data: {:?}", values);
27 println!("Categories: {:?}", cat.categories());
28 println!("Order Type: {:?}", cat.ordered());
29 println!("Data Length: {}", cat.len());
30
31 // Retrieve actual values from categorical data
32 println!(
33 "\nFirst 3 values: {} {} {}",
34 cat.get(0).unwrap_or(&"None".to_string()),
35 cat.get(1).unwrap_or(&"None".to_string()),
36 cat.get(2).unwrap_or(&"None".to_string())
37 );
38 println!("\nValues stored internally as codes: {:?}", cat.codes());
39
40 // ===========================================================
41 // Creating with Explicit Category List
42 // ===========================================================
43
44 println!("\n--- Creating with Explicit Category List ---");
45 let values2 = vec!["Red", "Blue", "Red"];
46 let values2_str: Vec<String> = values2.iter().map(|s| s.to_string()).collect();
47
48 // Define all categories beforehand
49 let categories = vec!["Red", "Blue", "Green", "Yellow"];
50 let categories_str: Vec<String> = categories.iter().map(|s| s.to_string()).collect();
51
52 // Create ordered categorical data
53 // Changed: Now using boolean instead of Some(CategoricalOrder::Ordered)
54 let cat2 = StringCategorical::new(
55 values2_str,
56 Some(categories_str), // Explicit category list
57 true, // Ordered
58 )?;
59
60 println!("Categories: {:?}", cat2.categories()); // Red, Blue, Green, Yellow
61 println!("Codes: {:?}", cat2.codes());
62
63 // ===========================================================
64 // Operations on Categorical Data
65 // ===========================================================
66
67 println!("\n--- Example of Categorical Operations ---");
68
69 // Base categorical data
70 // Changed: Using false instead of None for the ordered parameter
71 let fruits = vec!["Apple", "Banana", "Apple", "Orange"];
72 let fruits_str: Vec<String> = fruits.iter().map(|s| s.to_string()).collect();
73 let mut fruit_cat = StringCategorical::new(fruits_str, None, false)?;
74
75 println!("Original Categories: {:?}", fruit_cat.categories());
76
77 // Add categories
78 let new_cats = vec!["Grape", "Strawberry"];
79 let new_cats_str: Vec<String> = new_cats.iter().map(|s| s.to_string()).collect();
80 fruit_cat.add_categories(new_cats_str)?;
81
82 println!("Categories after addition: {:?}", fruit_cat.categories());
83
84 // Change category order
85 let reordered = vec!["Banana", "Strawberry", "Orange", "Apple", "Grape"];
86 let reordered_str: Vec<String> = reordered.iter().map(|s| s.to_string()).collect();
87 fruit_cat.reorder_categories(reordered_str)?;
88
89 println!("Categories after reordering: {:?}", fruit_cat.categories());
90 println!("Codes: {:?}", fruit_cat.codes());
91
92 // ===========================================================
93 // Integration with DataFrame
94 // ===========================================================
95
96 println!("\n--- Integration of Categorical Data with DataFrame ---");
97
98 // Create a basic DataFrame
99 let mut df = DataFrame::new();
100
101 // Add regular columns
102 let regions = vec!["Hokkaido", "Kanto", "Kansai", "Kyushu", "Kanto", "Kansai"];
103 let regions_str: Vec<String> = regions.iter().map(|s| s.to_string()).collect();
104 let pop = vec!["Low", "High", "High", "Medium", "High", "High"];
105 let pop_str: Vec<String> = pop.iter().map(|s| s.to_string()).collect();
106
107 df.add_column(
108 "Region".to_string(),
109 Series::new(regions_str, Some("Region".to_string()))?,
110 )?;
111 df.add_column(
112 "Population".to_string(),
113 Series::new(pop_str, Some("Population".to_string()))?,
114 )?;
115
116 println!("Original DataFrame:\n{:?}", df);
117
118 // ===========================================================
119 // Creating Simplified Categorical DataFrame
120 // ===========================================================
121
122 // Create a DataFrame directly from categorical data
123 println!("\n--- Creating DataFrame with Categorical Data ---");
124
125 // Create categorical data
126 // Changed: Using boolean instead of Some(CategoricalOrder::Ordered)
127 let populations = vec!["Low", "Medium", "High"];
128 let populations_str: Vec<String> = populations.iter().map(|s| s.to_string()).collect();
129 let pop_cat = StringCategorical::new(
130 populations_str,
131 None, // Automatically detect
132 true, // Ordered
133 )?;
134
135 // Region data
136 let regions = vec!["Hokkaido", "Kanto", "Kansai"];
137 let regions_str: Vec<String> = regions.iter().map(|s| s.to_string()).collect();
138
139 // Create DataFrame from both categorical data
140 let categoricals = vec![("Population".to_string(), pop_cat)];
141
142 let mut df_cat = DataFrame::from_categoricals(categoricals)?;
143
144 // Add region column
145 df_cat.add_column(
146 "Region".to_string(),
147 Series::new(regions_str, Some("Region".to_string()))?,
148 )?;
149
150 println!("\nDataFrame after adding categorical data:\n{:?}", df_cat);
151
152 // Check if columns are categorical
153 println!(
154 "\nIs 'Population' column categorical: {}",
155 df_cat.is_categorical("Population")
156 );
157 println!(
158 "Is 'Region' column categorical: {}",
159 df_cat.is_categorical("Region")
160 );
161
162 // ===========================================================
163 // Example of Multi-Categorical DataFrame
164 // ===========================================================
165
166 println!("\n--- Example of Multi-Categorical DataFrame ---");
167
168 // Create product and color data as separate categories
169 // Changed: Using false instead of None for the ordered parameter
170 let products = vec!["A", "B", "C"];
171 let products_str: Vec<String> = products.iter().map(|s| s.to_string()).collect();
172 let product_cat = StringCategorical::new(products_str, None, false)?;
173
174 let colors = vec!["Red", "Blue", "Green"];
175 let colors_str: Vec<String> = colors.iter().map(|s| s.to_string()).collect();
176 let color_cat = StringCategorical::new(colors_str, None, false)?;
177
178 // Create a DataFrame containing both categories
179 let multi_categoricals = vec![
180 ("Product".to_string(), product_cat),
181 ("Color".to_string(), color_cat),
182 ];
183
184 let multi_df = DataFrame::from_categoricals(multi_categoricals)?;
185
186 println!("Multi-Categorical DataFrame:\n{:?}", multi_df);
187 println!(
188 "\nIs 'Product' column categorical: {}",
189 multi_df.is_categorical("Product")
190 );
191 println!(
192 "Is 'Color' column categorical: {}",
193 multi_df.is_categorical("Color")
194 );
195
196 // ===========================================================
197 // Aggregation and Analysis of Categorical Data
198 // ===========================================================
199
200 println!("\n--- Aggregation and Grouping of Categorical Data ---");
201
202 // Start with a simple DataFrame
203 let mut df_simple = DataFrame::new();
204
205 // Add product data
206 let products = vec!["A", "B", "C", "A", "B"];
207 let products_str: Vec<String> = products.iter().map(|s| s.to_string()).collect();
208 let sales = vec!["100", "150", "200", "120", "180"];
209 let sales_str: Vec<String> = sales.iter().map(|s| s.to_string()).collect();
210
211 df_simple.add_column(
212 "Product".to_string(),
213 Series::new(products_str.clone(), Some("Product".to_string()))?,
214 )?;
215 df_simple.add_column(
216 "Sales".to_string(),
217 Series::new(sales_str, Some("Sales".to_string()))?,
218 )?;
219
220 println!("Original DataFrame:\n{:?}", df_simple);
221
222 // Aggregate by product
223 let product_counts = df_simple.value_counts("Product")?;
224 println!("\nProduct Counts:\n{:?}", product_counts);
225
226 // Transformation and interaction between categorical and series
227 println!("\n--- Interaction between Categorical and Series ---");
228
229 // Create a simple categorical series
230 // Changed: Using false instead of None for the ordered parameter
231 let letter_cat = StringCategorical::new(
232 vec!["A".to_string(), "B".to_string(), "C".to_string()],
233 None,
234 false,
235 )?;
236
237 // Convert to series
238 let letter_series = letter_cat.to_series(Some("Letter".to_string()))?;
239 println!("Converted from categorical to series: {:?}", letter_series);
240
241 // Additional information about categorical data
242 println!("\n--- Characteristics of Categorical Data ---");
243 println!(
244 "Categorical data is stored in memory only once, regardless of repeated string values."
245 );
246 println!(
247 "This makes it particularly efficient for datasets with many duplicate string values."
248 );
249 println!("Additionally, ordered categorical data allows meaningful sorting of data.");
250
251 println!("\n=== Sample Complete ===");
252 Ok(())
253}
Sourcepub fn sample(&self, indices: &[usize]) -> Result<Self>
pub fn sample(&self, indices: &[usize]) -> Result<Self>
Select rows by position, returning a new DataFrame built from the rows at the given indices
Sourcepub fn get_categorical<T: 'static + Debug + Clone + Eq + Hash + Send + Sync>(
&self,
column_name: &str,
) -> Result<Categorical<T>>
pub fn get_categorical<T: 'static + Debug + Clone + Eq + Hash + Send + Sync>( &self, column_name: &str, ) -> Result<Categorical<T>>
Get a categorical column with generic type
Examples found in repository?
6fn main() -> Result<()> {
7 println!("=== Example of Categorical Data with Missing Values ===\n");
8
9 // 1. Create categorical data
10 println!("1. Create categorical data");
11
12 // Create a vector with NA values
13 let values = vec![
14 NA::Value("Red".to_string()),
15 NA::Value("Blue".to_string()),
16 NA::NA, // Missing value
17 NA::Value("Green".to_string()),
18 NA::Value("Red".to_string()), // Duplicate value
19 ];
20
21 // Create categorical data type from vector
22 // Create as unordered category
23 let cat = StringCategorical::from_na_vec(
24 values.clone(),
25 None, // Auto-detect categories
26 Some(CategoricalOrder::Unordered), // Unordered
27 )?;
28
29 println!("Categories: {:?}", cat.categories());
30 println!("Number of categories: {}", cat.categories().len());
31 println!("Number of data: {}", cat.len());
32
33 // Display category codes
34 println!("Internal codes: {:?}", cat.codes());
35 println!();
36
37 // 2. Create ordered categorical data
38 println!("2. Create ordered categorical data");
39
40 // Explicitly ordered category list
41 let ordered_categories = vec!["Low".to_string(), "Medium".to_string(), "High".to_string()];
42
43 // Create a vector with NA values
44 let values = vec![
45 NA::Value("Medium".to_string()),
46 NA::Value("Low".to_string()),
47 NA::NA, // Missing value
48 NA::Value("High".to_string()),
49 NA::Value("Medium".to_string()), // Duplicate value
50 ];
51
52 // Create as ordered category
53 let ordered_cat = StringCategorical::from_na_vec(
54 values.clone(),
55 Some(ordered_categories), // Explicit category list
56 Some(CategoricalOrder::Ordered), // Ordered
57 )?;
58
59 println!("Ordered categories: {:?}", ordered_cat.categories());
60 println!("Number of categories: {}", ordered_cat.categories().len());
61 println!("Number of data: {}", ordered_cat.len());
62
63 // Display category codes
64 println!("Internal codes: {:?}", ordered_cat.codes());
65 println!();
66
67 // 3. Operations on categorical data
68 println!("3. Operations on categorical data");
69
70 // Create two categorical data
71 let values1 = vec![
72 NA::Value("A".to_string()),
73 NA::Value("B".to_string()),
74 NA::NA,
75 NA::Value("C".to_string()),
76 ];
77
78 let values2 = vec![
79 NA::Value("B".to_string()),
80 NA::Value("C".to_string()),
81 NA::Value("D".to_string()),
82 NA::NA,
83 ];
84
85 let cat1 = StringCategorical::from_na_vec(values1, None, None)?;
86 let cat2 = StringCategorical::from_na_vec(values2, None, None)?;
87
88 // Set operations
89 let union = cat1.union(&cat2)?; // Union
90 let intersection = cat1.intersection(&cat2)?; // Intersection
91 let difference = cat1.difference(&cat2)?; // Difference
92
93 println!("Categories of set 1: {:?}", cat1.categories());
94 println!("Categories of set 2: {:?}", cat2.categories());
95 println!("Union: {:?}", union.categories());
96 println!("Intersection: {:?}", intersection.categories());
97 println!("Difference (set 1 - set 2): {:?}", difference.categories());
98 println!();
99
100 // 4. Using categorical columns in DataFrame
101 println!("4. Using categorical columns in DataFrame");
102
103 // Create a vector with NA values (first create for categorical)
104 let values = vec![
105 NA::Value("High".to_string()),
106 NA::Value("Medium".to_string()),
107 NA::NA,
108 NA::Value("Low".to_string()),
109 ];
110
111 // Simplified for sample code
112 let order_cats = vec!["Low".to_string(), "Medium".to_string(), "High".to_string()];
113
114 // Create categorical data
115 let cat_eval = StringCategorical::from_na_vec(
116 values.clone(), // Clone it
117 Some(order_cats),
118 Some(CategoricalOrder::Ordered),
119 )?;
120
121 // Output the size of the created categorical data
122 println!("Size of created categorical data: {}", cat_eval.len());
123
124 // Add as categorical column
125 let categoricals = vec![("Evaluation".to_string(), cat_eval)];
126 let mut df = DataFrame::from_categoricals(categoricals)?;
127
128 // Check the number of rows in the data and match it
129 println!("Number of rows in DataFrame: {}", df.row_count());
130 println!("Note: NA values are excluded when creating DataFrame");
131
132 // Add numeric column (match the number of rows)
133 let scores = vec![95, 80, 0]; // Match the number of rows in DataFrame
134 println!("Size of scores: {}", scores.len());
135
136 df.add_column(
137 "Score".to_string(),
138 Series::new(scores, Some("Score".to_string()))?,
139 )?;
140
141 println!("DataFrame: ");
142 println!("{:#?}", df);
143
144 // Retrieve and verify categorical data
145 println!(
146 "Is 'Evaluation' column categorical: {}",
147 df.is_categorical("Evaluation")
148 );
149
150 // Explicitly handle errors
151 match df.get_categorical::<String>("Evaluation") {
152 Ok(cat_col) => println!(
153 "Categories of 'Evaluation' column: {:?}",
154 cat_col.categories()
155 ),
156 Err(_) => println!("Failed to retrieve categories of 'Evaluation' column"),
157 }
158 println!();
159
160 // 5. Input and output with CSV file
161 println!("5. Input and output with CSV file");
162
163 // Save to temporary file
164 let temp_path = Path::new("/tmp/categorical_example.csv");
165 df.to_csv(temp_path)?;
166
167 println!("Saved to CSV file: {}", temp_path.display());
168
169 // Load from file
170 let df_loaded = DataFrame::from_csv(temp_path, true)?;
171
172 // After loading from CSV, categorical information is lost (loaded as regular string column)
173 println!("Data loaded from CSV:");
174 println!("{:#?}", df_loaded);
175
176 // Check data loaded from CSV
177
178 // Note that data loaded from CSV is in a special format
179 println!("Example of data format loaded from CSV:");
180 println!(
181 "First value of 'Evaluation' column: {:?}",
182 df_loaded
183 .get_column::<String>("Evaluation")
184 .unwrap()
185 .values()[0]
186 );
187
188 // To reconstruct categorical data from this CSV loaded data,
189 // more complex processing is required, so the following is a simple example
190
191 // Create new categorical data as an example
192 let new_values = vec![
193 NA::Value("High".to_string()),
194 NA::Value("Medium".to_string()),
195 NA::NA,
196 NA::Value("Low".to_string()),
197 ];
198
199 let new_cat =
200 StringCategorical::from_na_vec(new_values, None, Some(CategoricalOrder::Ordered))?;
201
202 println!("Example of newly created categorical data:");
203 println!("Categories: {:?}", new_cat.categories());
204 println!("Order: {:?}", new_cat.ordered());
205
206 println!("\nTo actually convert data loaded from CSV to categorical data,");
207 println!("parsing processing according to the format and string escaping method of the CSV is required.");
208
209 println!("\n=== Sample End ===");
210 Ok(())
211}
Sourcepub fn is_numeric_column(&self, column_name: &str) -> bool
pub fn is_numeric_column(&self, column_name: &str) -> bool
Check if a column is numeric
Sourcepub fn add_na_series_as_categorical(
&mut self,
name: String,
series: NASeries<String>,
categories: Option<Vec<String>>,
ordered: Option<CategoricalOrder>,
) -> Result<&mut Self>
pub fn add_na_series_as_categorical( &mut self, name: String, series: NASeries<String>, categories: Option<Vec<String>>, ordered: Option<CategoricalOrder>, ) -> Result<&mut Self>
Add a NASeries as a categorical column
Sourcepub fn from_categoricals(
categoricals: Vec<(String, StringCategorical)>,
) -> Result<Self>
pub fn from_categoricals( categoricals: Vec<(String, StringCategorical)>, ) -> Result<Self>
Create a DataFrame from a list of (column name, categorical data) pairs
Examples found in repository?
6fn main() -> Result<()> {
7 println!("=== Example of Categorical Data with Missing Values ===\n");
8
9 // 1. Create categorical data
10 println!("1. Create categorical data");
11
12 // Create a vector with NA values
13 let values = vec![
14 NA::Value("Red".to_string()),
15 NA::Value("Blue".to_string()),
16 NA::NA, // Missing value
17 NA::Value("Green".to_string()),
18 NA::Value("Red".to_string()), // Duplicate value
19 ];
20
21 // Create categorical data type from vector
22 // Create as unordered category
23 let cat = StringCategorical::from_na_vec(
24 values.clone(),
25 None, // Auto-detect categories
26 Some(CategoricalOrder::Unordered), // Unordered
27 )?;
28
29 println!("Categories: {:?}", cat.categories());
30 println!("Number of categories: {}", cat.categories().len());
31 println!("Number of data: {}", cat.len());
32
33 // Display category codes
34 println!("Internal codes: {:?}", cat.codes());
35 println!();
36
37 // 2. Create ordered categorical data
38 println!("2. Create ordered categorical data");
39
40 // Explicitly ordered category list
41 let ordered_categories = vec!["Low".to_string(), "Medium".to_string(), "High".to_string()];
42
43 // Create a vector with NA values
44 let values = vec![
45 NA::Value("Medium".to_string()),
46 NA::Value("Low".to_string()),
47 NA::NA, // Missing value
48 NA::Value("High".to_string()),
49 NA::Value("Medium".to_string()), // Duplicate value
50 ];
51
52 // Create as ordered category
53 let ordered_cat = StringCategorical::from_na_vec(
54 values.clone(),
55 Some(ordered_categories), // Explicit category list
56 Some(CategoricalOrder::Ordered), // Ordered
57 )?;
58
59 println!("Ordered categories: {:?}", ordered_cat.categories());
60 println!("Number of categories: {}", ordered_cat.categories().len());
61 println!("Number of data: {}", ordered_cat.len());
62
63 // Display category codes
64 println!("Internal codes: {:?}", ordered_cat.codes());
65 println!();
66
67 // 3. Operations on categorical data
68 println!("3. Operations on categorical data");
69
70 // Create two categorical data
71 let values1 = vec![
72 NA::Value("A".to_string()),
73 NA::Value("B".to_string()),
74 NA::NA,
75 NA::Value("C".to_string()),
76 ];
77
78 let values2 = vec![
79 NA::Value("B".to_string()),
80 NA::Value("C".to_string()),
81 NA::Value("D".to_string()),
82 NA::NA,
83 ];
84
85 let cat1 = StringCategorical::from_na_vec(values1, None, None)?;
86 let cat2 = StringCategorical::from_na_vec(values2, None, None)?;
87
88 // Set operations
89 let union = cat1.union(&cat2)?; // Union
90 let intersection = cat1.intersection(&cat2)?; // Intersection
91 let difference = cat1.difference(&cat2)?; // Difference
92
93 println!("Categories of set 1: {:?}", cat1.categories());
94 println!("Categories of set 2: {:?}", cat2.categories());
95 println!("Union: {:?}", union.categories());
96 println!("Intersection: {:?}", intersection.categories());
97 println!("Difference (set 1 - set 2): {:?}", difference.categories());
98 println!();
99
100 // 4. Using categorical columns in DataFrame
101 println!("4. Using categorical columns in DataFrame");
102
103 // Create a vector with NA values (first create for categorical)
104 let values = vec![
105 NA::Value("High".to_string()),
106 NA::Value("Medium".to_string()),
107 NA::NA,
108 NA::Value("Low".to_string()),
109 ];
110
111 // Simplified for sample code
112 let order_cats = vec!["Low".to_string(), "Medium".to_string(), "High".to_string()];
113
114 // Create categorical data
115 let cat_eval = StringCategorical::from_na_vec(
116 values.clone(), // Clone it
117 Some(order_cats),
118 Some(CategoricalOrder::Ordered),
119 )?;
120
121 // Output the size of the created categorical data
122 println!("Size of created categorical data: {}", cat_eval.len());
123
124 // Add as categorical column
125 let categoricals = vec![("Evaluation".to_string(), cat_eval)];
126 let mut df = DataFrame::from_categoricals(categoricals)?;
127
128 // Check the number of rows in the data and match it
129 println!("Number of rows in DataFrame: {}", df.row_count());
130 println!("Note: NA values are excluded when creating DataFrame");
131
132 // Add numeric column (match the number of rows)
133 let scores = vec![95, 80, 0]; // Match the number of rows in DataFrame
134 println!("Size of scores: {}", scores.len());
135
136 df.add_column(
137 "Score".to_string(),
138 Series::new(scores, Some("Score".to_string()))?,
139 )?;
140
141 println!("DataFrame: ");
142 println!("{:#?}", df);
143
144 // Retrieve and verify categorical data
145 println!(
146 "Is 'Evaluation' column categorical: {}",
147 df.is_categorical("Evaluation")
148 );
149
150 // Explicitly handle errors
151 match df.get_categorical::<String>("Evaluation") {
152 Ok(cat_col) => println!(
153 "Categories of 'Evaluation' column: {:?}",
154 cat_col.categories()
155 ),
156 Err(_) => println!("Failed to retrieve categories of 'Evaluation' column"),
157 }
158 println!();
159
160 // 5. Input and output with CSV file
161 println!("5. Input and output with CSV file");
162
163 // Save to temporary file
164 let temp_path = Path::new("/tmp/categorical_example.csv");
165 df.to_csv(temp_path)?;
166
167 println!("Saved to CSV file: {}", temp_path.display());
168
169 // Load from file
170 let df_loaded = DataFrame::from_csv(temp_path, true)?;
171
172 // After loading from CSV, categorical information is lost (loaded as regular string column)
173 println!("Data loaded from CSV:");
174 println!("{:#?}", df_loaded);
175
176 // Check data loaded from CSV
177
178 // Note that data loaded from CSV is in a special format
179 println!("Example of data format loaded from CSV:");
180 println!(
181 "First value of 'Evaluation' column: {:?}",
182 df_loaded
183 .get_column::<String>("Evaluation")
184 .unwrap()
185 .values()[0]
186 );
187
188 // To reconstruct categorical data from this CSV loaded data,
189 // more complex processing is required, so the following is a simple example
190
191 // Create new categorical data as an example
192 let new_values = vec![
193 NA::Value("High".to_string()),
194 NA::Value("Medium".to_string()),
195 NA::NA,
196 NA::Value("Low".to_string()),
197 ];
198
199 let new_cat =
200 StringCategorical::from_na_vec(new_values, None, Some(CategoricalOrder::Ordered))?;
201
202 println!("Example of newly created categorical data:");
203 println!("Categories: {:?}", new_cat.categories());
204 println!("Order: {:?}", new_cat.ordered());
205
206 println!("\nTo actually convert data loaded from CSV to categorical data,");
207 println!("parsing processing according to the format and string escaping method of the CSV is required.");
208
209 println!("\n=== Sample End ===");
210 Ok(())
211}
More examples
8fn main() -> Result<()> {
9 println!("=== Example of Using Categorical Data Type ===\n");
10
11 // ===========================================================
12 // Creating Basic Categorical Data
13 // ===========================================================
14
15 println!("--- Creating Basic Categorical Data ---");
16 let values = vec!["Tokyo", "Osaka", "Tokyo", "Nagoya", "Osaka", "Tokyo"];
17 let values_str: Vec<String> = values.iter().map(|s| s.to_string()).collect();
18
19 // Create categorical data (unique values are automatically extracted)
20 // Changed: Now using boolean instead of Some(CategoricalOrder::Unordered)
21 let cat = StringCategorical::new(
22 values_str, None, // Automatically detect categories
23 false, // Unordered
24 )?;
25
26 println!("Original Data: {:?}", values);
27 println!("Categories: {:?}", cat.categories());
28 println!("Order Type: {:?}", cat.ordered());
29 println!("Data Length: {}", cat.len());
30
31 // Retrieve actual values from categorical data
32 println!(
33 "\nFirst 3 values: {} {} {}",
34 cat.get(0).unwrap_or(&"None".to_string()),
35 cat.get(1).unwrap_or(&"None".to_string()),
36 cat.get(2).unwrap_or(&"None".to_string())
37 );
38 println!("\nValues stored internally as codes: {:?}", cat.codes());
39
40 // ===========================================================
41 // Creating with Explicit Category List
42 // ===========================================================
43
44 println!("\n--- Creating with Explicit Category List ---");
45 let values2 = vec!["Red", "Blue", "Red"];
46 let values2_str: Vec<String> = values2.iter().map(|s| s.to_string()).collect();
47
48 // Define all categories beforehand
49 let categories = vec!["Red", "Blue", "Green", "Yellow"];
50 let categories_str: Vec<String> = categories.iter().map(|s| s.to_string()).collect();
51
52 // Create ordered categorical data
53 // Changed: Now using boolean instead of Some(CategoricalOrder::Ordered)
54 let cat2 = StringCategorical::new(
55 values2_str,
56 Some(categories_str), // Explicit category list
57 true, // Ordered
58 )?;
59
60 println!("Categories: {:?}", cat2.categories()); // Red, Blue, Green, Yellow
61 println!("Codes: {:?}", cat2.codes());
62
63 // ===========================================================
64 // Operations on Categorical Data
65 // ===========================================================
66
67 println!("\n--- Example of Categorical Operations ---");
68
69 // Base categorical data
70 // Changed: Using false instead of None for the ordered parameter
71 let fruits = vec!["Apple", "Banana", "Apple", "Orange"];
72 let fruits_str: Vec<String> = fruits.iter().map(|s| s.to_string()).collect();
73 let mut fruit_cat = StringCategorical::new(fruits_str, None, false)?;
74
75 println!("Original Categories: {:?}", fruit_cat.categories());
76
77 // Add categories
78 let new_cats = vec!["Grape", "Strawberry"];
79 let new_cats_str: Vec<String> = new_cats.iter().map(|s| s.to_string()).collect();
80 fruit_cat.add_categories(new_cats_str)?;
81
82 println!("Categories after addition: {:?}", fruit_cat.categories());
83
84 // Change category order
85 let reordered = vec!["Banana", "Strawberry", "Orange", "Apple", "Grape"];
86 let reordered_str: Vec<String> = reordered.iter().map(|s| s.to_string()).collect();
87 fruit_cat.reorder_categories(reordered_str)?;
88
89 println!("Categories after reordering: {:?}", fruit_cat.categories());
90 println!("Codes: {:?}", fruit_cat.codes());
91
92 // ===========================================================
93 // Integration with DataFrame
94 // ===========================================================
95
96 println!("\n--- Integration of Categorical Data with DataFrame ---");
97
98 // Create a basic DataFrame
99 let mut df = DataFrame::new();
100
101 // Add regular columns
102 let regions = vec!["Hokkaido", "Kanto", "Kansai", "Kyushu", "Kanto", "Kansai"];
103 let regions_str: Vec<String> = regions.iter().map(|s| s.to_string()).collect();
104 let pop = vec!["Low", "High", "High", "Medium", "High", "High"];
105 let pop_str: Vec<String> = pop.iter().map(|s| s.to_string()).collect();
106
107 df.add_column(
108 "Region".to_string(),
109 Series::new(regions_str, Some("Region".to_string()))?,
110 )?;
111 df.add_column(
112 "Population".to_string(),
113 Series::new(pop_str, Some("Population".to_string()))?,
114 )?;
115
116 println!("Original DataFrame:\n{:?}", df);
117
118 // ===========================================================
119 // Creating Simplified Categorical DataFrame
120 // ===========================================================
121
122 // Create a DataFrame directly from categorical data
123 println!("\n--- Creating DataFrame with Categorical Data ---");
124
125 // Create categorical data
126 // Changed: Using boolean instead of Some(CategoricalOrder::Ordered)
127 let populations = vec!["Low", "Medium", "High"];
128 let populations_str: Vec<String> = populations.iter().map(|s| s.to_string()).collect();
129 let pop_cat = StringCategorical::new(
130 populations_str,
131 None, // Automatically detect
132 true, // Ordered
133 )?;
134
135 // Region data
136 let regions = vec!["Hokkaido", "Kanto", "Kansai"];
137 let regions_str: Vec<String> = regions.iter().map(|s| s.to_string()).collect();
138
139 // Create DataFrame from both categorical data
140 let categoricals = vec![("Population".to_string(), pop_cat)];
141
142 let mut df_cat = DataFrame::from_categoricals(categoricals)?;
143
144 // Add region column
145 df_cat.add_column(
146 "Region".to_string(),
147 Series::new(regions_str, Some("Region".to_string()))?,
148 )?;
149
150 println!("\nDataFrame after adding categorical data:\n{:?}", df_cat);
151
152 // Check if columns are categorical
153 println!(
154 "\nIs 'Population' column categorical: {}",
155 df_cat.is_categorical("Population")
156 );
157 println!(
158 "Is 'Region' column categorical: {}",
159 df_cat.is_categorical("Region")
160 );
161
162 // ===========================================================
163 // Example of Multi-Categorical DataFrame
164 // ===========================================================
165
166 println!("\n--- Example of Multi-Categorical DataFrame ---");
167
168 // Create product and color data as separate categories
169 // Changed: Using false instead of None for the ordered parameter
170 let products = vec!["A", "B", "C"];
171 let products_str: Vec<String> = products.iter().map(|s| s.to_string()).collect();
172 let product_cat = StringCategorical::new(products_str, None, false)?;
173
174 let colors = vec!["Red", "Blue", "Green"];
175 let colors_str: Vec<String> = colors.iter().map(|s| s.to_string()).collect();
176 let color_cat = StringCategorical::new(colors_str, None, false)?;
177
178 // Create a DataFrame containing both categories
179 let multi_categoricals = vec![
180 ("Product".to_string(), product_cat),
181 ("Color".to_string(), color_cat),
182 ];
183
184 let multi_df = DataFrame::from_categoricals(multi_categoricals)?;
185
186 println!("Multi-Categorical DataFrame:\n{:?}", multi_df);
187 println!(
188 "\nIs 'Product' column categorical: {}",
189 multi_df.is_categorical("Product")
190 );
191 println!(
192 "Is 'Color' column categorical: {}",
193 multi_df.is_categorical("Color")
194 );
195
196 // ===========================================================
197 // Aggregation and Analysis of Categorical Data
198 // ===========================================================
199
200 println!("\n--- Aggregation and Grouping of Categorical Data ---");
201
202 // Start with a simple DataFrame
203 let mut df_simple = DataFrame::new();
204
205 // Add product data
206 let products = vec!["A", "B", "C", "A", "B"];
207 let products_str: Vec<String> = products.iter().map(|s| s.to_string()).collect();
208 let sales = vec!["100", "150", "200", "120", "180"];
209 let sales_str: Vec<String> = sales.iter().map(|s| s.to_string()).collect();
210
211 df_simple.add_column(
212 "Product".to_string(),
213 Series::new(products_str.clone(), Some("Product".to_string()))?,
214 )?;
215 df_simple.add_column(
216 "Sales".to_string(),
217 Series::new(sales_str, Some("Sales".to_string()))?,
218 )?;
219
220 println!("Original DataFrame:\n{:?}", df_simple);
221
222 // Aggregate by product
223 let product_counts = df_simple.value_counts("Product")?;
224 println!("\nProduct Counts:\n{:?}", product_counts);
225
226 // Transformation and interaction between categorical and series
227 println!("\n--- Interaction between Categorical and Series ---");
228
229 // Create a simple categorical series
230 // Changed: Using false instead of None for the ordered parameter
231 let letter_cat = StringCategorical::new(
232 vec!["A".to_string(), "B".to_string(), "C".to_string()],
233 None,
234 false,
235 )?;
236
237 // Convert to series
238 let letter_series = letter_cat.to_series(Some("Letter".to_string()))?;
239 println!("Converted from categorical to series: {:?}", letter_series);
240
241 // Additional information about categorical data
242 println!("\n--- Characteristics of Categorical Data ---");
243 println!(
244 "Categorical data is stored in memory only once, regardless of repeated string values."
245 );
246 println!(
247 "This makes it particularly efficient for datasets with many duplicate string values."
248 );
249 println!("Additionally, ordered categorical data allows meaningful sorting of data.");
250
251 println!("\n=== Sample Complete ===");
252 Ok(())
253}
Sourcepub fn value_counts(&self, column_name: &str) -> Result<Series<usize>>
pub fn value_counts(&self, column_name: &str) -> Result<Series<usize>>
Count the occurrences of each value in a column
Examples found in repository?
8fn main() -> Result<()> {
9 println!("=== Example of Using Categorical Data Type ===\n");
10
11 // ===========================================================
12 // Creating Basic Categorical Data
13 // ===========================================================
14
15 println!("--- Creating Basic Categorical Data ---");
16 let values = vec!["Tokyo", "Osaka", "Tokyo", "Nagoya", "Osaka", "Tokyo"];
17 let values_str: Vec<String> = values.iter().map(|s| s.to_string()).collect();
18
19 // Create categorical data (unique values are automatically extracted)
20 // Changed: Now using boolean instead of Some(CategoricalOrder::Unordered)
21 let cat = StringCategorical::new(
22 values_str, None, // Automatically detect categories
23 false, // Unordered
24 )?;
25
26 println!("Original Data: {:?}", values);
27 println!("Categories: {:?}", cat.categories());
28 println!("Order Type: {:?}", cat.ordered());
29 println!("Data Length: {}", cat.len());
30
31 // Retrieve actual values from categorical data
32 println!(
33 "\nFirst 3 values: {} {} {}",
34 cat.get(0).unwrap_or(&"None".to_string()),
35 cat.get(1).unwrap_or(&"None".to_string()),
36 cat.get(2).unwrap_or(&"None".to_string())
37 );
38 println!("\nValues stored internally as codes: {:?}", cat.codes());
39
40 // ===========================================================
41 // Creating with Explicit Category List
42 // ===========================================================
43
44 println!("\n--- Creating with Explicit Category List ---");
45 let values2 = vec!["Red", "Blue", "Red"];
46 let values2_str: Vec<String> = values2.iter().map(|s| s.to_string()).collect();
47
48 // Define all categories beforehand
49 let categories = vec!["Red", "Blue", "Green", "Yellow"];
50 let categories_str: Vec<String> = categories.iter().map(|s| s.to_string()).collect();
51
52 // Create ordered categorical data
53 // Changed: Now using boolean instead of Some(CategoricalOrder::Ordered)
54 let cat2 = StringCategorical::new(
55 values2_str,
56 Some(categories_str), // Explicit category list
57 true, // Ordered
58 )?;
59
60 println!("Categories: {:?}", cat2.categories()); // Red, Blue, Green, Yellow
61 println!("Codes: {:?}", cat2.codes());
62
63 // ===========================================================
64 // Operations on Categorical Data
65 // ===========================================================
66
67 println!("\n--- Example of Categorical Operations ---");
68
69 // Base categorical data
70 // Changed: Using false instead of None for the ordered parameter
71 let fruits = vec!["Apple", "Banana", "Apple", "Orange"];
72 let fruits_str: Vec<String> = fruits.iter().map(|s| s.to_string()).collect();
73 let mut fruit_cat = StringCategorical::new(fruits_str, None, false)?;
74
75 println!("Original Categories: {:?}", fruit_cat.categories());
76
77 // Add categories
78 let new_cats = vec!["Grape", "Strawberry"];
79 let new_cats_str: Vec<String> = new_cats.iter().map(|s| s.to_string()).collect();
80 fruit_cat.add_categories(new_cats_str)?;
81
82 println!("Categories after addition: {:?}", fruit_cat.categories());
83
84 // Change category order
85 let reordered = vec!["Banana", "Strawberry", "Orange", "Apple", "Grape"];
86 let reordered_str: Vec<String> = reordered.iter().map(|s| s.to_string()).collect();
87 fruit_cat.reorder_categories(reordered_str)?;
88
89 println!("Categories after reordering: {:?}", fruit_cat.categories());
90 println!("Codes: {:?}", fruit_cat.codes());
91
92 // ===========================================================
93 // Integration with DataFrame
94 // ===========================================================
95
96 println!("\n--- Integration of Categorical Data with DataFrame ---");
97
98 // Create a basic DataFrame
99 let mut df = DataFrame::new();
100
101 // Add regular columns
102 let regions = vec!["Hokkaido", "Kanto", "Kansai", "Kyushu", "Kanto", "Kansai"];
103 let regions_str: Vec<String> = regions.iter().map(|s| s.to_string()).collect();
104 let pop = vec!["Low", "High", "High", "Medium", "High", "High"];
105 let pop_str: Vec<String> = pop.iter().map(|s| s.to_string()).collect();
106
107 df.add_column(
108 "Region".to_string(),
109 Series::new(regions_str, Some("Region".to_string()))?,
110 )?;
111 df.add_column(
112 "Population".to_string(),
113 Series::new(pop_str, Some("Population".to_string()))?,
114 )?;
115
116 println!("Original DataFrame:\n{:?}", df);
117
118 // ===========================================================
119 // Creating Simplified Categorical DataFrame
120 // ===========================================================
121
122 // Create a DataFrame directly from categorical data
123 println!("\n--- Creating DataFrame with Categorical Data ---");
124
125 // Create categorical data
126 // Changed: Using boolean instead of Some(CategoricalOrder::Ordered)
127 let populations = vec!["Low", "Medium", "High"];
128 let populations_str: Vec<String> = populations.iter().map(|s| s.to_string()).collect();
129 let pop_cat = StringCategorical::new(
130 populations_str,
131 None, // Automatically detect
132 true, // Ordered
133 )?;
134
135 // Region data
136 let regions = vec!["Hokkaido", "Kanto", "Kansai"];
137 let regions_str: Vec<String> = regions.iter().map(|s| s.to_string()).collect();
138
139 // Create DataFrame from both categorical data
140 let categoricals = vec![("Population".to_string(), pop_cat)];
141
142 let mut df_cat = DataFrame::from_categoricals(categoricals)?;
143
144 // Add region column
145 df_cat.add_column(
146 "Region".to_string(),
147 Series::new(regions_str, Some("Region".to_string()))?,
148 )?;
149
150 println!("\nDataFrame after adding categorical data:\n{:?}", df_cat);
151
152 // Check if columns are categorical
153 println!(
154 "\nIs 'Population' column categorical: {}",
155 df_cat.is_categorical("Population")
156 );
157 println!(
158 "Is 'Region' column categorical: {}",
159 df_cat.is_categorical("Region")
160 );
161
162 // ===========================================================
163 // Example of Multi-Categorical DataFrame
164 // ===========================================================
165
166 println!("\n--- Example of Multi-Categorical DataFrame ---");
167
168 // Create product and color data as separate categories
169 // Changed: Using false instead of None for the ordered parameter
170 let products = vec!["A", "B", "C"];
171 let products_str: Vec<String> = products.iter().map(|s| s.to_string()).collect();
172 let product_cat = StringCategorical::new(products_str, None, false)?;
173
174 let colors = vec!["Red", "Blue", "Green"];
175 let colors_str: Vec<String> = colors.iter().map(|s| s.to_string()).collect();
176 let color_cat = StringCategorical::new(colors_str, None, false)?;
177
178 // Create a DataFrame containing both categories
179 let multi_categoricals = vec![
180 ("Product".to_string(), product_cat),
181 ("Color".to_string(), color_cat),
182 ];
183
184 let multi_df = DataFrame::from_categoricals(multi_categoricals)?;
185
186 println!("Multi-Categorical DataFrame:\n{:?}", multi_df);
187 println!(
188 "\nIs 'Product' column categorical: {}",
189 multi_df.is_categorical("Product")
190 );
191 println!(
192 "Is 'Color' column categorical: {}",
193 multi_df.is_categorical("Color")
194 );
195
196 // ===========================================================
197 // Aggregation and Analysis of Categorical Data
198 // ===========================================================
199
200 println!("\n--- Aggregation and Grouping of Categorical Data ---");
201
202 // Start with a simple DataFrame
203 let mut df_simple = DataFrame::new();
204
205 // Add product data
206 let products = vec!["A", "B", "C", "A", "B"];
207 let products_str: Vec<String> = products.iter().map(|s| s.to_string()).collect();
208 let sales = vec!["100", "150", "200", "120", "180"];
209 let sales_str: Vec<String> = sales.iter().map(|s| s.to_string()).collect();
210
211 df_simple.add_column(
212 "Product".to_string(),
213 Series::new(products_str.clone(), Some("Product".to_string()))?,
214 )?;
215 df_simple.add_column(
216 "Sales".to_string(),
217 Series::new(sales_str, Some("Sales".to_string()))?,
218 )?;
219
220 println!("Original DataFrame:\n{:?}", df_simple);
221
222 // Aggregate by product
223 let product_counts = df_simple.value_counts("Product")?;
224 println!("\nProduct Counts:\n{:?}", product_counts);
225
226 // Transformation and interaction between categorical and series
227 println!("\n--- Interaction between Categorical and Series ---");
228
229 // Create a simple categorical series
230 // Changed: Using false instead of None for the ordered parameter
231 let letter_cat = StringCategorical::new(
232 vec!["A".to_string(), "B".to_string(), "C".to_string()],
233 None,
234 false,
235 )?;
236
237 // Convert to series
238 let letter_series = letter_cat.to_series(Some("Letter".to_string()))?;
239 println!("Converted from categorical to series: {:?}", letter_series);
240
241 // Additional information about categorical data
242 println!("\n--- Characteristics of Categorical Data ---");
243 println!(
244 "Categorical data is stored in memory only once, regardless of repeated string values."
245 );
246 println!(
247 "This makes it particularly efficient for datasets with many duplicate string values."
248 );
249 println!("Additionally, ordered categorical data allows meaningful sorting of data.");
250
251 println!("\n=== Sample Complete ===");
252 Ok(())
253}
Source§impl DataFrame
Parallel processing extension: DataFrame parallel processing
impl DataFrame
Parallel processing extension: DataFrame parallel processing
Sourcepub fn par_apply<F>(&self, f: F) -> Result<DataFrame>
pub fn par_apply<F>(&self, f: F) -> Result<DataFrame>
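Apply a function to the values of all columns in parallel. As the example below shows, the function is called with the column name, the row, and the value, and returns the new value as a string.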
Examples found in repository?
4fn main() -> Result<(), Box<dyn Error>> {
5 println!("=== Example of Parallel Processing Features ===\n");
6
7 // Create sample data
8 let numbers = Series::new(
9 vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
10 Some("numbers".to_string()),
11 )?;
12
13 // Parallel map: square each number
14 println!("Example of parallel map processing:");
15 let squared = numbers.par_map(|x| x * x);
16 println!("Original values: {:?}", numbers.values());
17 println!("Squared values: {:?}", squared.values());
18
19 // Parallel filter: keep only even numbers
20 println!("\nExample of parallel filtering:");
21 let even_numbers = numbers.par_filter(|x| x % 2 == 0);
22 println!("Even Numbers: {:?}", even_numbers.values());
23
24 // Processing data containing NA
25 let na_data = vec![
26 NA::Value(10),
27 NA::Value(20),
28 NA::NA,
29 NA::Value(40),
30 NA::NA,
31 NA::Value(60),
32 ];
33 let na_series = NASeries::new(na_data, Some("na_numbers".to_string()))?;
34
35 println!("\nParallel processing of data containing NA:");
36 let na_tripled = na_series.par_map(|x| x * 3);
37 println!("Original values: {:?}", na_series.values());
38 println!("Tripled values: {:?}", na_tripled.values());
39
40 // Parallel processing of DataFrame
41 println!("\nParallel processing of DataFrame:");
42
43 // Creating a sample DataFrame
44 let mut df = DataFrame::new();
45 let names = Series::new(
46 vec!["Alice", "Bob", "Charlie", "David", "Eve"],
47 Some("name".to_string()),
48 )?;
49 let ages = Series::new(vec![25, 30, 35, 40, 45], Some("age".to_string()))?;
50 let scores = Series::new(vec![85, 90, 78, 92, 88], Some("score".to_string()))?;
51
52 df.add_column("name".to_string(), names)?;
53 df.add_column("age".to_string(), ages)?;
54 df.add_column("score".to_string(), scores)?;
55
56 // Parallel transformation of DataFrame
57 println!("Example of DataFrame.par_apply:");
58 let transformed_df = df.par_apply(|col, _row, val| {
59 match col {
60 "age" => {
61 // Add 1 to age
62 let age: i32 = val.parse().unwrap_or(0);
63 (age + 1).to_string()
64 }
65 "score" => {
66 // Add 5 to score
67 let score: i32 = val.parse().unwrap_or(0);
68 (score + 5).to_string()
69 }
70 _ => val.to_string(),
71 }
72 })?;
73
74 println!(
75 "Original DF row count: {}, column count: {}",
76 df.row_count(),
77 df.column_count()
78 );
79 println!(
80 "Transformed DF row count: {}, column count: {}",
81 transformed_df.row_count(),
82 transformed_df.column_count()
83 );
84
85 // Filtering rows
86 println!("\nExample of DataFrame.par_filter_rows:");
87 let filtered_df = df.par_filter_rows(|row| {
88 // Keep only rows where score > 85
89 if let Ok(values) = df.get_column_numeric_values("score") {
90 if row < values.len() {
91 return values[row] > 85.0;
92 }
93 }
94 false
95 })?;
96
97 println!("Row count after filtering: {}", filtered_df.row_count());
98
99 // Example of using ParallelUtils
100 println!("\nExample of ParallelUtils features:");
101
102 let unsorted = vec![5, 3, 8, 1, 9, 4, 7, 2, 6];
103 let sorted = ParallelUtils::par_sort(unsorted.clone());
104 println!("Before sorting: {:?}", unsorted);
105 println!("After sorting: {:?}", sorted);
106
107 let numbers_vec = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
108 let sum = ParallelUtils::par_sum(&numbers_vec);
109 let mean = ParallelUtils::par_mean(&numbers_vec);
110 let min = ParallelUtils::par_min(&numbers_vec);
111 let max = ParallelUtils::par_max(&numbers_vec);
112
113 println!("Sum: {}", sum);
114 println!("Mean: {}", mean.unwrap());
115 println!("Min: {}", min.unwrap());
116 println!("Max: {}", max.unwrap());
117
118 println!("\n=== Example of Parallel Processing Features Complete ===");
119 Ok(())
120}
Sourcepub fn par_filter_rows<F>(&self, f: F) -> Result<DataFrame>
pub fn par_filter_rows<F>(&self, f: F) -> Result<DataFrame>
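Filter rows in parallel. As the example below shows, the predicate is called with a row index and returns true for the rows to keep.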
Examples found in repository?
4fn main() -> Result<(), Box<dyn Error>> {
5 println!("=== Example of Parallel Processing Features ===\n");
6
7 // Create sample data
8 let numbers = Series::new(
9 vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
10 Some("numbers".to_string()),
11 )?;
12
13 // Parallel map: square each number
14 println!("Example of parallel map processing:");
15 let squared = numbers.par_map(|x| x * x);
16 println!("Original values: {:?}", numbers.values());
17 println!("Squared values: {:?}", squared.values());
18
19 // Parallel filter: keep only even numbers
20 println!("\nExample of parallel filtering:");
21 let even_numbers = numbers.par_filter(|x| x % 2 == 0);
22 println!("Even Numbers: {:?}", even_numbers.values());
23
24 // Processing data containing NA
25 let na_data = vec![
26 NA::Value(10),
27 NA::Value(20),
28 NA::NA,
29 NA::Value(40),
30 NA::NA,
31 NA::Value(60),
32 ];
33 let na_series = NASeries::new(na_data, Some("na_numbers".to_string()))?;
34
35 println!("\nParallel processing of data containing NA:");
36 let na_tripled = na_series.par_map(|x| x * 3);
37 println!("Original values: {:?}", na_series.values());
38 println!("Tripled values: {:?}", na_tripled.values());
39
40 // Parallel processing of DataFrame
41 println!("\nParallel processing of DataFrame:");
42
43 // Creating a sample DataFrame
44 let mut df = DataFrame::new();
45 let names = Series::new(
46 vec!["Alice", "Bob", "Charlie", "David", "Eve"],
47 Some("name".to_string()),
48 )?;
49 let ages = Series::new(vec![25, 30, 35, 40, 45], Some("age".to_string()))?;
50 let scores = Series::new(vec![85, 90, 78, 92, 88], Some("score".to_string()))?;
51
52 df.add_column("name".to_string(), names)?;
53 df.add_column("age".to_string(), ages)?;
54 df.add_column("score".to_string(), scores)?;
55
56 // Parallel transformation of DataFrame
57 println!("Example of DataFrame.par_apply:");
58 let transformed_df = df.par_apply(|col, _row, val| {
59 match col {
60 "age" => {
61 // Add 1 to age
62 let age: i32 = val.parse().unwrap_or(0);
63 (age + 1).to_string()
64 }
65 "score" => {
66 // Add 5 to score
67 let score: i32 = val.parse().unwrap_or(0);
68 (score + 5).to_string()
69 }
70 _ => val.to_string(),
71 }
72 })?;
73
74 println!(
75 "Original DF row count: {}, column count: {}",
76 df.row_count(),
77 df.column_count()
78 );
79 println!(
80 "Transformed DF row count: {}, column count: {}",
81 transformed_df.row_count(),
82 transformed_df.column_count()
83 );
84
85 // Filtering rows
86 println!("\nExample of DataFrame.par_filter_rows:");
87 let filtered_df = df.par_filter_rows(|row| {
88 // Keep only rows where score > 85
89 if let Ok(values) = df.get_column_numeric_values("score") {
90 if row < values.len() {
91 return values[row] > 85.0;
92 }
93 }
94 false
95 })?;
96
97 println!("Row count after filtering: {}", filtered_df.row_count());
98
99 // Example of using ParallelUtils
100 println!("\nExample of ParallelUtils features:");
101
102 let unsorted = vec![5, 3, 8, 1, 9, 4, 7, 2, 6];
103 let sorted = ParallelUtils::par_sort(unsorted.clone());
104 println!("Before sorting: {:?}", unsorted);
105 println!("After sorting: {:?}", sorted);
106
107 let numbers_vec = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
108 let sum = ParallelUtils::par_sum(&numbers_vec);
109 let mean = ParallelUtils::par_mean(&numbers_vec);
110 let min = ParallelUtils::par_min(&numbers_vec);
111 let max = ParallelUtils::par_max(&numbers_vec);
112
113 println!("Sum: {}", sum);
114 println!("Mean: {}", mean.unwrap());
115 println!("Min: {}", min.unwrap());
116 println!("Max: {}", max.unwrap());
117
118 println!("\n=== Example of Parallel Processing Features Complete ===");
119 Ok(())
120}
Source§impl DataFrame
DataFrame extension: Pivot table functionality
impl DataFrame
DataFrame extension: Pivot table functionality
Sourcepub fn pivot_table(
&self,
index: &str,
columns: &str,
values: &str,
aggfunc: AggFunction,
) -> Result<DataFrame>
pub fn pivot_table( &self, index: &str, columns: &str, values: &str, aggfunc: AggFunction, ) -> Result<DataFrame>
Create a pivot table
Examples found in repository?
5fn main() -> Result<()> {
6 println!("=== Pivot Table and Grouping Example ===");
7
8 // Create sample data
9 let mut df = DataFrame::new();
10
11 // Create column data
12 let category = Series::new(
13 vec![
14 "A".to_string(),
15 "B".to_string(),
16 "A".to_string(),
17 "C".to_string(),
18 "B".to_string(),
19 "A".to_string(),
20 "C".to_string(),
21 "B".to_string(),
22 ],
23 Some("category".to_string()),
24 )?;
25
26 let region = Series::new(
27 vec![
28 "East".to_string(),
29 "West".to_string(),
30 "West".to_string(),
31 "East".to_string(),
32 "East".to_string(),
33 "West".to_string(),
34 "West".to_string(),
35 "East".to_string(),
36 ],
37 Some("region".to_string()),
38 )?;
39
40 let sales = Series::new(
41 vec![100, 150, 200, 120, 180, 90, 250, 160],
42 Some("sales".to_string()),
43 )?;
44
45 // Add columns to DataFrame
46 df.add_column("category".to_string(), category)?;
47 df.add_column("region".to_string(), region)?;
48 df.add_column("sales".to_string(), sales)?;
49
50 println!("DataFrame Info:");
51 println!(" Number of columns: {}", df.column_count());
52 println!(" Number of rows: {}", df.row_count());
53 println!(" Column names: {:?}", df.column_names());
54
55 // Grouping and aggregation
56 println!("\n=== Grouping by Category ===");
57 let category_group = df.groupby("category")?;
58
59 println!("Sum by category (in progress):");
60 let _category_sum = category_group.sum(&["sales"])?;
61
62 // Pivot table (in progress)
63 println!("\n=== Pivot Table ===");
64 println!("Sum of sales by category and region (in progress):");
65 let _pivot_result = df.pivot_table("category", "region", "sales", AggFunction::Sum)?;
66
67 // Note: Pivot table and grouping features are still under development,
68 // so actual results are not displayed
69
70 println!("\n=== Aggregation Function Examples ===");
71 let functions = [
72 AggFunction::Sum,
73 AggFunction::Mean,
74 AggFunction::Min,
75 AggFunction::Max,
76 AggFunction::Count,
77 ];
78
79 for func in &functions {
80 println!(
81 "Aggregation Function: {} ({})",
82 func.name(),
83 match func {
84 AggFunction::Sum => "Sum",
85 AggFunction::Mean => "Mean",
86 AggFunction::Min => "Min",
87 AggFunction::Max => "Max",
88 AggFunction::Count => "Count",
89 }
90 );
91 }
92
93 println!("\n=== Pivot Table Example (Complete) ===");
94 Ok(())
95}
Sourcepub fn groupby(&self, by: &str) -> Result<GroupBy<'_>>
pub fn groupby(&self, by: &str) -> Result<GroupBy<'_>>
Group by specified column
Examples found in repository?
5fn main() -> Result<()> {
6 println!("=== Pivot Table and Grouping Example ===");
7
8 // Create sample data
9 let mut df = DataFrame::new();
10
11 // Create column data
12 let category = Series::new(
13 vec![
14 "A".to_string(),
15 "B".to_string(),
16 "A".to_string(),
17 "C".to_string(),
18 "B".to_string(),
19 "A".to_string(),
20 "C".to_string(),
21 "B".to_string(),
22 ],
23 Some("category".to_string()),
24 )?;
25
26 let region = Series::new(
27 vec![
28 "East".to_string(),
29 "West".to_string(),
30 "West".to_string(),
31 "East".to_string(),
32 "East".to_string(),
33 "West".to_string(),
34 "West".to_string(),
35 "East".to_string(),
36 ],
37 Some("region".to_string()),
38 )?;
39
40 let sales = Series::new(
41 vec![100, 150, 200, 120, 180, 90, 250, 160],
42 Some("sales".to_string()),
43 )?;
44
45 // Add columns to DataFrame
46 df.add_column("category".to_string(), category)?;
47 df.add_column("region".to_string(), region)?;
48 df.add_column("sales".to_string(), sales)?;
49
50 println!("DataFrame Info:");
51 println!(" Number of columns: {}", df.column_count());
52 println!(" Number of rows: {}", df.row_count());
53 println!(" Column names: {:?}", df.column_names());
54
55 // Grouping and aggregation
56 println!("\n=== Grouping by Category ===");
57 let category_group = df.groupby("category")?;
58
59 println!("Sum by category (in progress):");
60 let _category_sum = category_group.sum(&["sales"])?;
61
62 // Pivot table (in progress)
63 println!("\n=== Pivot Table ===");
64 println!("Sum of sales by category and region (in progress):");
65 let _pivot_result = df.pivot_table("category", "region", "sales", AggFunction::Sum)?;
66
67 // Note: Pivot table and grouping features are still under development,
68 // so actual results are not displayed
69
70 println!("\n=== Aggregation Function Examples ===");
71 let functions = [
72 AggFunction::Sum,
73 AggFunction::Mean,
74 AggFunction::Min,
75 AggFunction::Max,
76 AggFunction::Count,
77 ];
78
79 for func in &functions {
80 println!(
81 "Aggregation Function: {} ({})",
82 func.name(),
83 match func {
84 AggFunction::Sum => "Sum",
85 AggFunction::Mean => "Mean",
86 AggFunction::Min => "Min",
87 AggFunction::Max => "Max",
88 AggFunction::Count => "Count",
89 }
90 );
91 }
92
93 println!("\n=== Pivot Table Example (Complete) ===");
94 Ok(())
95}
Source§impl DataFrame
impl DataFrame
Sourcepub fn plot_xy<P: AsRef<Path>>(
&self,
x_col: &str,
y_col: &str,
path: P,
config: PlotConfig,
) -> Result<()>
👎Deprecated since 0.1.0-alpha.2: Use DataFrame.scatter_xy() instead
pub fn plot_xy<P: AsRef<Path>>( &self, x_col: &str, y_col: &str, path: P, config: PlotConfig, ) -> Result<()>
Deprecated: use DataFrame.scatter_xy() instead.
Plot two columns as XY coordinates.
Note: This implementation is kept for backward compatibility. New code should use the scatter_xy method instead.
Sourcepub fn plot_lines<P: AsRef<Path>>(
&self,
columns: &[&str],
path: P,
config: PlotConfig,
) -> Result<()>
👎Deprecated since 0.1.0-alpha.2: Use DataFrame.multi_line_plot() instead
pub fn plot_lines<P: AsRef<Path>>( &self, columns: &[&str], path: P, config: PlotConfig, ) -> Result<()>
Deprecated: use DataFrame.multi_line_plot() instead.
Draw line graphs for multiple columns.
Note: This implementation is kept for backward compatibility. New code should use the multi_line_plot method instead.
Trait Implementations§
Source§impl ApplyExt for DataFrame
Implementation of ApplyExt for DataFrame
impl ApplyExt for DataFrame
Implementation of ApplyExt for DataFrame
Source§fn apply<F, R>(
&self,
f: F,
axis: Axis,
result_name: Option<String>,
) -> Result<Series<R>>
fn apply<F, R>( &self, f: F, axis: Axis, result_name: Option<String>, ) -> Result<Series<R>>
Source§fn mask<F>(&self, condition: F, other: &str) -> Result<Self>
fn mask<F>(&self, condition: F, other: &str) -> Result<Self>
Source§fn where_func<F>(&self, condition: F, other: &str) -> Result<Self>
fn where_func<F>(&self, condition: F, other: &str) -> Result<Self>
Source§fn replace(&self, replace_map: &HashMap<String, String>) -> Result<Self>
fn replace(&self, replace_map: &HashMap<String, String>) -> Result<Self>
Source§fn duplicated(
&self,
subset: Option<&[String]>,
keep: Option<&str>,
) -> Result<Series<bool>>
fn duplicated( &self, subset: Option<&[String]>, keep: Option<&str>, ) -> Result<Series<bool>>
Source§fn drop_duplicates(
&self,
subset: Option<&[String]>,
keep: Option<&str>,
) -> Result<Self>
fn drop_duplicates( &self, subset: Option<&[String]>, keep: Option<&str>, ) -> Result<Self>
Source§fn rolling(
&self,
window_size: usize,
column_name: &str,
operation: &str,
result_column: Option<&str>,
) -> Result<Self>
fn rolling( &self, window_size: usize, column_name: &str, operation: &str, result_column: Option<&str>, ) -> Result<Self>
Source§impl DataFramePlotExt for DataFrame
impl DataFramePlotExt for DataFrame
Source§fn plot_column<P: AsRef<Path>>(
&self,
_column: &str,
_path: P,
_title: Option<&str>,
) -> Result<()>
fn plot_column<P: AsRef<Path>>( &self, _column: &str, _path: P, _title: Option<&str>, ) -> Result<()>
Source§fn line_plot<P: AsRef<Path>>(
&self,
_column: &str,
_path: P,
_title: Option<&str>,
) -> Result<()>
fn line_plot<P: AsRef<Path>>( &self, _column: &str, _path: P, _title: Option<&str>, ) -> Result<()>
Source§fn scatter_plot<P: AsRef<Path>>(
&self,
_column: &str,
_path: P,
_title: Option<&str>,
) -> Result<()>
fn scatter_plot<P: AsRef<Path>>( &self, _column: &str, _path: P, _title: Option<&str>, ) -> Result<()>
Source§fn bar_plot<P: AsRef<Path>>(
&self,
_column: &str,
_path: P,
_title: Option<&str>,
) -> Result<()>
fn bar_plot<P: AsRef<Path>>( &self, _column: &str, _path: P, _title: Option<&str>, ) -> Result<()>
Source§fn area_plot<P: AsRef<Path>>(
&self,
_column: &str,
_path: P,
_title: Option<&str>,
) -> Result<()>
fn area_plot<P: AsRef<Path>>( &self, _column: &str, _path: P, _title: Option<&str>, ) -> Result<()>
Source§fn box_plot<P: AsRef<Path>>(
&self,
_value_column: &str,
_category_column: &str,
_path: P,
_title: Option<&str>,
) -> Result<()>
fn box_plot<P: AsRef<Path>>( &self, _value_column: &str, _category_column: &str, _path: P, _title: Option<&str>, ) -> Result<()>
Source§fn scatter_xy<P: AsRef<Path>>(
&self,
_x_column: &str,
_y_column: &str,
_path: P,
_title: Option<&str>,
) -> Result<()>
fn scatter_xy<P: AsRef<Path>>( &self, _x_column: &str, _y_column: &str, _path: P, _title: Option<&str>, ) -> Result<()>
Source§impl JoinExt for DataFrame
Implementation of JoinExt for DataFrame
impl JoinExt for DataFrame
Implementation of JoinExt for DataFrame
Source§fn join(&self, other: &Self, on: &str, join_type: JoinType) -> Result<Self>
fn join(&self, other: &Self, on: &str, join_type: JoinType) -> Result<Self>
Source§fn inner_join(&self, other: &Self, on: &str) -> Result<Self>
fn inner_join(&self, other: &Self, on: &str) -> Result<Self>
Source§fn right_join(&self, other: &Self, on: &str) -> Result<Self>
fn right_join(&self, other: &Self, on: &str) -> Result<Self>
Source§fn outer_join(&self, other: &Self, on: &str) -> Result<Self>
fn outer_join(&self, other: &Self, on: &str) -> Result<Self>
Source§impl SerializeExt for DataFrame
impl SerializeExt for DataFrame
Source§impl TransformExt for DataFrame
Implementation of TransformExt for DataFrame
impl TransformExt for DataFrame
Implementation of TransformExt for DataFrame
Source§fn melt(&self, options: &MeltOptions) -> Result<Self>
fn melt(&self, options: &MeltOptions) -> Result<Self>
Source§fn stack(&self, options: &StackOptions) -> Result<Self>
fn stack(&self, options: &StackOptions) -> Result<Self>
Source§fn unstack(&self, options: &UnstackOptions) -> Result<Self>
fn unstack(&self, options: &UnstackOptions) -> Result<Self>
Auto Trait Implementations§
impl Freeze for DataFrame
impl !RefUnwindSafe for DataFrame
impl Send for DataFrame
impl Sync for DataFrame
impl Unpin for DataFrame
impl !UnwindSafe for DataFrame
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
    T: ?Sized,
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for T
where
    T: Clone,
impl<T> CloneToUninit for T
where
    T: Clone,
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more