pandrs 0.1.0-beta.2

A high-performance DataFrame library for Rust, providing a pandas-like API with advanced features including SIMD optimization, parallel processing, and distributed computing capabilities.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
use std::cmp::PartialOrd;
use std::collections::HashMap;
use std::error::Error;
use std::fmt::Debug;
use std::fs::File;
use std::io::{BufRead, BufReader, Write};
use std::iter::Sum;
use std::path::Path;
use std::str::FromStr;

// Import the cast function from the num-traits crate
// You need to add `num-traits = "0.2"` to Cargo.toml
use num_traits::cast;

/// A minimal column-oriented table in which every cell has the same type `T`.
///
/// The constructors and methods of this type keep the following invariant: `data` holds one
/// vector per name listed in `columns`, and each of those vectors has `row_count` elements.
/// Callers that resize a column through [`DataFrame::get_column_mut`] can break that invariant;
/// row-oriented methods (`filter`, `sort_by`, `join`) index columns by row number and may panic
/// afterwards.
#[derive(Debug, Clone)]
pub struct DataFrame<T: Debug + Clone> {
    /// Cell values keyed by column name.
    data: HashMap<String, Vec<T>>,
    /// Column names in presentation order (`HashMap` iteration order is unspecified).
    columns: Vec<String>,
    /// Number of rows stored in the frame.
    row_count: usize,
}

/// The default DataFrame is the empty one produced by [`DataFrame::new`].
impl<T: Debug + Clone> Default for DataFrame<T> {
    fn default() -> Self {
        Self::new()
    }
}

impl<T: Debug + Clone> DataFrame<T> {
    /// Creates a DataFrame with no columns and no rows.
    pub fn new() -> Self {
        DataFrame {
            data: HashMap::new(),
            columns: Vec::new(),
            row_count: 0,
        }
    }

    /// Builds a DataFrame from a map of column name to column values.
    ///
    /// Column order is the lexicographic order of the names, so the result is reproducible
    /// even though `HashMap` iteration order is not. An empty map yields an empty DataFrame.
    ///
    /// # Errors
    ///
    /// Returns an error naming the first column (in sorted order) whose length differs from
    /// the length of the first column.
    pub fn from_vec(data: HashMap<String, Vec<T>>) -> Result<Self, Box<dyn Error>> {
        let mut columns: Vec<String> = data.keys().cloned().collect();
        columns.sort_unstable();

        let row_count = match columns.first() {
            Some(first) => data[first].len(),
            None => return Ok(DataFrame::new()),
        };

        if let Some(name) = columns.iter().find(|name| data[*name].len() != row_count) {
            return Err(format!(
                "Column '{}' has length {} but expected {}",
                name,
                data[name].len(),
                row_count
            )
            .into());
        }

        Ok(DataFrame {
            data,
            columns,
            row_count,
        })
    }

    /// Appends one row given as a map of column name to value.
    ///
    /// When the DataFrame has no columns yet, the row defines them (in lexicographic order of
    /// the names). Otherwise the row must provide exactly the existing columns.
    ///
    /// The operation is all-or-nothing: when an error is returned the DataFrame is unchanged.
    ///
    /// # Errors
    ///
    /// * the DataFrame has no columns and `row` is empty;
    /// * `row` has a different number of entries than the DataFrame has columns;
    /// * `row` lacks a value for one of the DataFrame's columns.
    pub fn add_row(&mut self, mut row: HashMap<String, T>) -> Result<(), Box<dyn Error>> {
        if self.columns.is_empty() {
            // A row without cells cannot define any columns; accepting it would record a row
            // that no column vector actually stores.
            if row.is_empty() {
                return Err("Cannot add an empty row to a DataFrame without columns".into());
            }
            let mut names: Vec<String> = row.keys().cloned().collect();
            names.sort_unstable();
            for (name, value) in row {
                self.data.insert(name, vec![value]);
            }
            self.columns = names;
            self.row_count = 1;
            return Ok(());
        }

        if row.len() != self.columns.len() {
            return Err(format!(
                "Row has {} columns, but DataFrame expects {}",
                row.len(),
                self.columns.len()
            )
            .into());
        }

        // Pull every value out of the row BEFORE touching any column, so that a row with an
        // unknown column name cannot leave some columns longer than others.
        let values = self
            .columns
            .iter()
            .map(|name| {
                row.remove(name).ok_or_else(|| {
                    format!("Missing value for column '{name}' in the provided row")
                })
            })
            .collect::<Result<Vec<T>, String>>()?;

        for (name, value) in self.columns.iter().zip(values) {
            self.data
                .get_mut(name)
                .expect("every registered column has a backing vector")
                .push(value);
        }
        self.row_count += 1;
        Ok(())
    }

    /// Returns the values of `column_name`, or `None` when no such column exists.
    pub fn get_column(&self, column_name: &str) -> Option<&Vec<T>> {
        self.data.get(column_name)
    }

    /// Returns mutable access to the values of `column_name`, or `None` when it does not exist.
    ///
    /// Changing the length of the returned vector desynchronises it from the row count.
    pub fn get_column_mut(&mut self, column_name: &str) -> Option<&mut Vec<T>> {
        self.data.get_mut(column_name)
    }

    /// Reports whether a column named `column_name` exists.
    pub fn contains_column(&self, column_name: &str) -> bool {
        self.data.contains_key(column_name)
    }

    /// Returns the column names in their defined order.
    pub fn get_columns(&self) -> &Vec<String> {
        &self.columns
    }

    /// Returns the number of rows.
    pub fn get_row_count(&self) -> usize {
        self.row_count
    }

    /// Borrows row `index` as a map from column name to cell.
    fn row_view(&self, index: usize) -> HashMap<&String, &T> {
        self.columns
            .iter()
            .map(|name| (name, &self.data[name][index]))
            .collect()
    }

    /// Clones the rows listed in `indices` (in that order) out of every column.
    fn gather_rows(&self, indices: &[usize]) -> HashMap<String, Vec<T>> {
        self.columns
            .iter()
            .map(|name| {
                let source = &self.data[name];
                let picked: Vec<T> = indices.iter().map(|&i| source[i].clone()).collect();
                (name.clone(), picked)
            })
            .collect()
    }

    /// Returns a new DataFrame holding clones of the rows for which `predicate` is true.
    ///
    /// Rows keep their relative order and the column order is preserved. The current
    /// implementation never returns `Err`; the `Result` is part of the existing signature.
    pub fn filter<F>(&self, predicate: F) -> Result<Self, Box<dyn Error>>
    where
        F: Fn(&HashMap<&String, &T>) -> bool,
    {
        let kept: Vec<usize> = (0..self.row_count)
            .filter(|&i| predicate(&self.row_view(i)))
            .collect();

        Ok(DataFrame {
            data: self.gather_rows(&kept),
            columns: self.columns.clone(),
            row_count: kept.len(),
        })
    }

    /// Returns a new DataFrame whose rows are ordered by `compare`; `self` is left untouched.
    ///
    /// The sort is unstable: rows that compare equal may appear in any relative order.
    pub fn sort_by<F>(&self, compare: F) -> Self
    where
        F: Fn(&HashMap<&String, &T>, &HashMap<&String, &T>) -> std::cmp::Ordering,
    {
        if self.row_count == 0 {
            return self.clone();
        }

        // Build each row view once instead of twice per comparison.
        let row_views: Vec<HashMap<&String, &T>> =
            (0..self.row_count).map(|i| self.row_view(i)).collect();

        let mut order: Vec<usize> = (0..self.row_count).collect();
        order.sort_unstable_by(|&a, &b| compare(&row_views[a], &row_views[b]));

        DataFrame {
            data: self.gather_rows(&order),
            columns: self.columns.clone(),
            row_count: self.row_count,
        }
    }

    /// Inner-joins `self` (left) with `other` (right) on equal values of `join_column`.
    ///
    /// The result has the left columns in their order, followed by the right columns except
    /// the join column. A right column whose name also exists on the left is renamed to
    /// `<name>_right`, or `<name>_right1`, `<name>_right2`, ... until the name collides with no
    /// left column, no right column and no previously generated name. Output rows follow left
    /// row order; for one left row, matching right rows follow right row order.
    ///
    /// # Errors
    ///
    /// Returns an error when `join_column` is missing from either DataFrame.
    pub fn join(&self, other: &Self, join_column: &str) -> Result<Self, Box<dyn Error>>
    where
        T: Eq + std::hash::Hash, // the join key is used as a HashMap key
    {
        let left_keys = self.get_column(join_column).ok_or_else(|| {
            format!("Join column '{join_column}' not found in left DataFrame")
        })?;
        let right_keys = other.get_column(join_column).ok_or_else(|| {
            format!("Join column '{join_column}' not found in right DataFrame")
        })?;

        // Every name that will appear in the output unchanged is reserved up front, so a
        // generated "<name>_right" can never shadow a column that really has that name.
        let mut taken: std::collections::HashSet<String> = self
            .columns
            .iter()
            .chain(other.columns.iter())
            .cloned()
            .collect();

        let mut joined_columns = self.columns.clone();
        // (column name in `other`, column name in the result), in `other`'s column order.
        let mut right_outputs: Vec<(&String, String)> = Vec::new();
        for name in &other.columns {
            if name == join_column {
                continue;
            }
            let output = if self.contains_column(name) {
                let mut candidate = format!("{name}_right");
                let mut suffix = 1;
                while taken.contains(&candidate) {
                    candidate = format!("{name}_right{suffix}");
                    suffix += 1;
                }
                taken.insert(candidate.clone());
                candidate
            } else {
                name.clone()
            };
            joined_columns.push(output.clone());
            right_outputs.push((name, output));
        }

        // Index the right rows by key so each left row is matched with one hash lookup.
        let mut right_rows_by_key: HashMap<&T, Vec<usize>> = HashMap::new();
        for (j, key) in right_keys.iter().enumerate() {
            right_rows_by_key.entry(key).or_default().push(j);
        }

        // Parallel lists: output row r is left row `left_rows[r]` paired with right row
        // `right_rows[r]`.
        let mut left_rows: Vec<usize> = Vec::new();
        let mut right_rows: Vec<usize> = Vec::new();
        for (i, key) in left_keys.iter().enumerate().take(self.row_count) {
            if let Some(matches) = right_rows_by_key.get(key) {
                for &j in matches {
                    left_rows.push(i);
                    right_rows.push(j);
                }
            }
        }

        let mut joined_data = self.gather_rows(&left_rows);
        for (source, output) in right_outputs {
            let values = &other.data[source];
            joined_data.insert(
                output,
                right_rows.iter().map(|&j| values[j].clone()).collect(),
            );
        }

        Ok(DataFrame {
            data: joined_data,
            columns: joined_columns,
            row_count: left_rows.len(),
        })
    }
}

/// Aggregations available when the cell type behaves like a number.
impl<T> DataFrame<T>
where
    T: FromStr
        + Clone
        + Debug
        + Sum<T>
        + Default
        + Copy
        + std::ops::Div<Output = T>
        + PartialOrd
        + 'static,
    T: num_traits::NumCast, // lets `mean` convert the row count into `T`
{
    /// Adds up every value of `column_name`.
    ///
    /// An existing but empty column yields `T::default()`.
    ///
    /// # Errors
    ///
    /// Returns an error when the column does not exist.
    pub fn sum(&self, column_name: &str) -> Result<T, Box<dyn Error>> {
        match self.get_column(column_name) {
            None => Err(format!("Column '{column_name}' not found for sum()").into()),
            Some(values) if values.is_empty() => Ok(T::default()),
            Some(values) => Ok(values.iter().copied().sum()),
        }
    }

    /// Divides the sum of `column_name` by its number of values.
    ///
    /// The quotient is computed with `T`'s own `Div` implementation and has type `T`.
    ///
    /// # Errors
    ///
    /// Returns an error when the column does not exist, when it is empty, or when its length
    /// cannot be represented as a `T`.
    pub fn mean(&self, column_name: &str) -> Result<T, Box<dyn Error>> {
        let value_count = match self.get_column(column_name) {
            Some(values) => values.len(),
            None => {
                return Err(format!("Column '{column_name}' not found for mean()").into());
            }
        };
        if value_count == 0 {
            return Err(format!("Cannot calculate mean of an empty column '{column_name}'").into());
        }

        let total = self.sum(column_name)?;

        // `cast` returns `None` when the count does not fit into `T`.
        match cast::<usize, T>(value_count) {
            Some(divisor) => Ok(total / divisor),
            None => Err(format!(
                "Failed to cast column length {} (usize) to the required numeric type '{}'",
                value_count,
                std::any::type_name::<T>()
            )
            .into()),
        }
    }
}

/// Helpers that only make sense when every cell is a `String`.
impl DataFrame<String> {
    /// Joins all values of `column_name` into one string, inserting `separator` between
    /// consecutive values.
    ///
    /// # Errors
    ///
    /// Returns an error when the column does not exist.
    pub fn concat(&self, column_name: &str, separator: &str) -> Result<String, Box<dyn Error>> {
        match self.get_column(column_name) {
            Some(values) => Ok(values.join(separator)),
            None => Err(format!("Column '{column_name}' not found for concat()").into()),
        }
    }
}

/// Reads a comma-separated text file into a `DataFrame<T>`, parsing every field as `T`.
///
/// * `file_path` - file to read.
/// * `has_header` - when `true`, the first line supplies the column names; when `false`,
///   columns are named `column_0`, `column_1`, ... after the first non-blank line.
///
/// Lines are split on every `,` and each field is trimmed; quoting and escaping are not
/// interpreted. Blank lines are skipped. A file with a header but no data rows yields a
/// DataFrame with columns and zero rows; a header-less file without data rows yields an
/// empty DataFrame.
///
/// # Errors
///
/// Returns an error when the file cannot be opened or read, when the header is missing or
/// consists only of empty names, when the header repeats a column name, when a row has a
/// different number of fields than there are columns, or when a field does not parse as `T`.
pub fn read_csv<T: Debug + Clone + FromStr + 'static>(
    file_path: &Path,
    has_header: bool,
) -> Result<DataFrame<T>, Box<dyn Error>> {
    let file = File::open(file_path)
        .map_err(|e| format!("Failed to open file '{}': {}", file_path.display(), e))?;
    let mut lines = BufReader::new(file).lines();

    let mut data: HashMap<String, Vec<T>> = HashMap::new();
    let mut columns: Vec<String> = Vec::new(); // column order as it appears in the file
    let mut row_count = 0;

    if has_header {
        let header_line = match lines.next() {
            Some(result) => result.map_err(|e| {
                format!(
                    "Failed to read header line from '{}': {}",
                    file_path.display(),
                    e
                )
            })?,
            None => {
                return Err(format!(
                    "CSV file '{}' is empty or failed to read header",
                    file_path.display()
                )
                .into());
            }
        };

        columns = header_line
            .split(',')
            .map(|s| s.trim().to_string())
            .collect();
        if columns.iter().all(String::is_empty) {
            return Err(format!(
                "CSV file '{}' has an empty or invalid header",
                file_path.display()
            )
            .into());
        }

        for column in &columns {
            // `insert` returns the previous value when the key already existed. Two header
            // fields with the same name would share one vector and receive two values per
            // row, so the duplicate is rejected instead.
            if data.insert(column.clone(), Vec::new()).is_some() {
                return Err(format!(
                    "CSV file '{}' has duplicate column name '{}' in its header",
                    file_path.display(),
                    column
                )
                .into());
            }
        }
    }

    // 1-based number of the first line after the optional header, for error messages.
    let first_data_line = if has_header { 2 } else { 1 };

    for (line_idx, line_result) in lines.enumerate() {
        let line_number = line_idx + first_data_line;
        let line = line_result.map_err(|e| {
            format!(
                "Failed to read line {} from '{}': {}",
                line_number,
                file_path.display(),
                e
            )
        })?;

        if line.trim().is_empty() {
            continue;
        }

        let values: Vec<&str> = line.split(',').map(str::trim).collect();

        // Without a header, the first data line fixes the number of columns.
        if !has_header && row_count == 0 {
            columns = (0..values.len()).map(|i| format!("column_{i}")).collect();
            for column_name in &columns {
                data.insert(column_name.clone(), Vec::new());
            }
        }

        if values.len() != columns.len() {
            return Err(format!(
                "Row {} has {} values, but expected {} columns",
                line_number,
                values.len(),
                columns.len()
            )
            .into());
        }

        for (column_name, value_str) in columns.iter().zip(values.iter()) {
            // `T::Err` is not required to be printable, so the message is built from the
            // raw field instead of the parse error.
            let parsed_value = value_str.parse::<T>().map_err(|_| {
                format!(
                    "Failed to parse value '{}' in column '{}' (row {}) as type '{}'",
                    value_str,
                    column_name,
                    line_number,
                    std::any::type_name::<T>()
                )
            })?;
            data.get_mut(column_name)
                .expect("a vector was inserted for every column name")
                .push(parsed_value);
        }
        row_count += 1;
    }

    // With no header and no data rows, `data` and `columns` are still empty here, so this is
    // the same value as `DataFrame::new()`.
    Ok(DataFrame {
        data,
        columns,
        row_count,
    })
}

/// Demo driver: exercises the i32 and String DataFrame APIs, then round-trips a temporary
/// CSV file through `read_csv` (once as `f64`, which is expected to fail on the text column,
/// and once as `String`).
fn main() -> Result<(), Box<dyn Error>> {
    // Builds an owned i32 row from borrowed (column, value) pairs.
    fn int_row(cells: &[(&str, i32)]) -> HashMap<String, i32> {
        cells
            .iter()
            .map(|&(name, value)| (name.to_string(), value))
            .collect()
    }

    // Builds an owned String row from borrowed (column, value) pairs.
    fn text_row(cells: &[(&str, &str)]) -> HashMap<String, String> {
        cells
            .iter()
            .map(|&(name, value)| (name.to_string(), value.to_string()))
            .collect()
    }

    let divider = "-----------------------------";

    // === i32 DataFrame ===
    println!("--- i32 DataFrame Example ---");
    let mut df_i32: DataFrame<i32> = DataFrame::new();
    for &(id, age, height) in &[(1, 30, 180), (2, 25, 175), (3, 30, 185)] {
        df_i32.add_row(int_row(&[("id", id), ("age", age), ("height", height)]))?;
    }
    println!("DataFrame (i32):\n{df_i32:?}");
    println!("Sum of age: {:?}", df_i32.sum("age")?);
    println!("Mean of age (using num_cast): {:?}", df_i32.mean("age")?);

    // Row maps are keyed by `&String`, so the lookup keys are owned strings built once.
    let age_key = "age".to_string();
    let height_key = "height".to_string();

    let filtered_df_i32 = df_i32.filter(|row| {
        let age: i32 = **row.get(&age_key).expect("Age missing");
        age > 25
    })?;
    println!("Filtered DataFrame (age > 25):\n{filtered_df_i32:?}");

    let sorted_df_i32 = df_i32.sort_by(|a, b| {
        let age_a: i32 = **a.get(&age_key).expect("Age missing");
        let age_b: i32 = **b.get(&age_key).expect("Age missing");
        // Ties on age are broken by height, tallest first.
        age_a.cmp(&age_b).then_with(|| {
            let height_a: i32 = **a.get(&height_key).expect("Height missing");
            let height_b: i32 = **b.get(&height_key).expect("Height missing");
            height_b.cmp(&height_a)
        })
    });
    println!("Sorted DataFrame (by age asc, height desc):\n{sorted_df_i32:?}");
    println!("{}", divider);
    println!();

    // === String DataFrame ===
    println!("--- String DataFrame Example ---");
    let mut df_string = DataFrame::<String>::new();
    for &(name, city) in &[
        ("Alice", "New York"),
        ("Bob", "Los Angeles"),
        ("Charlie", "New York"),
    ] {
        df_string.add_row(text_row(&[("name", name), ("city", city)]))?;
    }
    let mut df_string2 = DataFrame::<String>::new();
    for &(name, state, zip) in &[
        ("Alice", "NY", "10001"),
        ("Bob", "CA", "90001"),
        ("David", "TX", "75001"),
    ] {
        df_string2.add_row(text_row(&[("name", name), ("state", state), ("zip", zip)]))?;
    }
    println!("DataFrame 1 (String):\n{df_string:?}");
    println!("DataFrame 2 (String):\n{df_string2:?}");
    let joined_df_string = df_string.join(&df_string2, "name")?;
    println!("Joined DataFrame (inner join on name):\n{joined_df_string:?}");
    println!("Concatenated names: {:?}", df_string.concat("name", ", ")?);
    println!("{}", divider);
    println!();

    // === CSV Reading (f64) ===
    println!("--- CSV Reading Example (f64) ---");
    let file_path_str = "data_temp.csv";
    let file_path = Path::new(file_path_str);
    // Write the sample file inside its own scope so the handle is closed before reading.
    {
        let mut file = File::create(file_path)?;
        let sample_lines = [
            "value_a,value_b,label",
            "1.1, 2.2, apple",
            "3.3, 4.4, banana",
            " 5.5 ,6.6, apple",
        ];
        for sample_line in sample_lines.iter() {
            writeln!(file, "{}", sample_line)?;
        }
        writeln!(file)?; // trailing blank line, which the reader is expected to skip
    }
    println!("Attempting to read: {file_path_str}");

    // The `label` column holds text, so parsing the file as f64 is expected to fail.
    match read_csv::<f64>(file_path, true) {
        Ok(df) => {
            println!("Successfully read CSV as f64 (This shouldn't happen):\n{df:?}");
        }
        Err(e) => {
            println!("Correctly failed to read CSV as f64:");
            eprintln!("Error: {e}");
        }
    }
    println!();

    // Every field parses as a String, so this read is expected to succeed.
    match read_csv::<String>(file_path, true) {
        Ok(df) => {
            println!("Successfully read CSV as String:\n{df:?}");
            if df.contains_column("label") {
                println!("Concatenated labels: {:?}", df.concat("label", " | ")?);
            }
        }
        Err(e) => {
            eprintln!("Failed to read CSV as String (This shouldn't happen): {e}");
        }
    }

    // Best-effort cleanup: a failure to delete the sample file is reported, not fatal.
    std::fs::remove_file(file_path).unwrap_or_else(|e| {
        eprintln!("Warning: Failed to remove temporary file '{file_path_str}': {e}");
    });
    println!("{}", divider);

    Ok(())
}