dataset_loaders/
dataset_loaders.rs

1use scirs2_datasets::loaders;
2use scirs2_datasets::utils::Dataset;
3use std::env;
4use std::path::Path;
5
6fn main() {
7    // Check if a CSV file is provided as a command-line argument
8    let args: Vec<String> = env::args().collect();
9    if args.len() < 2 {
10        println!("Usage: {} <path_to_csv_file>", args[0]);
11        println!("Example: {} examples/sample_data.csv", args[0]);
12        return;
13    }
14
15    let file_path = &args[1];
16
17    // Verify the file exists
18    if !Path::new(file_path).exists() {
19        println!("Error: File '{}' does not exist", file_path);
20        return;
21    }
22
23    // Load CSV file
24    println!("Loading CSV file: {}", file_path);
25    match loaders::load_csv(file_path, true, None) {
26        Ok(dataset) => {
27            print_dataset_info(&dataset, "Loaded CSV");
28
29            // Split the dataset for demonstration
30            println!("\nDemonstrating train-test split...");
31            match dataset.train_test_split(0.2, Some(42)) {
32                Ok((train, test)) => {
33                    println!("Training set: {} samples", train.n_samples());
34                    println!("Test set: {} samples", test.n_samples());
35
36                    // Save as JSON for demonstration
37                    let json_path = format!("{}.json", file_path);
38                    println!("\nSaving training dataset to JSON: {}", json_path);
39                    if let Err(e) = loaders::save_json(&train, &json_path) {
40                        println!("Error saving JSON: {}", e);
41                    } else {
42                        println!("Successfully saved JSON file");
43
44                        // Load back the JSON file
45                        println!("\nLoading back from JSON file...");
46                        match loaders::load_json(&json_path) {
47                            Ok(loaded) => {
48                                print_dataset_info(&loaded, "Loaded JSON");
49                            }
50                            Err(e) => println!("Error loading JSON: {}", e),
51                        }
52                    }
53                }
54                Err(e) => println!("Error splitting dataset: {}", e),
55            }
56        }
57        Err(e) => println!("Error loading CSV: {}", e),
58    }
59}
60
61fn print_dataset_info(dataset: &Dataset, name: &str) {
62    println!("=== {} Dataset ===", name);
63    println!("Number of samples: {}", dataset.n_samples());
64    println!("Number of features: {}", dataset.n_features());
65
66    if let Some(feature_names) = &dataset.feature_names {
67        println!(
68            "Features: {:?}",
69            &feature_names[0..std::cmp::min(5, feature_names.len())]
70        );
71        if feature_names.len() > 5 {
72            println!("... and {} more", feature_names.len() - 5);
73        }
74    }
75
76    if let Some(target) = &dataset.target {
77        println!("Target shape: {}", target.len());
78
79        if let Some(target_names) = &dataset.target_names {
80            println!("Target classes: {:?}", target_names);
81        }
82    }
83
84    for (key, value) in &dataset.metadata {
85        println!("Metadata - {}: {}", key, value);
86    }
87}