dataset_loaders/
dataset_loaders.rs1use scirs2_datasets::loaders;
2use scirs2_datasets::utils::{train_test_split, Dataset};
3use std::env;
4use std::path::Path;
5
6#[allow(dead_code)]
7fn main() {
8 let args: Vec<String> = env::args().collect();
10 if args.len() < 2 {
11 println!("Usage: {} <path_to_csv_file>", args[0]);
12 println!("Example: {} examples/sampledata.csv", args[0]);
13 return;
14 }
15
16 let filepath = &args[1];
17
18 if !Path::new(filepath).exists() {
20 println!("Error: File '{filepath}' does not exist");
21 return;
22 }
23
24 println!("Loading CSV file: {filepath}");
26 let csv_config = loaders::CsvConfig {
27 has_header: true,
28 target_column: None,
29 ..Default::default()
30 };
31 match loaders::load_csv(filepath, csv_config) {
32 Ok(dataset) => {
33 print_dataset_info(&dataset, "Loaded CSV");
34
35 println!("\nDemonstrating train-test split...");
37 match train_test_split(&dataset, 0.2, Some(42)) {
38 Ok((train, test)) => {
39 println!("Training set: {} samples", train.n_samples());
40 println!("Test set: {} samples", test.n_samples());
41
42 let jsonpath = format!("{filepath}.json");
44 println!("\nSaving training dataset to JSON: {jsonpath}");
45 if let Err(e) = loaders::save_json(&train, &jsonpath) {
46 println!("Error saving JSON: {e}");
47 } else {
48 println!("Successfully saved JSON file");
49
50 println!("\nLoading back from JSON file...");
52 match loaders::load_json(&jsonpath) {
53 Ok(loaded) => {
54 print_dataset_info(&loaded, "Loaded JSON");
55 }
56 Err(e) => println!("Error loading JSON: {e}"),
57 }
58 }
59 }
60 Err(e) => println!("Error splitting dataset: {e}"),
61 }
62 }
63 Err(e) => println!("Error loading CSV: {e}"),
64 }
65}
66
67#[allow(dead_code)]
68fn print_dataset_info(dataset: &Dataset, name: &str) {
69 println!("=== {name} Dataset ===");
70 println!("Number of samples: {}", dataset.n_samples());
71 println!("Number of features: {}", dataset.n_features());
72
73 if let Some(featurenames) = &dataset.featurenames {
74 println!(
75 "Features: {:?}",
76 &featurenames[0..std::cmp::min(5, featurenames.len())]
77 );
78 if featurenames.len() > 5 {
79 println!("... and {} more", featurenames.len() - 5);
80 }
81 }
82
83 if let Some(target) = &dataset.target {
84 println!("Target shape: {}", target.len());
85
86 if let Some(targetnames) = &dataset.targetnames {
87 println!("Target classes: {targetnames:?}");
88 }
89 }
90
91 for (key, value) in &dataset.metadata {
92 println!("Metadata - {key}: {value}");
93 }
94}