data_generators/
data_generators.rs1use scirs2_datasets::{
2 make_blobs, make_classification, make_regression, make_time_series, utils::normalize,
3 utils::train_test_split,
4};
5
6fn main() -> Result<(), Box<dyn std::error::Error>> {
7 println!("Creating synthetic datasets...\n");
8
9 let n_samples = 100;
11 let n_features = 5;
12
13 let classification_data = make_classification(
14 n_samples,
15 n_features,
16 3, 2, 3, Some(42), )?;
21
22 let (train, test) = train_test_split(&classification_data, 0.2, Some(42))?;
24
25 println!("Classification dataset:");
26 println!(" Total samples: {}", classification_data.n_samples());
27 println!(" Features: {}", classification_data.n_features());
28 println!(" Training samples: {}", train.n_samples());
29 println!(" Test samples: {}", test.n_samples());
30
31 let regression_data = make_regression(
33 n_samples,
34 n_features,
35 3, 0.5, Some(42),
38 )?;
39
40 println!("\nRegression dataset:");
41 println!(" Samples: {}", regression_data.n_samples());
42 println!(" Features: {}", regression_data.n_features());
43
44 let mut data_copy = regression_data.data.clone();
46 normalize(&mut data_copy);
47 println!(" Data normalized successfully");
48
49 let clustering_data = make_blobs(
51 n_samples,
52 2, 4, 0.8, Some(42),
56 )?;
57
58 println!("\nClustering dataset (blobs):");
59 println!(" Samples: {}", clustering_data.n_samples());
60 println!(" Features: {}", clustering_data.n_features());
61
62 let num_clusters = clustering_data.target.as_ref().map_or(0, |t| {
64 let mut max_val = -1.0;
65 for &val in t.iter() {
66 if val > max_val {
67 max_val = val;
68 }
69 }
70 (max_val as usize) + 1
71 });
72
73 println!(" Clusters: {}", num_clusters);
74
75 let time_series = make_time_series(
77 100, 3, true, true, 0.2, Some(42),
83 )?;
84
85 println!("\nTime series dataset:");
86 println!(" Time steps: {}", time_series.n_samples());
87 println!(" Features: {}", time_series.n_features());
88
89 Ok(())
90}