use scirs2_datasets::{
make_blobs, make_classification, make_regression, make_time_series, utils::normalize,
utils::train_test_split,
};
/// Generates one synthetic dataset of each kind (classification, regression,
/// blobs, time series) and prints a short summary of each to stdout.
///
/// Any error returned by a generator or by the train/test split is propagated
/// to the caller through `?`.
#[allow(dead_code)]
fn main() -> Result<(), Box<dyn std::error::Error>> {
    println!("Creating synthetic datasets...\n");

    // Sample and feature counts shared by the classification and regression sets.
    let n_samples = 100;
    let n_features = 5;

    // --- Classification -----------------------------------------------------
    // NOTE(review): the three positional integers are presumably class /
    // cluster / informative-feature counts -- confirm against the crate docs.
    let labelled = make_classification(n_samples, n_features, 3, 2, 3, Some(42))?;
    // NOTE(review): 0.2 is presumably the held-out (test) fraction -- confirm.
    let (train_part, test_part) = train_test_split(&labelled, 0.2, Some(42))?;

    println!("Classification dataset:");
    println!(" Total samples: {}", labelled.n_samples());
    println!(" Features: {}", labelled.n_features());
    println!(" Training samples: {}", train_part.n_samples());
    println!(" Test samples: {}", test_part.n_samples());

    // --- Regression ---------------------------------------------------------
    let regression = make_regression(n_samples, n_features, 3, 0.5, Some(42))?;

    println!("\nRegression dataset:");
    println!(" Samples: {}", regression.n_samples());
    println!(" Features: {}", regression.n_features());

    // Normalize a clone so the generated dataset itself is left untouched.
    let mut scaled = regression.data.clone();
    normalize(&mut scaled);
    println!(" Data normalized successfully");

    // --- Clustering ---------------------------------------------------------
    let blobs = make_blobs(n_samples, 2, 4, 0.8, Some(42))?;

    println!("\nClustering dataset (blobs):");
    println!(" Samples: {}", blobs.n_samples());
    println!(" Features: {}", blobs.n_features());

    // The cluster count is derived as (largest label value) + 1; a dataset
    // without a target reports 0.
    let num_clusters = match blobs.target.as_ref() {
        None => 0,
        Some(labels) => {
            // Start from -1.0 so that any label greater than -1 replaces it.
            let highest = labels.iter().fold(-1.0, |best, &label| {
                if label > best {
                    label
                } else {
                    best
                }
            });
            (highest as usize) + 1
        }
    };
    println!(" Clusters: {num_clusters}");

    // --- Time series --------------------------------------------------------
    let series = make_time_series(100, 3, true, true, 0.2, Some(42))?;

    println!("\nTime series dataset:");
    println!(" Time steps: {}", series.n_samples());
    println!(" Features: {}", series.n_features());

    Ok(())
}