pub fn normalize(data: &mut Array2<f64>)
Expand description
Helper function to normalize data (zero mean, unit variance).
This function normalizes each feature (column) in the dataset to have zero mean and unit variance. This is commonly used as a preprocessing step for machine learning. Also known as z-score normalization or standardization.
§Arguments
data - A mutable reference to the data array to normalize in-place
§Examples
use scirs2_core::ndarray::Array2;
use scirs2_datasets::utils::normalize;
let mut data = Array2::from_shape_vec((3, 2), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
normalize(&mut data);
// data is now normalized with zero mean and unit variance for each feature
Examples found in repository?
examples/data_generators.rs (line 47)
7fn main() -> Result<(), Box<dyn std::error::Error>> {
8 println!("Creating synthetic datasets...\n");
9
10 // Generate classification dataset
11 let n_samples = 100;
12 let n_features = 5;
13
14 let classificationdata = make_classification(
15 n_samples,
16 n_features,
17 3, // 3 classes
18 2, // 2 clusters per class
19 3, // 3 informative features
20 Some(42), // random seed
21 )?;
22
23 // Train-test split
24 let (train, test) = train_test_split(&classificationdata, 0.2, Some(42))?;
25
26 println!("Classification dataset:");
27 println!(" Total samples: {}", classificationdata.n_samples());
28 println!(" Features: {}", classificationdata.n_features());
29 println!(" Training samples: {}", train.n_samples());
30 println!(" Test samples: {}", test.n_samples());
31
32 // Generate regression dataset
33 let regressiondata = make_regression(
34 n_samples,
35 n_features,
36 3, // 3 informative features
37 0.5, // noise level
38 Some(42),
39 )?;
40
41 println!("\nRegression dataset:");
42 println!(" Samples: {}", regressiondata.n_samples());
43 println!(" Features: {}", regressiondata.n_features());
44
45 // Normalize the data (in-place)
46 let mut data_copy = regressiondata.data.clone();
47 normalize(&mut data_copy);
48 println!(" Data normalized successfully");
49
50 // Generate clustering data (blobs)
51 let clusteringdata = make_blobs(
52 n_samples,
53 2, // 2 features for easy visualization
54 4, // 4 clusters
55 0.8, // cluster standard deviation
56 Some(42),
57 )?;
58
59 println!("\nClustering dataset (blobs):");
60 println!(" Samples: {}", clusteringdata.n_samples());
61 println!(" Features: {}", clusteringdata.n_features());
62
63 // Find the number of clusters by finding the max value of target
64 let num_clusters = clusteringdata.target.as_ref().map_or(0, |t| {
65 let mut max_val = -1.0;
66 for &val in t.iter() {
67 if val > max_val {
68 max_val = val;
69 }
70 }
71 (max_val as usize) + 1
72 });
73
74 println!(" Clusters: {num_clusters}");
75
76 // Generate time series data
77 let time_series = make_time_series(
78 100, // 100 time steps
79 3, // 3 features/variables
80 true, // with trend
81 true, // with seasonality
82 0.2, // noise level
83 Some(42),
84 )?;
85
86 println!("\nTime series dataset:");
87 println!(" Time steps: {}", time_series.n_samples());
88 println!(" Features: {}", time_series.n_features());
89
90 Ok(())
91}