pub fn robust_scale(data: &mut Array2<f64>)
Expand description
Performs robust scaling using median and interquartile range
Scales features using statistics that are robust to outliers. Each feature is scaled as X_scaled = (X - median) / IQR, where IQR is the interquartile range (the difference between the 75th and 25th percentiles) of that feature. This makes the method less sensitive to outliers than standard normalization.
§Arguments
`data` - Feature matrix to scale in place, with shape (n_samples, n_features)
§Examples
use ndarray::Array2;
use scirs2_datasets::utils::robust_scale;
let mut data = Array2::from_shape_vec((5, 2), vec![1.0, 10.0, 2.0, 20.0, 3.0, 30.0, 4.0, 40.0, 100.0, 500.0]).unwrap();
robust_scale(&mut data);
// Features are now robustly scaled using median and IQR
Examples found in repository?
examples/feature_extraction_demo.rs (line 43)
// Demo entry point: runs each feature-extraction utility in turn on small
// in-memory datasets (and on the Iris dataset for the pipeline section) and
// prints the results to stdout. Every fallible call is `.unwrap()`ed, so a
// failure in any step aborts the demo with a panic.
12fn main() {
13 println!("=== Feature Extraction Utilities Demonstration ===\n");
14
15 // Create a sample dataset for demonstration
16 let data = Array2::from_shape_vec(
17 (6, 2),
18 vec![
19 1.0, 10.0, // Normal data
20 2.0, 20.0, 3.0, 30.0, 4.0, 40.0, 5.0, 50.0, 100.0, 500.0, // Outlier
21 ],
22 )
23 .unwrap();
24
25 println!("Original dataset:");
26 print_data_summary(&data, "Original");
27 println!();
28
29 // Demonstrate Min-Max Scaling
30 println!("=== Min-Max Scaling ============================");
// The scalers take `&mut` and modify their argument, so each demonstration
// works on its own clone and `data` stays available for the next one.
31 let mut data_minmax = data.clone();
32 min_max_scale(&mut data_minmax, (0.0, 1.0));
33 print_data_summary(&data_minmax, "Min-Max Scaled [0, 1]");
34
35 let mut data_custom_range = data.clone();
36 min_max_scale(&mut data_custom_range, (-1.0, 1.0));
37 print_data_summary(&data_custom_range, "Min-Max Scaled [-1, 1]");
38 println!();
39
40 // Demonstrate Robust Scaling
41 println!("=== Robust Scaling ==============================");
42 let mut data_robust = data.clone();
43 robust_scale(&mut data_robust);
44 print_data_summary(&data_robust, "Robust Scaled (Median/IQR)");
45 println!();
46
47 // Demonstrate Polynomial Features
48 println!("=== Polynomial Feature Generation ==============");
49 let small_data = Array2::from_shape_vec((3, 2), vec![1.0, 2.0, 2.0, 3.0, 3.0, 4.0]).unwrap();
50
51 println!("Small dataset for polynomial demonstration:");
52 print_data_matrix(&small_data, &["x1", "x2"]);
53
// The third argument toggles the bias column: the label list passed below
// has a leading "1" when it is `true` and omits it when it is `false`.
54 let poly_with_bias = polynomial_features(&small_data, 2, true).unwrap();
55 println!("Polynomial features (degree=2, with bias):");
56 print_data_matrix(&poly_with_bias, &["1", "x1", "x2", "x1²", "x1*x2", "x2²"]);
57
58 let poly_no_bias = polynomial_features(&small_data, 2, false).unwrap();
59 println!("Polynomial features (degree=2, no bias):");
60 print_data_matrix(&poly_no_bias, &["x1", "x2", "x1²", "x1*x2", "x2²"]);
61 println!();
62
63 // Demonstrate Statistical Feature Extraction
64 println!("=== Statistical Feature Extraction =============");
65 let stats_data = Array2::from_shape_vec((5, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0]).unwrap();
66
67 let stats_features = statistical_features(&stats_data).unwrap();
68 println!("Statistical features for data [1, 2, 3, 4, 5]:");
69 println!("(Each sample gets the same global statistics)");
// Only row 0 is printed, in line with the message above that all rows carry
// the same statistics.
70 print_statistical_features(stats_features.row(0).to_owned());
71 println!();
72
73 // Demonstrate Binning/Discretization
74 println!("=== Feature Binning/Discretization =============");
75 let binning_data =
76 Array2::from_shape_vec((8, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]).unwrap();
77
78 println!("Original data for binning: [1, 2, 3, 4, 5, 6, 7, 8]");
79
80 let uniform_binned =
81 create_binned_features(&binning_data, 3, BinningStrategy::Uniform).unwrap();
// The binned values are cast to `usize` before printing so they display as
// integers (presumably they are stored as floats — confirm against
// `create_binned_features`).
82 println!(
83 "Uniform binning (3 bins): {:?}",
84 uniform_binned
85 .column(0)
86 .iter()
87 .map(|&x| x as usize)
88 .collect::<Vec<_>>()
89 );
90
91 let quantile_binned =
92 create_binned_features(&binning_data, 4, BinningStrategy::Quantile).unwrap();
93 println!(
94 "Quantile binning (4 bins): {:?}",
95 quantile_binned
96 .column(0)
97 .iter()
98 .map(|&x| x as usize)
99 .collect::<Vec<_>>()
100 );
101 println!();
102
103 // Demonstrate Feature Extraction Pipeline
104 println!("=== Complete Feature Extraction Pipeline =======");
105 let iris = load_iris().unwrap();
106 println!(
107 "Using Iris dataset ({} samples, {} features)",
108 iris.n_samples(),
109 iris.n_features()
110 );
111
112 // Step 1: Robust scaling (handles outliers better)
113 let mut scaled_iris = iris.data.clone();
114 robust_scale(&mut scaled_iris);
115 println!("Step 1: Applied robust scaling");
116
117 // Step 2: Generate polynomial features (degree 2)
118 let poly_iris = polynomial_features(&scaled_iris, 2, false).unwrap();
119 println!("Step 2: Generated polynomial features");
120 println!(" Original features: {}", scaled_iris.ncols());
121 println!(" Polynomial features: {}", poly_iris.ncols());
122
123 // Step 3: Create binned features for non-linearity
124 let binned_iris = create_binned_features(&scaled_iris, 5, BinningStrategy::Quantile).unwrap();
125 println!("Step 3: Created binned features");
126 println!(" Binned features: {}", binned_iris.ncols());
127
128 // Step 4: Extract statistical features
// Note: this step reads the unscaled `iris.data`, not `scaled_iris`, and
// only its first 20 rows; `.to_owned()` turns the slice into an owned array
// before it is passed by reference.
129 let stats_iris =
130 statistical_features(&iris.data.slice(ndarray::s![0..20, ..]).to_owned()).unwrap();
131 println!("Step 4: Extracted statistical features (from first 20 samples)");
132 println!(" Statistical features: {}", stats_iris.ncols());
133 println!();
134
135 // Comparison of scaling methods with outliers
136 println!("=== Scaling Methods Comparison (with outliers) =");
137 let outlier_data = Array2::from_shape_vec(
138 (5, 1),
139 vec![1.0, 2.0, 3.0, 4.0, 100.0], // 100.0 is a severe outlier
140 )
141 .unwrap();
142
143 println!("Original data with outlier: [1, 2, 3, 4, 100]");
144
// Both scalers run on separate clones of the same single-column input so
// the two printed results can be compared directly.
145 let mut minmax_outlier = outlier_data.clone();
146 min_max_scale(&mut minmax_outlier, (0.0, 1.0));
147 println!(
148 "Min-Max scaled: {:?}",
149 minmax_outlier
150 .column(0)
151 .iter()
152 .map(|&x| format!("{:.3}", x))
153 .collect::<Vec<_>>()
154 );
155
156 let mut robust_outlier = outlier_data.clone();
157 robust_scale(&mut robust_outlier);
158 println!(
159 "Robust scaled: {:?}",
160 robust_outlier
161 .column(0)
162 .iter()
163 .map(|&x| format!("{:.3}", x))
164 .collect::<Vec<_>>()
165 );
166
167 println!("\nNotice how robust scaling is less affected by the outlier!");
168 println!();
169
170 // Feature engineering recommendations
171 println!("=== Feature Engineering Recommendations ========");
172 println!("1. **Scaling**: Use robust scaling when outliers are present");
173 println!("2. **Polynomial**: Use degree 2-3 for non-linear relationships");
174 println!("3. **Binning**: Use quantile binning for better distribution");
175 println!("4. **Statistical**: Extract global statistics for context");
176 println!("5. **Pipeline**: Always scale → transform → engineer → validate");
177 println!();
178
179 println!("=== Feature Extraction Demo Complete ===========");
180}