1use scirs2_core::ndarray::{Array1, Array2};
7use scirs2_datasets::{
8 create_binned_features, load_iris, min_max_scale, polynomial_features, robust_scale,
9 statistical_features, BinningStrategy,
10};
11
12#[allow(dead_code)]
13fn main() {
14 println!("=== Feature Extraction Utilities Demonstration ===\n");
15
16 let data = Array2::from_shape_vec(
18 (6, 2),
19 vec![
20 1.0, 10.0, 2.0, 20.0, 3.0, 30.0, 4.0, 40.0, 5.0, 50.0, 100.0, 500.0, ],
23 )
24 .unwrap();
25
26 println!("Original dataset:");
27 print_data_summary(&data, "Original");
28 println!();
29
30 println!("=== Min-Max Scaling ============================");
32 let mut data_minmax = data.clone();
33 min_max_scale(&mut data_minmax, (0.0, 1.0));
34 print_data_summary(&data_minmax, "Min-Max Scaled [0, 1]");
35
36 let mut data_custom_range = data.clone();
37 min_max_scale(&mut data_custom_range, (-1.0, 1.0));
38 print_data_summary(&data_custom_range, "Min-Max Scaled [-1, 1]");
39 println!();
40
41 println!("=== Robust Scaling ==============================");
43 let mut data_robust = data.clone();
44 robust_scale(&mut data_robust);
45 print_data_summary(&data_robust, "Robust Scaled (Median/IQR)");
46 println!();
47
48 println!("=== Polynomial Feature Generation ==============");
50 let smalldata = Array2::from_shape_vec((3, 2), vec![1.0, 2.0, 2.0, 3.0, 3.0, 4.0]).unwrap();
51
52 println!("Small dataset for polynomial demonstration:");
53 print_data_matrix(&smalldata, &["x1", "x2"]);
54
55 let poly_with_bias = polynomial_features(&smalldata, 2, true).unwrap();
56 println!("Polynomial features (degree=2, with bias):");
57 print_data_matrix(&poly_with_bias, &["1", "x1", "x2", "x1²", "x1*x2", "x2²"]);
58
59 let poly_no_bias = polynomial_features(&smalldata, 2, false).unwrap();
60 println!("Polynomial features (degree=2, no bias):");
61 print_data_matrix(&poly_no_bias, &["x1", "x2", "x1²", "x1*x2", "x2²"]);
62 println!();
63
64 println!("=== Statistical Feature Extraction =============");
66 let statsdata = Array2::from_shape_vec((5, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0]).unwrap();
67
68 let stats_features = statistical_features(&statsdata).unwrap();
69 println!("Statistical features for data [1, 2, 3, 4, 5]:");
70 println!("(Each sample gets the same global statistics)");
71 print_statistical_features(stats_features.row(0).to_owned());
72 println!();
73
74 println!("=== Feature Binning/Discretization =============");
76 let binningdata =
77 Array2::from_shape_vec((8, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]).unwrap();
78
79 println!("Original data for binning: [1, 2, 3, 4, 5, 6, 7, 8]");
80
81 let uniform_binned = create_binned_features(&binningdata, 3, BinningStrategy::Uniform).unwrap();
82 println!(
83 "Uniform binning (3 bins): {:?}",
84 uniform_binned
85 .column(0)
86 .iter()
87 .map(|&x| x as usize)
88 .collect::<Vec<_>>()
89 );
90
91 let quantile_binned =
92 create_binned_features(&binningdata, 4, BinningStrategy::Quantile).unwrap();
93 println!(
94 "Quantile binning (4 bins): {:?}",
95 quantile_binned
96 .column(0)
97 .iter()
98 .map(|&x| x as usize)
99 .collect::<Vec<_>>()
100 );
101 println!();
102
103 println!("=== Complete Feature Extraction Pipeline =======");
105 let iris = load_iris().unwrap();
106 println!(
107 "Using Iris dataset ({} samples, {} features)",
108 iris.n_samples(),
109 iris.n_features()
110 );
111
112 let mut scaled_iris = iris.data.clone();
114 robust_scale(&mut scaled_iris);
115 println!("Step 1: Applied robust scaling");
116
117 let poly_iris = polynomial_features(&scaled_iris, 2, false).unwrap();
119 println!("Step 2: Generated polynomial features");
120 println!(" Original features: {}", scaled_iris.ncols());
121 println!(" Polynomial features: {}", poly_iris.ncols());
122
123 let binned_iris = create_binned_features(&scaled_iris, 5, BinningStrategy::Quantile).unwrap();
125 println!("Step 3: Created binned features");
126 println!(" Binned features: {}", binned_iris.ncols());
127
128 let stats_iris = statistical_features(
130 &iris
131 .data
132 .slice(scirs2_core::ndarray::s![0..20, ..])
133 .to_owned(),
134 )
135 .unwrap();
136 println!("Step 4: Extracted statistical features (from first 20 samples)");
137 println!(" Statistical features: {}", stats_iris.ncols());
138 println!();
139
140 println!("=== Scaling Methods Comparison (with outliers) =");
142 let outlierdata = Array2::from_shape_vec(
143 (5, 1),
144 vec![1.0, 2.0, 3.0, 4.0, 100.0], )
146 .unwrap();
147
148 println!("Original data with outlier: [1, 2, 3, 4, 100]");
149
150 let mut minmax_outlier = outlierdata.clone();
151 min_max_scale(&mut minmax_outlier, (0.0, 1.0));
152 println!(
153 "Min-Max scaled: {:?}",
154 minmax_outlier
155 .column(0)
156 .iter()
157 .map(|&x| format!("{x:.3}"))
158 .collect::<Vec<_>>()
159 );
160
161 let mut robust_outlier = outlierdata.clone();
162 robust_scale(&mut robust_outlier);
163 println!(
164 "Robust scaled: {:?}",
165 robust_outlier
166 .column(0)
167 .iter()
168 .map(|&x| format!("{x:.3}"))
169 .collect::<Vec<_>>()
170 );
171
172 println!("\nNotice how robust scaling is less affected by the outlier!");
173 println!();
174
175 println!("=== Feature Engineering Recommendations ========");
177 println!("1. **Scaling**: Use robust scaling when outliers are present");
178 println!("2. **Polynomial**: Use degree 2-3 for non-linear relationships");
179 println!("3. **Binning**: Use quantile binning for better distribution");
180 println!("4. **Statistical**: Extract global statistics for context");
181 println!("5. **Pipeline**: Always scale → transform → engineer → validate");
182 println!();
183
184 println!("=== Feature Extraction Demo Complete ===========");
185}
186
187#[allow(dead_code)]
189fn print_data_summary(data: &Array2<f64>, title: &str) {
190 println!("{}: shape=({}, {})", title, data.nrows(), data.ncols());
191 for j in 0..data.ncols() {
192 let col = data.column(j);
193 let min_val = col.iter().fold(f64::INFINITY, |a, &b| a.min(b));
194 let max_val = col.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
195 let mean = col.iter().sum::<f64>() / col.len() as f64;
196 println!(" Feature {j}: min={min_val:.3}, max={max_val:.3}, mean={mean:.3}");
197 }
198}
199
200#[allow(dead_code)]
202fn print_data_matrix(data: &Array2<f64>, featurenames: &[&str]) {
203 print!(" ");
205 for name in featurenames {
206 print!("{name:>8}");
207 }
208 println!();
209
210 for i in 0..data.nrows() {
212 print!(" {i}: ");
213 for j in 0..data.ncols() {
214 print!("{:8.3}", data[[i, j]]);
215 }
216 println!();
217 }
218}
219
220#[allow(dead_code)]
222fn print_statistical_features(stats: Array1<f64>) {
223 let labels = [
224 "mean", "std", "min", "max", "median", "q25", "q75", "skewness", "kurtosis",
225 ];
226 println!(" Statistical measures:");
227 for (i, &value) in stats.iter().enumerate() {
228 if i < labels.len() {
229 println!(" {}: {:.3}", labels[i], value);
230 }
231 }
232}