Function robust_scale

Source
pub fn robust_scale(data: &mut Array2<f64>)
Expand description

Performs robust scaling using median and interquartile range

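Scales features using statistics that are robust to outliers. Each feature (column) is scaled independently by: X_scaled = (X - median) / IQR, where the median and IQR are computed per feature and IQR is the interquartile range (Q3 - Q1, the difference between the 75th and 25th percentiles). Because the median and IQR are barely affected by extreme values, this scaling method is less sensitive to outliers than standard (mean and standard deviation) normalization.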

§Arguments

  • data - Feature matrix of shape (n_samples, n_features); it is scaled in place

§Examples

use ndarray::Array2;
use scirs2_datasets::utils::robust_scale;

let mut data = Array2::from_shape_vec((5, 2), vec![1.0, 10.0, 2.0, 20.0, 3.0, 30.0, 4.0, 40.0, 100.0, 500.0]).unwrap();
robust_scale(&mut data);
// Features are now robustly scaled using median and IQR
Examples found in repository?
examples/feature_extraction_demo.rs (line 43)
12fn main() {
13    println!("=== Feature Extraction Utilities Demonstration ===\n");
14
15    // Create a sample dataset for demonstration
16    let data = Array2::from_shape_vec(
17        (6, 2),
18        vec![
19            1.0, 10.0, // Normal data
20            2.0, 20.0, 3.0, 30.0, 4.0, 40.0, 5.0, 50.0, 100.0, 500.0, // Outlier
21        ],
22    )
23    .unwrap();
24
25    println!("Original dataset:");
26    print_data_summary(&data, "Original");
27    println!();
28
29    // Demonstrate Min-Max Scaling
30    println!("=== Min-Max Scaling ============================");
31    let mut data_minmax = data.clone();
32    min_max_scale(&mut data_minmax, (0.0, 1.0));
33    print_data_summary(&data_minmax, "Min-Max Scaled [0, 1]");
34
35    let mut data_custom_range = data.clone();
36    min_max_scale(&mut data_custom_range, (-1.0, 1.0));
37    print_data_summary(&data_custom_range, "Min-Max Scaled [-1, 1]");
38    println!();
39
40    // Demonstrate Robust Scaling
41    println!("=== Robust Scaling ==============================");
42    let mut data_robust = data.clone();
43    robust_scale(&mut data_robust);
44    print_data_summary(&data_robust, "Robust Scaled (Median/IQR)");
45    println!();
46
47    // Demonstrate Polynomial Features
48    println!("=== Polynomial Feature Generation ==============");
49    let small_data = Array2::from_shape_vec((3, 2), vec![1.0, 2.0, 2.0, 3.0, 3.0, 4.0]).unwrap();
50
51    println!("Small dataset for polynomial demonstration:");
52    print_data_matrix(&small_data, &["x1", "x2"]);
53
54    let poly_with_bias = polynomial_features(&small_data, 2, true).unwrap();
55    println!("Polynomial features (degree=2, with bias):");
56    print_data_matrix(&poly_with_bias, &["1", "x1", "x2", "x1²", "x1*x2", "x2²"]);
57
58    let poly_no_bias = polynomial_features(&small_data, 2, false).unwrap();
59    println!("Polynomial features (degree=2, no bias):");
60    print_data_matrix(&poly_no_bias, &["x1", "x2", "x1²", "x1*x2", "x2²"]);
61    println!();
62
63    // Demonstrate Statistical Feature Extraction
64    println!("=== Statistical Feature Extraction =============");
65    let stats_data = Array2::from_shape_vec((5, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0]).unwrap();
66
67    let stats_features = statistical_features(&stats_data).unwrap();
68    println!("Statistical features for data [1, 2, 3, 4, 5]:");
69    println!("(Each sample gets the same global statistics)");
70    print_statistical_features(stats_features.row(0).to_owned());
71    println!();
72
73    // Demonstrate Binning/Discretization
74    println!("=== Feature Binning/Discretization =============");
75    let binning_data =
76        Array2::from_shape_vec((8, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]).unwrap();
77
78    println!("Original data for binning: [1, 2, 3, 4, 5, 6, 7, 8]");
79
80    let uniform_binned =
81        create_binned_features(&binning_data, 3, BinningStrategy::Uniform).unwrap();
82    println!(
83        "Uniform binning (3 bins): {:?}",
84        uniform_binned
85            .column(0)
86            .iter()
87            .map(|&x| x as usize)
88            .collect::<Vec<_>>()
89    );
90
91    let quantile_binned =
92        create_binned_features(&binning_data, 4, BinningStrategy::Quantile).unwrap();
93    println!(
94        "Quantile binning (4 bins): {:?}",
95        quantile_binned
96            .column(0)
97            .iter()
98            .map(|&x| x as usize)
99            .collect::<Vec<_>>()
100    );
101    println!();
102
103    // Demonstrate Feature Extraction Pipeline
104    println!("=== Complete Feature Extraction Pipeline =======");
105    let iris = load_iris().unwrap();
106    println!(
107        "Using Iris dataset ({} samples, {} features)",
108        iris.n_samples(),
109        iris.n_features()
110    );
111
112    // Step 1: Robust scaling (handles outliers better)
113    let mut scaled_iris = iris.data.clone();
114    robust_scale(&mut scaled_iris);
115    println!("Step 1: Applied robust scaling");
116
117    // Step 2: Generate polynomial features (degree 2)
118    let poly_iris = polynomial_features(&scaled_iris, 2, false).unwrap();
119    println!("Step 2: Generated polynomial features");
120    println!("  Original features: {}", scaled_iris.ncols());
121    println!("  Polynomial features: {}", poly_iris.ncols());
122
123    // Step 3: Create binned features for non-linearity
124    let binned_iris = create_binned_features(&scaled_iris, 5, BinningStrategy::Quantile).unwrap();
125    println!("Step 3: Created binned features");
126    println!("  Binned features: {}", binned_iris.ncols());
127
128    // Step 4: Extract statistical features
129    let stats_iris =
130        statistical_features(&iris.data.slice(ndarray::s![0..20, ..]).to_owned()).unwrap();
131    println!("Step 4: Extracted statistical features (from first 20 samples)");
132    println!("  Statistical features: {}", stats_iris.ncols());
133    println!();
134
135    // Comparison of scaling methods with outliers
136    println!("=== Scaling Methods Comparison (with outliers) =");
137    let outlier_data = Array2::from_shape_vec(
138        (5, 1),
139        vec![1.0, 2.0, 3.0, 4.0, 100.0], // 100.0 is a severe outlier
140    )
141    .unwrap();
142
143    println!("Original data with outlier: [1, 2, 3, 4, 100]");
144
145    let mut minmax_outlier = outlier_data.clone();
146    min_max_scale(&mut minmax_outlier, (0.0, 1.0));
147    println!(
148        "Min-Max scaled: {:?}",
149        minmax_outlier
150            .column(0)
151            .iter()
152            .map(|&x| format!("{:.3}", x))
153            .collect::<Vec<_>>()
154    );
155
156    let mut robust_outlier = outlier_data.clone();
157    robust_scale(&mut robust_outlier);
158    println!(
159        "Robust scaled: {:?}",
160        robust_outlier
161            .column(0)
162            .iter()
163            .map(|&x| format!("{:.3}", x))
164            .collect::<Vec<_>>()
165    );
166
167    println!("\nNotice how robust scaling is less affected by the outlier!");
168    println!();
169
170    // Feature engineering recommendations
171    println!("=== Feature Engineering Recommendations ========");
172    println!("1. **Scaling**: Use robust scaling when outliers are present");
173    println!("2. **Polynomial**: Use degree 2-3 for non-linear relationships");
174    println!("3. **Binning**: Use quantile binning for better distribution");
175    println!("4. **Statistical**: Extract global statistics for context");
176    println!("5. **Pipeline**: Always scale → transform → engineer → validate");
177    println!();
178
179    println!("=== Feature Extraction Demo Complete ===========");
180}