Skip to main content

scirs2_transform/
scaling_simd.rs

//! SIMD-accelerated scaling operations
//!
//! This module provides SIMD-optimized implementations of scaling operations
//! using the unified SIMD operations from scirs2-core.
5
6use scirs2_core::ndarray::{Array1, Array2, ArrayBase, Data, Ix2};
7use scirs2_core::numeric::{Float, NumCast};
8use scirs2_core::simd_ops::SimdUnifiedOps;
9
10use crate::error::{Result, TransformError};
11use crate::scaling::EPSILON;
12
13/// SIMD-accelerated MaxAbsScaler
14pub struct SimdMaxAbsScaler<F: Float + NumCast + SimdUnifiedOps> {
15    /// Maximum absolute values for each feature
16    max_abs_: Option<Array1<F>>,
17    /// Scale factors for each feature
18    scale_: Option<Array1<F>>,
19}
20
21impl<F: Float + NumCast + SimdUnifiedOps> SimdMaxAbsScaler<F> {
22    /// Creates a new SIMD-accelerated MaxAbsScaler
23    pub fn new() -> Self {
24        SimdMaxAbsScaler {
25            max_abs_: None,
26            scale_: None,
27        }
28    }
29
30    /// Fits the scaler to the input data using SIMD operations
31    pub fn fit<S>(&mut self, x: &ArrayBase<S, Ix2>) -> Result<()>
32    where
33        S: Data<Elem = F>,
34    {
35        let n_samples = x.shape()[0];
36        let n_features = x.shape()[1];
37
38        if n_samples == 0 || n_features == 0 {
39            return Err(TransformError::InvalidInput("Empty input data".to_string()));
40        }
41
42        let mut max_abs = Array1::zeros(n_features);
43
44        // Compute maximum absolute value for each feature using SIMD
45        for j in 0..n_features {
46            let col = x.column(j);
47            let col_array = col.to_owned();
48            let abs_col = F::simd_abs(&col_array.view());
49            max_abs[j] = F::simd_max_element(&abs_col.view());
50        }
51
52        // Compute scale factors
53        let scale = max_abs.mapv(|max_abs_val| {
54            if max_abs_val > F::from(EPSILON).expect("Failed to convert to float") {
55                F::one() / max_abs_val
56            } else {
57                F::one()
58            }
59        });
60
61        self.max_abs_ = Some(max_abs);
62        self.scale_ = Some(scale);
63
64        Ok(())
65    }
66
67    /// Transforms the input data using SIMD operations
68    pub fn transform<S>(&self, x: &ArrayBase<S, Ix2>) -> Result<Array2<F>>
69    where
70        S: Data<Elem = F>,
71    {
72        let n_samples = x.shape()[0];
73        let n_features = x.shape()[1];
74
75        if self.scale_.is_none() {
76            return Err(TransformError::TransformationError(
77                "Scaler has not been fitted".to_string(),
78            ));
79        }
80
81        let scale = self.scale_.as_ref().expect("Operation failed");
82
83        if n_features != scale.len() {
84            return Err(TransformError::InvalidInput(format!(
85                "X has {} features, but scaler was fitted with {} features",
86                n_features,
87                scale.len()
88            )));
89        }
90
91        let mut result = Array2::zeros((n_samples, n_features));
92
93        // Transform each row using SIMD operations
94        for i in 0..n_samples {
95            let row = x.row(i);
96            let row_array = row.to_owned();
97            let scaled_row = F::simd_mul(&row_array.view(), &scale.view());
98
99            for j in 0..n_features {
100                result[[i, j]] = scaled_row[j];
101            }
102        }
103
104        Ok(result)
105    }
106
107    /// Fits and transforms the data in one step
108    pub fn fit_transform<S>(&mut self, x: &ArrayBase<S, Ix2>) -> Result<Array2<F>>
109    where
110        S: Data<Elem = F>,
111    {
112        self.fit(x)?;
113        self.transform(x)
114    }
115}
116
117/// SIMD-accelerated robust scaling using median and IQR
118pub struct SimdRobustScaler<F: Float + NumCast + SimdUnifiedOps> {
119    /// Median values for each feature
120    median_: Option<Array1<F>>,
121    /// IQR values for each feature
122    iqr_: Option<Array1<F>>,
123    /// Scale factors (1/IQR) for each feature
124    scale_: Option<Array1<F>>,
125}
126
127impl<F: Float + NumCast + SimdUnifiedOps> SimdRobustScaler<F> {
128    /// Creates a new SIMD-accelerated RobustScaler
129    pub fn new() -> Self {
130        SimdRobustScaler {
131            median_: None,
132            iqr_: None,
133            scale_: None,
134        }
135    }
136
137    /// Fits the scaler to the input data
138    pub fn fit<S>(&mut self, x: &ArrayBase<S, Ix2>) -> Result<()>
139    where
140        S: Data<Elem = F>,
141    {
142        let n_samples = x.shape()[0];
143        let n_features = x.shape()[1];
144
145        if n_samples == 0 || n_features == 0 {
146            return Err(TransformError::InvalidInput("Empty input data".to_string()));
147        }
148
149        let mut median = Array1::zeros(n_features);
150        let mut iqr = Array1::zeros(n_features);
151
152        // Compute median and IQR for each feature
153        for j in 0..n_features {
154            let col = x.column(j);
155            let mut col_data: Vec<F> = col.to_vec();
156            col_data.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
157
158            let n = col_data.len();
159
160            // Calculate median
161            median[j] = if n % 2 == 0 {
162                (col_data[n / 2 - 1] + col_data[n / 2])
163                    / F::from(2.0).expect("Failed to convert constant to float")
164            } else {
165                col_data[n / 2]
166            };
167
168            // Calculate IQR
169            let q1_idx = n / 4;
170            let q3_idx = 3 * n / 4;
171            let q1 = col_data[q1_idx];
172            let q3 = col_data[q3_idx];
173            iqr[j] = q3 - q1;
174        }
175
176        // Compute scale factors
177        let scale = iqr.mapv(|iqr_val| {
178            if iqr_val > F::from(EPSILON).expect("Failed to convert to float") {
179                F::one() / iqr_val
180            } else {
181                F::one()
182            }
183        });
184
185        self.median_ = Some(median);
186        self.iqr_ = Some(iqr);
187        self.scale_ = Some(scale);
188
189        Ok(())
190    }
191
192    /// Transforms the input data using SIMD operations
193    pub fn transform<S>(&self, x: &ArrayBase<S, Ix2>) -> Result<Array2<F>>
194    where
195        S: Data<Elem = F>,
196    {
197        let n_samples = x.shape()[0];
198        let n_features = x.shape()[1];
199
200        if self.median_.is_none() || self.scale_.is_none() {
201            return Err(TransformError::TransformationError(
202                "Scaler has not been fitted".to_string(),
203            ));
204        }
205
206        let median = self.median_.as_ref().expect("Operation failed");
207        let scale = self.scale_.as_ref().expect("Operation failed");
208
209        if n_features != median.len() {
210            return Err(TransformError::InvalidInput(format!(
211                "X has {} features, but scaler was fitted with {} features",
212                n_features,
213                median.len()
214            )));
215        }
216
217        let mut result = Array2::zeros((n_samples, n_features));
218
219        // Transform each row: (x - median) * scale
220        for i in 0..n_samples {
221            let row = x.row(i);
222            let row_array = row.to_owned();
223
224            // Subtract median
225            let centered = F::simd_sub(&row_array.view(), &median.view());
226
227            // Scale by IQR
228            let scaled = F::simd_mul(&centered.view(), &scale.view());
229
230            for j in 0..n_features {
231                result[[i, j]] = scaled[j];
232            }
233        }
234
235        Ok(result)
236    }
237
238    /// Fits and transforms the data in one step
239    pub fn fit_transform<S>(&mut self, x: &ArrayBase<S, Ix2>) -> Result<Array2<F>>
240    where
241        S: Data<Elem = F>,
242    {
243        self.fit(x)?;
244        self.transform(x)
245    }
246}
247
248/// SIMD-accelerated standard scaling (Z-score normalization)
249pub struct SimdStandardScaler<F: Float + NumCast + SimdUnifiedOps> {
250    /// Mean values for each feature
251    mean_: Option<Array1<F>>,
252    /// Standard deviation values for each feature
253    std_: Option<Array1<F>>,
254    /// Whether to center the data
255    with_mean: bool,
256    /// Whether to scale to unit variance
257    with_std: bool,
258}
259
260impl<F: Float + NumCast + SimdUnifiedOps> SimdStandardScaler<F> {
261    /// Creates a new SIMD-accelerated StandardScaler
262    pub fn new(with_mean: bool, with_std: bool) -> Self {
263        SimdStandardScaler {
264            mean_: None,
265            std_: None,
266            with_mean,
267            with_std,
268        }
269    }
270
271    /// Fits the scaler to the input data using SIMD operations
272    pub fn fit<S>(&mut self, x: &ArrayBase<S, Ix2>) -> Result<()>
273    where
274        S: Data<Elem = F>,
275    {
276        let n_samples = x.shape()[0];
277        let n_features = x.shape()[1];
278
279        if n_samples == 0 || n_features == 0 {
280            return Err(TransformError::InvalidInput("Empty input data".to_string()));
281        }
282
283        let n_samples_f = F::from(n_samples).expect("Failed to convert to float");
284        let mut mean = Array1::zeros(n_features);
285        let mut std = Array1::ones(n_features);
286
287        if self.with_mean {
288            // Compute mean for each feature using SIMD
289            for j in 0..n_features {
290                let col = x.column(j);
291                let col_array = col.to_owned();
292                mean[j] = F::simd_sum(&col_array.view()) / n_samples_f;
293            }
294        }
295
296        if self.with_std {
297            // Compute standard deviation for each feature using SIMD
298            for j in 0..n_features {
299                let col = x.column(j);
300                let col_array = col.to_owned();
301
302                // Compute variance
303                let m = if self.with_mean { mean[j] } else { F::zero() };
304
305                let mean_array = Array1::from_elem(n_samples, m);
306                let centered = F::simd_sub(&col_array.view(), &mean_array.view());
307                let squared = F::simd_mul(&centered.view(), &centered.view());
308                let variance = F::simd_sum(&squared.view()) / n_samples_f;
309
310                std[j] = variance.sqrt();
311
312                // Avoid division by zero
313                if std[j] <= F::from(EPSILON).expect("Failed to convert to float") {
314                    std[j] = F::one();
315                }
316            }
317        }
318
319        self.mean_ = Some(mean);
320        self.std_ = Some(std);
321
322        Ok(())
323    }
324
325    /// Transforms the input data using SIMD operations
326    pub fn transform<S>(&self, x: &ArrayBase<S, Ix2>) -> Result<Array2<F>>
327    where
328        S: Data<Elem = F>,
329    {
330        let n_samples = x.shape()[0];
331        let n_features = x.shape()[1];
332
333        if self.mean_.is_none() || self.std_.is_none() {
334            return Err(TransformError::TransformationError(
335                "Scaler has not been fitted".to_string(),
336            ));
337        }
338
339        let mean = self.mean_.as_ref().expect("Operation failed");
340        let std = self.std_.as_ref().expect("Operation failed");
341
342        if n_features != mean.len() {
343            return Err(TransformError::InvalidInput(format!(
344                "X has {} features, but scaler was fitted with {} features",
345                n_features,
346                mean.len()
347            )));
348        }
349
350        let mut result = Array2::zeros((n_samples, n_features));
351
352        // Transform each row: (x - mean) / std
353        for i in 0..n_samples {
354            let row = x.row(i);
355            let mut row_array = row.to_owned();
356
357            if self.with_mean {
358                // Center the data
359                row_array = F::simd_sub(&row_array.view(), &mean.view());
360            }
361
362            if self.with_std {
363                // Scale to unit variance
364                row_array = F::simd_div(&row_array.view(), &std.view());
365            }
366
367            for j in 0..n_features {
368                result[[i, j]] = row_array[j];
369            }
370        }
371
372        Ok(result)
373    }
374
375    /// Fits and transforms the data in one step
376    pub fn fit_transform<S>(&mut self, x: &ArrayBase<S, Ix2>) -> Result<Array2<F>>
377    where
378        S: Data<Elem = F>,
379    {
380        self.fit(x)?;
381        self.transform(x)
382    }
383}