scirs2_transform/
scaling_simd.rs1use scirs2_core::ndarray::{Array1, Array2, ArrayBase, Data, Ix2};
7use scirs2_core::numeric::{Float, NumCast};
8use scirs2_core::simd_ops::SimdUnifiedOps;
9
10use crate::error::{Result, TransformError};
11use crate::scaling::EPSILON;
12
13pub struct SimdMaxAbsScaler<F: Float + NumCast + SimdUnifiedOps> {
15 max_abs_: Option<Array1<F>>,
17 scale_: Option<Array1<F>>,
19}
20
21impl<F: Float + NumCast + SimdUnifiedOps> SimdMaxAbsScaler<F> {
22 pub fn new() -> Self {
24 SimdMaxAbsScaler {
25 max_abs_: None,
26 scale_: None,
27 }
28 }
29
30 pub fn fit<S>(&mut self, x: &ArrayBase<S, Ix2>) -> Result<()>
32 where
33 S: Data<Elem = F>,
34 {
35 let n_samples = x.shape()[0];
36 let n_features = x.shape()[1];
37
38 if n_samples == 0 || n_features == 0 {
39 return Err(TransformError::InvalidInput("Empty input data".to_string()));
40 }
41
42 let mut max_abs = Array1::zeros(n_features);
43
44 for j in 0..n_features {
46 let col = x.column(j);
47 let col_array = col.to_owned();
48 let abs_col = F::simd_abs(&col_array.view());
49 max_abs[j] = F::simd_max_element(&abs_col.view());
50 }
51
52 let scale = max_abs.mapv(|max_abs_val| {
54 if max_abs_val > F::from(EPSILON).expect("Failed to convert to float") {
55 F::one() / max_abs_val
56 } else {
57 F::one()
58 }
59 });
60
61 self.max_abs_ = Some(max_abs);
62 self.scale_ = Some(scale);
63
64 Ok(())
65 }
66
67 pub fn transform<S>(&self, x: &ArrayBase<S, Ix2>) -> Result<Array2<F>>
69 where
70 S: Data<Elem = F>,
71 {
72 let n_samples = x.shape()[0];
73 let n_features = x.shape()[1];
74
75 if self.scale_.is_none() {
76 return Err(TransformError::TransformationError(
77 "Scaler has not been fitted".to_string(),
78 ));
79 }
80
81 let scale = self.scale_.as_ref().expect("Operation failed");
82
83 if n_features != scale.len() {
84 return Err(TransformError::InvalidInput(format!(
85 "X has {} features, but scaler was fitted with {} features",
86 n_features,
87 scale.len()
88 )));
89 }
90
91 let mut result = Array2::zeros((n_samples, n_features));
92
93 for i in 0..n_samples {
95 let row = x.row(i);
96 let row_array = row.to_owned();
97 let scaled_row = F::simd_mul(&row_array.view(), &scale.view());
98
99 for j in 0..n_features {
100 result[[i, j]] = scaled_row[j];
101 }
102 }
103
104 Ok(result)
105 }
106
107 pub fn fit_transform<S>(&mut self, x: &ArrayBase<S, Ix2>) -> Result<Array2<F>>
109 where
110 S: Data<Elem = F>,
111 {
112 self.fit(x)?;
113 self.transform(x)
114 }
115}
116
117pub struct SimdRobustScaler<F: Float + NumCast + SimdUnifiedOps> {
119 median_: Option<Array1<F>>,
121 iqr_: Option<Array1<F>>,
123 scale_: Option<Array1<F>>,
125}
126
127impl<F: Float + NumCast + SimdUnifiedOps> SimdRobustScaler<F> {
128 pub fn new() -> Self {
130 SimdRobustScaler {
131 median_: None,
132 iqr_: None,
133 scale_: None,
134 }
135 }
136
137 pub fn fit<S>(&mut self, x: &ArrayBase<S, Ix2>) -> Result<()>
139 where
140 S: Data<Elem = F>,
141 {
142 let n_samples = x.shape()[0];
143 let n_features = x.shape()[1];
144
145 if n_samples == 0 || n_features == 0 {
146 return Err(TransformError::InvalidInput("Empty input data".to_string()));
147 }
148
149 let mut median = Array1::zeros(n_features);
150 let mut iqr = Array1::zeros(n_features);
151
152 for j in 0..n_features {
154 let col = x.column(j);
155 let mut col_data: Vec<F> = col.to_vec();
156 col_data.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
157
158 let n = col_data.len();
159
160 median[j] = if n % 2 == 0 {
162 (col_data[n / 2 - 1] + col_data[n / 2])
163 / F::from(2.0).expect("Failed to convert constant to float")
164 } else {
165 col_data[n / 2]
166 };
167
168 let q1_idx = n / 4;
170 let q3_idx = 3 * n / 4;
171 let q1 = col_data[q1_idx];
172 let q3 = col_data[q3_idx];
173 iqr[j] = q3 - q1;
174 }
175
176 let scale = iqr.mapv(|iqr_val| {
178 if iqr_val > F::from(EPSILON).expect("Failed to convert to float") {
179 F::one() / iqr_val
180 } else {
181 F::one()
182 }
183 });
184
185 self.median_ = Some(median);
186 self.iqr_ = Some(iqr);
187 self.scale_ = Some(scale);
188
189 Ok(())
190 }
191
192 pub fn transform<S>(&self, x: &ArrayBase<S, Ix2>) -> Result<Array2<F>>
194 where
195 S: Data<Elem = F>,
196 {
197 let n_samples = x.shape()[0];
198 let n_features = x.shape()[1];
199
200 if self.median_.is_none() || self.scale_.is_none() {
201 return Err(TransformError::TransformationError(
202 "Scaler has not been fitted".to_string(),
203 ));
204 }
205
206 let median = self.median_.as_ref().expect("Operation failed");
207 let scale = self.scale_.as_ref().expect("Operation failed");
208
209 if n_features != median.len() {
210 return Err(TransformError::InvalidInput(format!(
211 "X has {} features, but scaler was fitted with {} features",
212 n_features,
213 median.len()
214 )));
215 }
216
217 let mut result = Array2::zeros((n_samples, n_features));
218
219 for i in 0..n_samples {
221 let row = x.row(i);
222 let row_array = row.to_owned();
223
224 let centered = F::simd_sub(&row_array.view(), &median.view());
226
227 let scaled = F::simd_mul(¢ered.view(), &scale.view());
229
230 for j in 0..n_features {
231 result[[i, j]] = scaled[j];
232 }
233 }
234
235 Ok(result)
236 }
237
238 pub fn fit_transform<S>(&mut self, x: &ArrayBase<S, Ix2>) -> Result<Array2<F>>
240 where
241 S: Data<Elem = F>,
242 {
243 self.fit(x)?;
244 self.transform(x)
245 }
246}
247
248pub struct SimdStandardScaler<F: Float + NumCast + SimdUnifiedOps> {
250 mean_: Option<Array1<F>>,
252 std_: Option<Array1<F>>,
254 with_mean: bool,
256 with_std: bool,
258}
259
260impl<F: Float + NumCast + SimdUnifiedOps> SimdStandardScaler<F> {
261 pub fn new(with_mean: bool, with_std: bool) -> Self {
263 SimdStandardScaler {
264 mean_: None,
265 std_: None,
266 with_mean,
267 with_std,
268 }
269 }
270
271 pub fn fit<S>(&mut self, x: &ArrayBase<S, Ix2>) -> Result<()>
273 where
274 S: Data<Elem = F>,
275 {
276 let n_samples = x.shape()[0];
277 let n_features = x.shape()[1];
278
279 if n_samples == 0 || n_features == 0 {
280 return Err(TransformError::InvalidInput("Empty input data".to_string()));
281 }
282
283 let n_samples_f = F::from(n_samples).expect("Failed to convert to float");
284 let mut mean = Array1::zeros(n_features);
285 let mut std = Array1::ones(n_features);
286
287 if self.with_mean {
288 for j in 0..n_features {
290 let col = x.column(j);
291 let col_array = col.to_owned();
292 mean[j] = F::simd_sum(&col_array.view()) / n_samples_f;
293 }
294 }
295
296 if self.with_std {
297 for j in 0..n_features {
299 let col = x.column(j);
300 let col_array = col.to_owned();
301
302 let m = if self.with_mean { mean[j] } else { F::zero() };
304
305 let mean_array = Array1::from_elem(n_samples, m);
306 let centered = F::simd_sub(&col_array.view(), &mean_array.view());
307 let squared = F::simd_mul(¢ered.view(), ¢ered.view());
308 let variance = F::simd_sum(&squared.view()) / n_samples_f;
309
310 std[j] = variance.sqrt();
311
312 if std[j] <= F::from(EPSILON).expect("Failed to convert to float") {
314 std[j] = F::one();
315 }
316 }
317 }
318
319 self.mean_ = Some(mean);
320 self.std_ = Some(std);
321
322 Ok(())
323 }
324
325 pub fn transform<S>(&self, x: &ArrayBase<S, Ix2>) -> Result<Array2<F>>
327 where
328 S: Data<Elem = F>,
329 {
330 let n_samples = x.shape()[0];
331 let n_features = x.shape()[1];
332
333 if self.mean_.is_none() || self.std_.is_none() {
334 return Err(TransformError::TransformationError(
335 "Scaler has not been fitted".to_string(),
336 ));
337 }
338
339 let mean = self.mean_.as_ref().expect("Operation failed");
340 let std = self.std_.as_ref().expect("Operation failed");
341
342 if n_features != mean.len() {
343 return Err(TransformError::InvalidInput(format!(
344 "X has {} features, but scaler was fitted with {} features",
345 n_features,
346 mean.len()
347 )));
348 }
349
350 let mut result = Array2::zeros((n_samples, n_features));
351
352 for i in 0..n_samples {
354 let row = x.row(i);
355 let mut row_array = row.to_owned();
356
357 if self.with_mean {
358 row_array = F::simd_sub(&row_array.view(), &mean.view());
360 }
361
362 if self.with_std {
363 row_array = F::simd_div(&row_array.view(), &std.view());
365 }
366
367 for j in 0..n_features {
368 result[[i, j]] = row_array[j];
369 }
370 }
371
372 Ok(result)
373 }
374
375 pub fn fit_transform<S>(&mut self, x: &ArrayBase<S, Ix2>) -> Result<Array2<F>>
377 where
378 S: Data<Elem = F>,
379 {
380 self.fit(x)?;
381 self.transform(x)
382 }
383}