scirs2_core/simd/
preprocessing.rs1use ndarray::{Array1, ArrayView1};
7
8use super::arithmetic::{simd_scalar_mul_f32, simd_scalar_mul_f64};
10use super::norms::{simd_norm_l2_f32, simd_norm_l2_f64};
11use super::reductions::{simd_mean_f32, simd_mean_f64, simd_std_f32, simd_std_f64};
12
13#[allow(dead_code)]
34pub fn simd_normalize_f32(input: &ArrayView1<f32>) -> Array1<f32> {
35 if input.is_empty() {
36 return Array1::zeros(0);
37 }
38
39 let norm = simd_norm_l2_f32(input);
40 if norm == 0.0 || norm.is_nan() {
41 return Array1::zeros(input.len());
42 }
43
44 let inv_norm = 1.0 / norm;
45 simd_scalar_mul_f32(input, inv_norm)
46}
47
48#[allow(dead_code)]
61pub fn simd_normalize_f64(input: &ArrayView1<f64>) -> Array1<f64> {
62 if input.is_empty() {
63 return Array1::zeros(0);
64 }
65
66 let norm = simd_norm_l2_f64(input);
67 if norm == 0.0 || norm.is_nan() {
68 return Array1::zeros(input.len());
69 }
70
71 let inv_norm = 1.0 / norm;
72 simd_scalar_mul_f64(input, inv_norm)
73}
74
75#[allow(dead_code)]
95pub fn simd_standardize_f32(input: &ArrayView1<f32>) -> Array1<f32> {
96 if input.is_empty() {
97 return Array1::zeros(0);
98 }
99
100 let mean = simd_mean_f32(input);
101 let std = simd_std_f32(input);
102
103 if std == 0.0 || std.is_nan() {
104 return Array1::zeros(input.len());
105 }
106
107 let len = input.len();
108 let mut result = Array1::zeros(len);
109 let input_slice = input.as_slice().expect("Operation failed");
110 let result_slice: &mut [f32] = result.as_slice_mut().expect("Operation failed");
111
112 #[cfg(target_arch = "x86_64")]
113 {
114 if is_x86_feature_detected!("avx2") {
115 unsafe {
116 use std::arch::x86_64::*;
117
118 let mean_vec = _mm256_set1_ps(mean);
119 let inv_std = 1.0 / std;
120 let inv_std_vec = _mm256_set1_ps(inv_std);
121 let mut i = 0;
122
123 while i + 8 <= len {
124 let input_vec = _mm256_loadu_ps(input_slice.as_ptr().add(i));
125
126 let centered = _mm256_sub_ps(input_vec, mean_vec);
127 let scaled = _mm256_mul_ps(centered, inv_std_vec);
128
129 _mm256_storeu_ps(result_slice.as_mut_ptr().add(i), scaled);
130 i += 8;
131 }
132
133 for j in i..len {
135 result_slice[j] = (input_slice[j] - mean) * inv_std;
136 }
137
138 return result;
139 }
140 }
141 }
142
143 #[cfg(target_arch = "aarch64")]
144 {
145 if std::arch::is_aarch64_feature_detected!("neon") {
146 unsafe {
147 use std::arch::aarch64::*;
148
149 let mean_vec = vdupq_n_f32(mean);
150 let inv_std = 1.0 / std;
151 let inv_std_vec = vdupq_n_f32(inv_std);
152 let mut i = 0;
153
154 while i + 4 <= len {
155 let input_vec = vld1q_f32(input_slice.as_ptr().add(i));
156
157 let centered = vsubq_f32(input_vec, mean_vec);
158 let scaled = vmulq_f32(centered, inv_std_vec);
159
160 vst1q_f32(result_slice.as_mut_ptr().add(i), scaled);
161 i += 4;
162 }
163
164 for j in i..len {
166 result_slice[j] = (input_slice[j] - mean) * inv_std;
167 }
168
169 return result;
170 }
171 }
172 }
173
174 let inv_std = 1.0 / std;
176 for (i, val) in input.iter().enumerate() {
177 result_slice[i] = (val - mean) * inv_std;
178 }
179 result
180}
181
182#[allow(dead_code)]
195pub fn simd_standardize_f64(input: &ArrayView1<f64>) -> Array1<f64> {
196 if input.is_empty() {
197 return Array1::zeros(0);
198 }
199
200 let mean = simd_mean_f64(input);
201 let std = simd_std_f64(input);
202
203 if std == 0.0 || std.is_nan() {
204 return Array1::zeros(input.len());
205 }
206
207 let len = input.len();
208 let mut result = Array1::zeros(len);
209 let input_slice = input.as_slice().expect("Operation failed");
210 let result_slice: &mut [f64] = result.as_slice_mut().expect("Operation failed");
211
212 #[cfg(target_arch = "x86_64")]
213 {
214 if is_x86_feature_detected!("avx2") {
215 unsafe {
216 use std::arch::x86_64::*;
217
218 let mean_vec = _mm256_set1_pd(mean);
219 let inv_std = 1.0 / std;
220 let inv_std_vec = _mm256_set1_pd(inv_std);
221 let mut i = 0;
222
223 while i + 4 <= len {
224 let input_vec = _mm256_loadu_pd(input_slice.as_ptr().add(i));
225
226 let centered = _mm256_sub_pd(input_vec, mean_vec);
227 let scaled = _mm256_mul_pd(centered, inv_std_vec);
228
229 _mm256_storeu_pd(result_slice.as_mut_ptr().add(i), scaled);
230 i += 4;
231 }
232
233 for j in i..len {
235 result_slice[j] = (input_slice[j] - mean) * inv_std;
236 }
237
238 return result;
239 }
240 }
241 }
242
243 #[cfg(target_arch = "aarch64")]
244 {
245 if std::arch::is_aarch64_feature_detected!("neon") {
246 unsafe {
247 use std::arch::aarch64::*;
248
249 let mean_vec = vdupq_n_f64(mean);
250 let inv_std = 1.0 / std;
251 let inv_std_vec = vdupq_n_f64(inv_std);
252 let mut i = 0;
253
254 while i + 2 <= len {
255 let input_vec = vld1q_f64(input_slice.as_ptr().add(i));
256
257 let centered = vsubq_f64(input_vec, mean_vec);
258 let scaled = vmulq_f64(centered, inv_std_vec);
259
260 vst1q_f64(result_slice.as_mut_ptr().add(i), scaled);
261 i += 2;
262 }
263
264 for j in i..len {
266 result_slice[j] = (input_slice[j] - mean) * inv_std;
267 }
268
269 return result;
270 }
271 }
272 }
273
274 let inv_std = 1.0 / std;
276 for (i, val) in input.iter().enumerate() {
277 result_slice[i] = (val - mean) * inv_std;
278 }
279 result
280}