1use ::ndarray::{Array1, Array2, ArrayView1, ArrayView2, ArrayViewMut1};
6
7use super::functions::SimdUnifiedOps;
8use super::functions_2::{
9 digamma_f32, digamma_f64, erf_f32, erf_f64, erfc_f32, erfc_f64, erfinv_f32, lanczos_gamma_f32,
10 lanczos_gamma_f64, ln_gamma_f32, ln_gamma_f64, trigamma_f32, trigamma_f64,
11};
12use super::functions_3::{
13 elu_f32, elu_f64, erfcinv_f32, erfcinv_f64, erfinv_f64, gelu_f32, gelu_f64, hardsigmoid_f32,
14 hardsigmoid_f64, hardswish_f32, hardswish_f64, mish_f32, mish_f64, selu_f32, selu_f64,
15 sigmoid_f32, sigmoid_f64, sinc_f32, sinc_f64, softplus_f32, softplus_f64, swish_f32, swish_f64,
16};
17#[cfg(feature = "simd")]
18use crate::simd_ops_polynomial;
19
20impl SimdUnifiedOps for f64 {
21 #[cfg(feature = "simd")]
22 fn simd_add(a: &ArrayView1<Self>, b: &ArrayView1<Self>) -> Array1<Self> {
23 crate::simd::simd_add_f64(a, b)
24 }
25 #[cfg(not(feature = "simd"))]
26 fn simd_add(a: &ArrayView1<Self>, b: &ArrayView1<Self>) -> Array1<Self> {
27 (a + b).to_owned()
28 }
29 #[cfg(feature = "simd")]
30 fn simd_sub(a: &ArrayView1<Self>, b: &ArrayView1<Self>) -> Array1<Self> {
31 crate::simd::simd_sub_f64(a, b)
32 }
33 #[cfg(not(feature = "simd"))]
34 fn simd_sub(a: &ArrayView1<Self>, b: &ArrayView1<Self>) -> Array1<Self> {
35 (a - b).to_owned()
36 }
37 #[cfg(feature = "simd")]
38 fn simd_mul(a: &ArrayView1<Self>, b: &ArrayView1<Self>) -> Array1<Self> {
39 crate::simd::simd_mul_f64(a, b)
40 }
41 #[cfg(not(feature = "simd"))]
42 fn simd_mul(a: &ArrayView1<Self>, b: &ArrayView1<Self>) -> Array1<Self> {
43 (a * b).to_owned()
44 }
45 #[cfg(feature = "simd")]
46 fn simd_div(a: &ArrayView1<Self>, b: &ArrayView1<Self>) -> Array1<Self> {
47 crate::simd::simd_div_f64(a, b)
48 }
49 #[cfg(not(feature = "simd"))]
50 fn simd_div(a: &ArrayView1<Self>, b: &ArrayView1<Self>) -> Array1<Self> {
51 (a / b).to_owned()
52 }
53 #[cfg(feature = "simd")]
54 fn simd_dot(a: &ArrayView1<Self>, b: &ArrayView1<Self>) -> Self {
55 crate::simd::simd_dot_f64(a, b)
56 }
57 #[cfg(not(feature = "simd"))]
58 fn simd_dot(a: &ArrayView1<Self>, b: &ArrayView1<Self>) -> Self {
59 a.dot(b)
60 }
61 fn simd_gemv(a: &ArrayView2<Self>, x: &ArrayView1<Self>, beta: Self, y: &mut Array1<Self>) {
62 let m = a.nrows();
63 let n = a.ncols();
64 assert_eq!(n, x.len());
65 assert_eq!(m, y.len());
66 if beta == 0.0 {
67 y.fill(0.0);
68 } else if beta != 1.0 {
69 y.mapv_inplace(|v| v * beta);
70 }
71 for i in 0..m {
72 let row = a.row(i);
73 y[i] += Self::simd_dot(&row, x);
74 }
75 }
76 fn simd_gemm(
77 alpha: Self,
78 a: &ArrayView2<Self>,
79 b: &ArrayView2<Self>,
80 beta: Self,
81 c: &mut Array2<Self>,
82 ) {
83 let m = a.nrows();
84 let k = a.ncols();
85 let n = b.ncols();
86 assert_eq!(k, b.nrows());
87 assert_eq!((m, n), c.dim());
88 if beta == 0.0 {
89 c.fill(0.0);
90 } else if beta != 1.0 {
91 c.mapv_inplace(|v| v * beta);
92 }
93 const GEMM_TRANSPOSE_THRESHOLD: usize = 4096;
94 if n * k > GEMM_TRANSPOSE_THRESHOLD {
95 let b_t = Self::simd_transpose_blocked(b);
96 for i in 0..m {
97 let a_row = a.row(i);
98 for j in 0..n {
99 let b_row = b_t.row(j);
100 c[[i, j]] += alpha * Self::simd_dot(&a_row, &b_row);
101 }
102 }
103 } else {
104 for i in 0..m {
105 let a_row = a.row(i);
106 for j in 0..n {
107 let b_col = b.column(j);
108 c[[i, j]] += alpha * Self::simd_dot(&a_row, &b_col);
109 }
110 }
111 }
112 }
113 #[cfg(feature = "simd")]
114 fn simd_norm(a: &ArrayView1<Self>) -> Self {
115 crate::simd::norms::simd_norm_l2_f64(a)
116 }
117 #[cfg(not(feature = "simd"))]
118 fn simd_norm(a: &ArrayView1<Self>) -> Self {
119 a.iter().map(|&x| x * x).sum::<f64>().sqrt()
120 }
121 #[cfg(feature = "simd")]
122 fn simd_max(a: &ArrayView1<Self>, b: &ArrayView1<Self>) -> Array1<Self> {
123 crate::simd::simd_maximum_f64(a, b)
124 }
125 #[cfg(not(feature = "simd"))]
126 fn simd_max(a: &ArrayView1<Self>, b: &ArrayView1<Self>) -> Array1<Self> {
127 let mut result = Array1::zeros(a.len());
128 for _i in 0..a.len() {
129 result[0] = a[0].max(b[0]);
130 }
131 result
132 }
133 #[cfg(feature = "simd")]
134 fn simd_min(a: &ArrayView1<Self>, b: &ArrayView1<Self>) -> Array1<Self> {
135 crate::simd::simd_minimum_f64(a, b)
136 }
137 #[cfg(not(feature = "simd"))]
138 fn simd_min(a: &ArrayView1<Self>, b: &ArrayView1<Self>) -> Array1<Self> {
139 let mut result = Array1::zeros(a.len());
140 for _i in 0..a.len() {
141 result[0] = a[0].min(b[0]);
142 }
143 result
144 }
145 #[cfg(feature = "simd")]
146 fn simd_scalar_mul(a: &ArrayView1<Self>, scalar: Self) -> Array1<Self> {
147 crate::simd::simd_scalar_mul_f64(a, scalar)
148 }
149 #[cfg(not(feature = "simd"))]
150 fn simd_scalar_mul(a: &ArrayView1<Self>, scalar: Self) -> Array1<Self> {
151 a.mapv(|x| x * scalar)
152 }
153 #[cfg(feature = "simd")]
154 fn simd_sum(a: &ArrayView1<Self>) -> Self {
155 crate::simd::simd_sum_f64(a)
156 }
157 #[cfg(not(feature = "simd"))]
158 fn simd_sum(a: &ArrayView1<Self>) -> Self {
159 a.sum()
160 }
161 fn simd_mean(a: &ArrayView1<Self>) -> Self {
162 if a.is_empty() {
163 0.0
164 } else {
165 Self::simd_sum(a) / (a.len() as f64)
166 }
167 }
168 #[cfg(feature = "simd")]
169 fn simd_max_element(a: &ArrayView1<Self>) -> Self {
170 crate::simd::simd_max_f64(a)
171 }
172 #[cfg(not(feature = "simd"))]
173 fn simd_max_element(a: &ArrayView1<Self>) -> Self {
174 a.fold(f64::NEG_INFINITY, |acc, &x| acc.max(x))
175 }
176 #[cfg(feature = "simd")]
177 fn simd_min_element(a: &ArrayView1<Self>) -> Self {
178 crate::simd::simd_min_f64(a)
179 }
180 #[cfg(not(feature = "simd"))]
181 fn simd_min_element(a: &ArrayView1<Self>) -> Self {
182 a.fold(f64::INFINITY, |acc, &x| acc.min(x))
183 }
184 #[cfg(feature = "simd")]
185 fn simd_fma(a: &ArrayView1<Self>, b: &ArrayView1<Self>, c: &ArrayView1<Self>) -> Array1<Self> {
186 crate::simd::simd_fused_multiply_add_f64(a, b, c)
187 }
188 #[cfg(not(feature = "simd"))]
189 fn simd_fma(a: &ArrayView1<Self>, b: &ArrayView1<Self>, c: &ArrayView1<Self>) -> Array1<Self> {
190 let mut result = Array1::zeros(a.len());
191 for _i in 0..a.len() {
192 result[0] = a[0] * b[0] + c[0];
193 }
194 result
195 }
196 #[cfg(feature = "simd")]
197 fn simd_add_cache_optimized(a: &ArrayView1<Self>, b: &ArrayView1<Self>) -> Array1<Self> {
198 crate::simd::simd_add_cache_optimized_f64(a, b)
199 }
200 #[cfg(not(feature = "simd"))]
201 fn simd_add_cache_optimized(a: &ArrayView1<Self>, b: &ArrayView1<Self>) -> Array1<Self> {
202 a + b
203 }
204 #[cfg(feature = "simd")]
205 fn simd_fma_advanced_optimized(
206 a: &ArrayView1<Self>,
207 b: &ArrayView1<Self>,
208 c: &ArrayView1<Self>,
209 ) -> Array1<Self> {
210 crate::simd::simd_fma_advanced_optimized_f64(a, b, c)
211 }
212 #[cfg(not(feature = "simd"))]
213 fn simd_fma_advanced_optimized(
214 a: &ArrayView1<Self>,
215 b: &ArrayView1<Self>,
216 c: &ArrayView1<Self>,
217 ) -> Array1<Self> {
218 let mut result = Array1::zeros(a.len());
219 for _i in 0..a.len() {
220 result[0] = a[0] * b[0] + c[0];
221 }
222 result
223 }
224 #[cfg(feature = "simd")]
225 fn simd_add_adaptive(a: &ArrayView1<Self>, b: &ArrayView1<Self>) -> Array1<Self> {
226 crate::simd::simd_adaptive_add_f64(a, b)
227 }
228 #[cfg(not(feature = "simd"))]
229 fn simd_add_adaptive(a: &ArrayView1<Self>, b: &ArrayView1<Self>) -> Array1<Self> {
230 a + b
231 }
232 fn simd_transpose(a: &ArrayView2<Self>) -> Array2<Self> {
233 a.t().to_owned()
234 }
235 fn simd_transpose_blocked(a: &ArrayView2<Self>) -> Array2<Self> {
236 #[cfg(feature = "simd")]
237 {
238 crate::simd::simd_transpose_blocked_f64(a)
239 }
240 #[cfg(not(feature = "simd"))]
241 {
242 a.t().to_owned()
243 }
244 }
245 fn simd_sum_squares(a: &ArrayView1<Self>) -> Self {
246 a.iter().map(|&x| x * x).sum()
247 }
248 fn simd_multiply(a: &ArrayView1<Self>, b: &ArrayView1<Self>) -> Array1<Self> {
249 Self::simd_mul(a, b)
250 }
251 #[cfg(feature = "simd")]
252 fn simd_available() -> bool {
253 true
254 }
255 #[cfg(not(feature = "simd"))]
256 fn simd_available() -> bool {
257 false
258 }
259 fn simd_sub_f32_ultra(
260 a: &ArrayView1<Self>,
261 b: &ArrayView1<Self>,
262 result: &mut ArrayViewMut1<Self>,
263 ) {
264 let sub_result = Self::simd_sub(a, b);
265 result.assign(&sub_result);
266 }
267 fn simd_mul_f32_ultra(
268 a: &ArrayView1<Self>,
269 b: &ArrayView1<Self>,
270 result: &mut ArrayViewMut1<Self>,
271 ) {
272 let mul_result = Self::simd_mul(a, b);
273 result.assign(&mul_result);
274 }
275 fn simd_sum_cubes(a: &ArrayView1<Self>) -> Self {
276 a.iter().map(|&x| x * x * x).sum()
277 }
278 fn simd_div_f32_ultra(
279 a: &ArrayView1<Self>,
280 b: &ArrayView1<Self>,
281 result: &mut ArrayViewMut1<Self>,
282 ) {
283 let div_result = Self::simd_div(a, b);
284 result.assign(&div_result);
285 }
286 fn simd_sin_f32_ultra(a: &ArrayView1<Self>, result: &mut ArrayViewMut1<Self>) {
287 let sin_result = a.mapv(|x| x.sin());
288 result.assign(&sin_result);
289 }
290 fn simd_add_f32_ultra(
291 a: &ArrayView1<Self>,
292 b: &ArrayView1<Self>,
293 result: &mut ArrayViewMut1<Self>,
294 ) {
295 let add_result = Self::simd_add(a, b);
296 result.assign(&add_result);
297 }
298 fn simd_fma_f32_ultra(
299 a: &ArrayView1<Self>,
300 b: &ArrayView1<Self>,
301 c: &ArrayView1<Self>,
302 result: &mut ArrayViewMut1<Self>,
303 ) {
304 let fma_result = Self::simd_fma(a, b, c);
305 result.assign(&fma_result);
306 }
307 fn simd_pow_f32_ultra(
308 a: &ArrayView1<Self>,
309 b: &ArrayView1<Self>,
310 result: &mut ArrayViewMut1<Self>,
311 ) {
312 let pow_result = a
313 .iter()
314 .zip(b.iter())
315 .map(|(&x, &y)| x.powf(y))
316 .collect::<Vec<_>>();
317 result.assign(&Array1::from_vec(pow_result));
318 }
319 fn simd_exp_f32_ultra(a: &ArrayView1<Self>, result: &mut ArrayViewMut1<Self>) {
320 let exp_result = a.mapv(|x| x.exp());
321 result.assign(&exp_result);
322 }
323 fn simd_cos_f32_ultra(a: &ArrayView1<Self>, result: &mut ArrayViewMut1<Self>) {
324 let cos_result = a.mapv(|x| x.cos());
325 result.assign(&cos_result);
326 }
327 fn simd_dot_f32_ultra(a: &ArrayView1<Self>, b: &ArrayView1<Self>) -> Self {
328 Self::simd_dot(a, b)
329 }
330 #[cfg(feature = "simd")]
331 fn simd_variance(a: &ArrayView1<Self>) -> Self {
332 crate::simd::simd_variance_f64(a)
333 }
334 #[cfg(not(feature = "simd"))]
335 fn simd_variance(a: &ArrayView1<Self>) -> Self {
336 let mean = Self::simd_mean(a);
337 let n = a.len() as f64;
338 if n < 2.0 {
339 return f64::NAN;
340 }
341 a.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / (n - 1.0)
342 }
343 #[cfg(feature = "simd")]
344 fn simd_std(a: &ArrayView1<Self>) -> Self {
345 crate::simd::simd_std_f64(a)
346 }
347 #[cfg(not(feature = "simd"))]
348 fn simd_std(a: &ArrayView1<Self>) -> Self {
349 Self::simd_variance(a).sqrt()
350 }
351 #[cfg(feature = "simd")]
352 fn simd_norm_l1(a: &ArrayView1<Self>) -> Self {
353 crate::simd::simd_norm_l1_f64(a)
354 }
355 #[cfg(not(feature = "simd"))]
356 fn simd_norm_l1(a: &ArrayView1<Self>) -> Self {
357 a.iter().map(|&x| x.abs()).sum()
358 }
359 #[cfg(feature = "simd")]
360 fn simd_norm_linf(a: &ArrayView1<Self>) -> Self {
361 crate::simd::simd_norm_linf_f64(a)
362 }
363 #[cfg(not(feature = "simd"))]
364 fn simd_norm_linf(a: &ArrayView1<Self>) -> Self {
365 a.iter().fold(0.0f64, |acc, &x| acc.max(x.abs()))
366 }
367 #[cfg(feature = "simd")]
368 fn simd_cosine_similarity(a: &ArrayView1<Self>, b: &ArrayView1<Self>) -> Self {
369 crate::simd::simd_cosine_similarity_f64(a, b)
370 }
371 #[cfg(not(feature = "simd"))]
372 fn simd_cosine_similarity(a: &ArrayView1<Self>, b: &ArrayView1<Self>) -> Self {
373 let dot = Self::simd_dot(a, b);
374 let norm_a = Self::simd_norm(a);
375 let norm_b = Self::simd_norm(b);
376 dot / (norm_a * norm_b)
377 }
378 #[cfg(feature = "simd")]
379 fn simd_distance_euclidean(a: &ArrayView1<Self>, b: &ArrayView1<Self>) -> Self {
380 crate::simd::simd_distance_euclidean_f64(a, b)
381 }
382 #[cfg(not(feature = "simd"))]
383 fn simd_distance_euclidean(a: &ArrayView1<Self>, b: &ArrayView1<Self>) -> Self {
384 a.iter()
385 .zip(b.iter())
386 .map(|(&x, &y)| (x - y).powi(2))
387 .sum::<f64>()
388 .sqrt()
389 }
390 #[cfg(feature = "simd")]
391 fn simd_distance_manhattan(a: &ArrayView1<Self>, b: &ArrayView1<Self>) -> Self {
392 crate::simd::simd_distance_manhattan_f64(a, b)
393 }
394 #[cfg(not(feature = "simd"))]
395 fn simd_distance_manhattan(a: &ArrayView1<Self>, b: &ArrayView1<Self>) -> Self {
396 a.iter().zip(b.iter()).map(|(&x, &y)| (x - y).abs()).sum()
397 }
398 #[cfg(feature = "simd")]
399 fn simd_distance_chebyshev(a: &ArrayView1<Self>, b: &ArrayView1<Self>) -> Self {
400 crate::simd::simd_distance_chebyshev_f64(a, b)
401 }
402 #[cfg(not(feature = "simd"))]
403 fn simd_distance_chebyshev(a: &ArrayView1<Self>, b: &ArrayView1<Self>) -> Self {
404 a.iter()
405 .zip(b.iter())
406 .fold(0.0f64, |acc, (&x, &y)| acc.max((x - y).abs()))
407 }
408 #[cfg(feature = "simd")]
409 fn simd_distance_cosine(a: &ArrayView1<Self>, b: &ArrayView1<Self>) -> Self {
410 crate::simd::simd_distance_cosine_f64(a, b)
411 }
412 #[cfg(not(feature = "simd"))]
413 fn simd_distance_cosine(a: &ArrayView1<Self>, b: &ArrayView1<Self>) -> Self {
414 1.0 - Self::simd_cosine_similarity(a, b)
415 }
416 #[cfg(feature = "simd")]
417 fn simd_weighted_sum(values: &ArrayView1<Self>, weights: &ArrayView1<Self>) -> Self {
418 crate::simd::simd_weighted_sum_f64(values, weights)
419 }
420 #[cfg(not(feature = "simd"))]
421 fn simd_weighted_sum(values: &ArrayView1<Self>, weights: &ArrayView1<Self>) -> Self {
422 values
423 .iter()
424 .zip(weights.iter())
425 .map(|(&v, &w)| v * w)
426 .sum()
427 }
428 #[cfg(feature = "simd")]
429 fn simd_weighted_mean(values: &ArrayView1<Self>, weights: &ArrayView1<Self>) -> Self {
430 crate::simd::simd_weighted_mean_f64(values, weights)
431 }
432 #[cfg(not(feature = "simd"))]
433 fn simd_weighted_mean(values: &ArrayView1<Self>, weights: &ArrayView1<Self>) -> Self {
434 let weighted_sum = Self::simd_weighted_sum(values, weights);
435 let weight_sum: f64 = weights.iter().sum();
436 weighted_sum / weight_sum
437 }
438 #[cfg(feature = "simd")]
439 fn simd_argmin(a: &ArrayView1<Self>) -> Option<usize> {
440 crate::simd::simd_argmin_f64(a)
441 }
442 #[cfg(not(feature = "simd"))]
443 fn simd_argmin(a: &ArrayView1<Self>) -> Option<usize> {
444 if a.is_empty() {
445 return None;
446 }
447 let mut min_idx = 0;
448 let mut min_val = a[0];
449 for (i, &v) in a.iter().enumerate().skip(1) {
450 if v < min_val {
451 min_val = v;
452 min_idx = i;
453 }
454 }
455 Some(min_idx)
456 }
457 #[cfg(feature = "simd")]
458 fn simd_argmax(a: &ArrayView1<Self>) -> Option<usize> {
459 crate::simd::simd_argmax_f64(a)
460 }
461 #[cfg(not(feature = "simd"))]
462 fn simd_argmax(a: &ArrayView1<Self>) -> Option<usize> {
463 if a.is_empty() {
464 return None;
465 }
466 let mut max_idx = 0;
467 let mut max_val = a[0];
468 for (i, &v) in a.iter().enumerate().skip(1) {
469 if v > max_val {
470 max_val = v;
471 max_idx = i;
472 }
473 }
474 Some(max_idx)
475 }
476 #[cfg(feature = "simd")]
477 fn simd_clip(a: &ArrayView1<Self>, min_val: Self, max_val: Self) -> Array1<Self> {
478 crate::simd::simd_clip_f64(a, min_val, max_val)
479 }
480 #[cfg(not(feature = "simd"))]
481 fn simd_clip(a: &ArrayView1<Self>, min_val: Self, max_val: Self) -> Array1<Self> {
482 a.mapv(|v| v.max(min_val).min(max_val))
483 }
484 #[cfg(feature = "simd")]
485 fn simd_log_sum_exp(a: &ArrayView1<Self>) -> Self {
486 crate::simd::simd_log_sum_exp_f64(a)
487 }
488 #[cfg(not(feature = "simd"))]
489 fn simd_log_sum_exp(a: &ArrayView1<Self>) -> Self {
490 if a.is_empty() {
491 return f64::NEG_INFINITY;
492 }
493 let max_val = a.fold(f64::NEG_INFINITY, |acc, &x| acc.max(x));
494 let sum_exp: f64 = a.iter().map(|&x| (x - max_val).exp()).sum();
495 max_val + sum_exp.ln()
496 }
497 #[cfg(feature = "simd")]
498 fn simd_softmax(a: &ArrayView1<Self>) -> Array1<Self> {
499 crate::simd::simd_softmax_f64(a)
500 }
501 #[cfg(not(feature = "simd"))]
502 fn simd_softmax(a: &ArrayView1<Self>) -> Array1<Self> {
503 if a.is_empty() {
504 return Array1::zeros(0);
505 }
506 let lse = Self::simd_log_sum_exp(a);
507 a.mapv(|x| (x - lse).exp())
508 }
509 #[cfg(feature = "simd")]
510 fn simd_cumsum(a: &ArrayView1<Self>) -> Array1<Self> {
511 crate::simd::simd_cumsum_f64(a)
512 }
513 #[cfg(not(feature = "simd"))]
514 fn simd_cumsum(a: &ArrayView1<Self>) -> Array1<Self> {
515 if a.is_empty() {
516 return Array1::zeros(0);
517 }
518 let mut cumsum = 0.0f64;
519 a.mapv(|x| {
520 cumsum += x;
521 cumsum
522 })
523 }
524 #[cfg(feature = "simd")]
525 fn simd_cumprod(a: &ArrayView1<Self>) -> Array1<Self> {
526 crate::simd::simd_cumprod_f64(a)
527 }
528 #[cfg(not(feature = "simd"))]
529 fn simd_cumprod(a: &ArrayView1<Self>) -> Array1<Self> {
530 if a.is_empty() {
531 return Array1::zeros(0);
532 }
533 let mut cumprod = 1.0f64;
534 a.mapv(|x| {
535 cumprod *= x;
536 cumprod
537 })
538 }
539 #[cfg(feature = "simd")]
540 fn simd_diff(a: &ArrayView1<Self>) -> Array1<Self> {
541 crate::simd::simd_diff_f64(a)
542 }
543 #[cfg(not(feature = "simd"))]
544 fn simd_diff(a: &ArrayView1<Self>) -> Array1<Self> {
545 if a.len() <= 1 {
546 return Array1::zeros(0);
547 }
548 Array1::from_iter((1..a.len()).map(|i| a[i] - a[i - 1]))
549 }
550 #[cfg(feature = "simd")]
551 fn simd_sign(a: &ArrayView1<Self>) -> Array1<Self> {
552 crate::simd::simd_sign_f64(a)
553 }
554 #[cfg(not(feature = "simd"))]
555 fn simd_sign(a: &ArrayView1<Self>) -> Array1<Self> {
556 a.mapv(|x| {
557 if x > 0.0 {
558 1.0
559 } else if x < 0.0 {
560 -1.0
561 } else {
562 0.0
563 }
564 })
565 }
566 #[cfg(feature = "simd")]
567 fn simd_relu(a: &ArrayView1<Self>) -> Array1<Self> {
568 crate::simd::simd_relu_f64(a)
569 }
570 #[cfg(not(feature = "simd"))]
571 fn simd_relu(a: &ArrayView1<Self>) -> Array1<Self> {
572 a.mapv(|x| x.max(0.0))
573 }
574 #[cfg(feature = "simd")]
575 fn simd_leaky_relu(a: &ArrayView1<Self>, alpha: Self) -> Array1<Self> {
576 crate::simd::simd_leaky_relu_f64(a, alpha)
577 }
578 #[cfg(not(feature = "simd"))]
579 fn simd_leaky_relu(a: &ArrayView1<Self>, alpha: Self) -> Array1<Self> {
580 a.mapv(|x| if x > 0.0 { x } else { alpha * x })
581 }
582 #[cfg(feature = "simd")]
583 fn simd_normalize(a: &ArrayView1<Self>) -> Array1<Self> {
584 crate::simd::simd_normalize_f64(a)
585 }
586 #[cfg(not(feature = "simd"))]
587 fn simd_normalize(a: &ArrayView1<Self>) -> Array1<Self> {
588 let norm: f64 = a.iter().map(|x| x * x).sum::<f64>().sqrt();
589 if norm == 0.0 {
590 return a.to_owned();
591 }
592 a.mapv(|x| x / norm)
593 }
594 #[cfg(feature = "simd")]
595 fn simd_standardize(a: &ArrayView1<Self>) -> Array1<Self> {
596 crate::simd::simd_standardize_f64(a)
597 }
598 #[cfg(not(feature = "simd"))]
599 fn simd_standardize(a: &ArrayView1<Self>) -> Array1<Self> {
600 if a.len() <= 1 {
601 return Array1::zeros(a.len());
602 }
603 let mean: f64 = a.iter().sum::<f64>() / a.len() as f64;
604 let variance: f64 =
605 a.iter().map(|x| (x - mean) * (x - mean)).sum::<f64>() / (a.len() - 1) as f64;
606 let std = variance.sqrt();
607 if std == 0.0 {
608 return Array1::zeros(a.len());
609 }
610 a.mapv(|x| (x - mean) / std)
611 }
612 fn simd_abs(a: &ArrayView1<Self>) -> Array1<Self> {
613 a.mapv(|x| x.abs())
614 }
615 fn simd_sqrt(a: &ArrayView1<Self>) -> Array1<Self> {
616 a.mapv(|x| x.sqrt())
617 }
618 fn simd_exp(a: &ArrayView1<Self>) -> Array1<Self> {
619 a.mapv(|x| x.exp())
620 }
621 fn simd_ln(a: &ArrayView1<Self>) -> Array1<Self> {
622 a.mapv(|x| x.ln())
623 }
624 fn simd_sin(a: &ArrayView1<Self>) -> Array1<Self> {
625 a.mapv(|x| x.sin())
626 }
627 fn simd_cos(a: &ArrayView1<Self>) -> Array1<Self> {
628 a.mapv(|x| x.cos())
629 }
630 fn simd_tan(a: &ArrayView1<Self>) -> Array1<Self> {
631 a.mapv(|x| x.tan())
632 }
633 fn simd_sinh(a: &ArrayView1<Self>) -> Array1<Self> {
634 #[cfg(feature = "simd")]
635 {
636 simd_ops_polynomial::simd_sinh_f64_poly(a)
637 }
638 #[cfg(not(feature = "simd"))]
639 {
640 a.mapv(|x| x.sinh())
641 }
642 }
643 fn simd_cosh(a: &ArrayView1<Self>) -> Array1<Self> {
644 #[cfg(feature = "simd")]
645 {
646 simd_ops_polynomial::simd_cosh_f64_poly(a)
647 }
648 #[cfg(not(feature = "simd"))]
649 {
650 a.mapv(|x| x.cosh())
651 }
652 }
653 fn simd_tanh(a: &ArrayView1<Self>) -> Array1<Self> {
654 #[cfg(feature = "simd")]
655 {
656 simd_ops_polynomial::simd_tanh_f64_poly(a)
657 }
658 #[cfg(not(feature = "simd"))]
659 {
660 a.mapv(|x| x.tanh())
661 }
662 }
663 fn simd_floor(a: &ArrayView1<Self>) -> Array1<Self> {
664 #[cfg(feature = "simd")]
665 {
666 crate::simd::simd_floor_f64(a)
667 }
668 #[cfg(not(feature = "simd"))]
669 {
670 a.mapv(|x| x.floor())
671 }
672 }
673 fn simd_ceil(a: &ArrayView1<Self>) -> Array1<Self> {
674 #[cfg(feature = "simd")]
675 {
676 crate::simd::simd_ceil_f64(a)
677 }
678 #[cfg(not(feature = "simd"))]
679 {
680 a.mapv(|x| x.ceil())
681 }
682 }
683 fn simd_round(a: &ArrayView1<Self>) -> Array1<Self> {
684 #[cfg(feature = "simd")]
685 {
686 crate::simd::simd_round_f64(a)
687 }
688 #[cfg(not(feature = "simd"))]
689 {
690 a.mapv(|x| x.round())
691 }
692 }
693 fn simd_atan(a: &ArrayView1<Self>) -> Array1<Self> {
694 a.mapv(|x| x.atan())
695 }
696 fn simd_asin(a: &ArrayView1<Self>) -> Array1<Self> {
697 a.mapv(|x| x.asin())
698 }
699 fn simd_acos(a: &ArrayView1<Self>) -> Array1<Self> {
700 a.mapv(|x| x.acos())
701 }
702 fn simd_atan2(y: &ArrayView1<Self>, x: &ArrayView1<Self>) -> Array1<Self> {
703 y.iter()
704 .zip(x.iter())
705 .map(|(&y_val, &x_val)| y_val.atan2(x_val))
706 .collect::<Vec<_>>()
707 .into()
708 }
709 fn simd_log10(a: &ArrayView1<Self>) -> Array1<Self> {
710 const LOG10_E: f64 = std::f64::consts::LOG10_E;
711 let ln_a = Self::simd_ln(a);
712 Self::simd_scalar_mul(&ln_a.view(), LOG10_E)
713 }
714 fn simd_log2(a: &ArrayView1<Self>) -> Array1<Self> {
715 const LOG2_E: f64 = std::f64::consts::LOG2_E;
716 let ln_a = Self::simd_ln(a);
717 Self::simd_scalar_mul(&ln_a.view(), LOG2_E)
718 }
719 #[cfg(feature = "simd")]
720 fn simd_clamp(a: &ArrayView1<Self>, min: Self, max: Self) -> Array1<Self> {
721 crate::simd::simd_clip_f64(a, min, max)
722 }
723 #[cfg(not(feature = "simd"))]
724 fn simd_clamp(a: &ArrayView1<Self>, min: Self, max: Self) -> Array1<Self> {
725 a.mapv(|x| x.clamp(min, max))
726 }
727 fn simd_fract(a: &ArrayView1<Self>) -> Array1<Self> {
728 #[cfg(feature = "simd")]
729 {
730 let truncated = crate::simd::simd_trunc_f64(a);
731 Self::simd_sub(a, &truncated.view())
732 }
733 #[cfg(not(feature = "simd"))]
734 {
735 a.mapv(|x| x.fract())
736 }
737 }
738 fn simd_trunc(a: &ArrayView1<Self>) -> Array1<Self> {
739 #[cfg(feature = "simd")]
740 {
741 crate::simd::simd_trunc_f64(a)
742 }
743 #[cfg(not(feature = "simd"))]
744 {
745 a.mapv(|x| x.trunc())
746 }
747 }
748 fn simd_recip(a: &ArrayView1<Self>) -> Array1<Self> {
749 let ones = Array1::from_elem(a.len(), 1.0f64);
750 Self::simd_div(&ones.view(), a)
751 }
752 fn simd_powf(base: &ArrayView1<Self>, exp: Self) -> Array1<Self> {
753 let ln_base = Self::simd_ln(base);
754 let scaled = Self::simd_scalar_mul(&ln_base.view(), exp);
755 Self::simd_exp(&scaled.view())
756 }
757 fn simd_pow(base: &ArrayView1<Self>, exp: &ArrayView1<Self>) -> Array1<Self> {
758 let ln_base = Self::simd_ln(base);
759 let scaled = Self::simd_mul(&ln_base.view(), exp);
760 Self::simd_exp(&scaled.view())
761 }
762 #[cfg(feature = "simd")]
763 fn simd_powi(base: &ArrayView1<Self>, n: i32) -> Array1<Self> {
764 crate::simd::unary_powi::simd_powi_f64(base, n)
765 }
766 #[cfg(not(feature = "simd"))]
767 fn simd_powi(base: &ArrayView1<Self>, n: i32) -> Array1<Self> {
768 base.mapv(|x| x.powi(n))
769 }
770 fn simd_gamma(x: &ArrayView1<Self>) -> Array1<Self> {
771 x.mapv(lanczos_gamma_f64)
772 }
773 fn simd_exp2(a: &ArrayView1<Self>) -> Array1<Self> {
774 const LN2: f64 = std::f64::consts::LN_2;
775 let scaled = Self::simd_scalar_mul(a, LN2);
776 Self::simd_exp(&scaled.view())
777 }
778 fn simd_cbrt(a: &ArrayView1<Self>) -> Array1<Self> {
779 a.mapv(|x| x.cbrt())
780 }
781 fn simd_ln_1p(a: &ArrayView1<Self>) -> Array1<Self> {
782 a.mapv(|x| x.ln_1p())
783 }
784 fn simd_exp_m1(a: &ArrayView1<Self>) -> Array1<Self> {
785 a.mapv(|x| x.exp_m1())
786 }
787 fn simd_to_radians(a: &ArrayView1<Self>) -> Array1<Self> {
788 const DEG_TO_RAD: f64 = std::f64::consts::PI / 180.0;
789 Self::simd_scalar_mul(a, DEG_TO_RAD)
790 }
791 fn simd_to_degrees(a: &ArrayView1<Self>) -> Array1<Self> {
792 const RAD_TO_DEG: f64 = 180.0 / std::f64::consts::PI;
793 Self::simd_scalar_mul(a, RAD_TO_DEG)
794 }
795 fn simd_digamma(a: &ArrayView1<Self>) -> Array1<Self> {
796 a.mapv(digamma_f64)
797 }
798 fn simd_trigamma(a: &ArrayView1<Self>) -> Array1<Self> {
799 a.mapv(trigamma_f64)
800 }
801 fn simd_ln_gamma(a: &ArrayView1<Self>) -> Array1<Self> {
802 a.mapv(ln_gamma_f64)
803 }
804 fn simd_erf(a: &ArrayView1<Self>) -> Array1<Self> {
805 a.mapv(erf_f64)
806 }
807 fn simd_erfc(a: &ArrayView1<Self>) -> Array1<Self> {
808 a.mapv(erfc_f64)
809 }
810 fn simd_erfinv(a: &ArrayView1<Self>) -> Array1<Self> {
811 a.mapv(erfinv_f64)
812 }
813 fn simd_erfcinv(a: &ArrayView1<Self>) -> Array1<Self> {
814 a.mapv(erfcinv_f64)
815 }
816 fn simd_sigmoid(a: &ArrayView1<Self>) -> Array1<Self> {
817 a.mapv(sigmoid_f64)
818 }
819 fn simd_gelu(a: &ArrayView1<Self>) -> Array1<Self> {
820 a.mapv(gelu_f64)
821 }
822 fn simd_swish(a: &ArrayView1<Self>) -> Array1<Self> {
823 a.mapv(swish_f64)
824 }
825 fn simd_softplus(a: &ArrayView1<Self>) -> Array1<Self> {
826 a.mapv(softplus_f64)
827 }
828 fn simd_mish(a: &ArrayView1<Self>) -> Array1<Self> {
829 a.mapv(mish_f64)
830 }
831 fn simd_elu(a: &ArrayView1<Self>, alpha: Self) -> Array1<Self> {
832 a.mapv(|x| elu_f64(x, alpha))
833 }
834 fn simd_selu(a: &ArrayView1<Self>) -> Array1<Self> {
835 a.mapv(selu_f64)
836 }
837 fn simd_hardsigmoid(a: &ArrayView1<Self>) -> Array1<Self> {
838 a.mapv(hardsigmoid_f64)
839 }
840 fn simd_hardswish(a: &ArrayView1<Self>) -> Array1<Self> {
841 a.mapv(hardswish_f64)
842 }
843 fn simd_sinc(a: &ArrayView1<Self>) -> Array1<Self> {
844 a.mapv(sinc_f64)
845 }
846 fn simd_log_softmax(a: &ArrayView1<Self>) -> Array1<Self> {
847 if a.is_empty() {
848 return Array1::zeros(0);
849 }
850 let lse = Self::simd_log_sum_exp(a);
851 a.mapv(|x| x - lse)
852 }
853 fn simd_asinh(a: &ArrayView1<Self>) -> Array1<Self> {
854 a.mapv(|x| x.asinh())
855 }
856 fn simd_acosh(a: &ArrayView1<Self>) -> Array1<Self> {
857 a.mapv(|x| x.acosh())
858 }
859 fn simd_atanh(a: &ArrayView1<Self>) -> Array1<Self> {
860 a.mapv(|x| x.atanh())
861 }
862 fn simd_ln_beta(a: &ArrayView1<Self>, b: &ArrayView1<Self>) -> Array1<Self> {
863 let ln_gamma_a = Self::simd_ln_gamma(a);
864 let ln_gamma_b = Self::simd_ln_gamma(b);
865 let a_plus_b = Self::simd_add(a, b);
866 let ln_gamma_ab = Self::simd_ln_gamma(&a_plus_b.view());
867 Self::simd_sub(
868 &Self::simd_add(&ln_gamma_a.view(), &ln_gamma_b.view()).view(),
869 &ln_gamma_ab.view(),
870 )
871 }
872 fn simd_beta(a: &ArrayView1<Self>, b: &ArrayView1<Self>) -> Array1<Self> {
873 let ln_beta = Self::simd_ln_beta(a, b);
874 Self::simd_exp(&ln_beta.view())
875 }
876 fn simd_lerp(a: &ArrayView1<Self>, b: &ArrayView1<Self>, t: Self) -> Array1<Self> {
877 if a.is_empty() || b.is_empty() {
878 return Array1::zeros(0);
879 }
880 let diff = Self::simd_sub(b, a);
881 let scaled = Self::simd_scalar_mul(&diff.view(), t);
882 Self::simd_add(a, &scaled.view())
883 }
884 fn simd_smoothstep(edge0: Self, edge1: Self, x: &ArrayView1<Self>) -> Array1<Self> {
885 if x.is_empty() {
886 return Array1::zeros(0);
887 }
888 let range = edge1 - edge0;
889 if range.abs() < Self::EPSILON {
890 return x.mapv(|xi| if xi < edge0 { 0.0 } else { 1.0 });
891 }
892 x.mapv(|xi| {
893 let t = ((xi - edge0) / range).clamp(0.0, 1.0);
894 t * t * (3.0 - 2.0 * t)
895 })
896 }
897 fn simd_hypot(x: &ArrayView1<Self>, y: &ArrayView1<Self>) -> Array1<Self> {
898 if x.is_empty() || y.is_empty() {
899 return Array1::zeros(0);
900 }
901 let len = x.len().min(y.len());
902 Array1::from_iter((0..len).map(|i| x[i].hypot(y[i])))
903 }
904 fn simd_copysign(x: &ArrayView1<Self>, y: &ArrayView1<Self>) -> Array1<Self> {
905 if x.is_empty() || y.is_empty() {
906 return Array1::zeros(0);
907 }
908 let len = x.len().min(y.len());
909 Array1::from_iter((0..len).map(|i| x[i].copysign(y[i])))
910 }
911 fn simd_smootherstep(edge0: Self, edge1: Self, x: &ArrayView1<Self>) -> Array1<Self> {
912 if x.is_empty() {
913 return Array1::zeros(0);
914 }
915 let range = edge1 - edge0;
916 if range.abs() < Self::EPSILON {
917 return x.mapv(|xi| if xi < edge0 { 0.0 } else { 1.0 });
918 }
919 x.mapv(|xi| {
920 let t = ((xi - edge0) / range).clamp(0.0, 1.0);
921 let t3 = t * t * t;
922 t3 * (t * (t * 6.0 - 15.0) + 10.0)
923 })
924 }
925 fn simd_logaddexp(a: &ArrayView1<Self>, b: &ArrayView1<Self>) -> Array1<Self> {
926 if a.is_empty() || b.is_empty() {
927 return Array1::zeros(0);
928 }
929 let len = a.len().min(b.len());
930 Array1::from_iter((0..len).map(|i| {
931 let ai = a[i];
932 let bi = b[i];
933 let max_val = ai.max(bi);
934 let diff = (ai - bi).abs();
935 if diff > 50.0 {
936 max_val
937 } else {
938 max_val + (1.0 + (-diff).exp()).ln()
939 }
940 }))
941 }
942 fn simd_logit(a: &ArrayView1<Self>) -> Array1<Self> {
943 if a.is_empty() {
944 return Array1::zeros(0);
945 }
946 a.mapv(|p| {
947 if p <= 0.0 {
948 Self::NEG_INFINITY
949 } else if p >= 1.0 {
950 Self::INFINITY
951 } else {
952 (p / (1.0 - p)).ln()
953 }
954 })
955 }
956 fn simd_square(a: &ArrayView1<Self>) -> Array1<Self> {
957 if a.is_empty() {
958 return Array1::zeros(0);
959 }
960 a.mapv(|x| x * x)
961 }
962 fn simd_rsqrt(a: &ArrayView1<Self>) -> Array1<Self> {
963 if a.is_empty() {
964 return Array1::zeros(0);
965 }
966 a.mapv(|x| {
967 if x <= 0.0 {
968 if x == 0.0 {
969 Self::INFINITY
970 } else {
971 Self::NAN
972 }
973 } else {
974 1.0 / x.sqrt()
975 }
976 })
977 }
978 fn simd_sincos(a: &ArrayView1<Self>) -> (Array1<Self>, Array1<Self>) {
979 if a.is_empty() {
980 return (Array1::zeros(0), Array1::zeros(0));
981 }
982 let sin_result = a.mapv(|x| x.sin());
983 let cos_result = a.mapv(|x| x.cos());
984 (sin_result, cos_result)
985 }
986 fn simd_expm1(a: &ArrayView1<Self>) -> Array1<Self> {
987 if a.is_empty() {
988 return Array1::zeros(0);
989 }
990 a.mapv(|x| x.exp_m1())
991 }
992 fn simd_log1p(a: &ArrayView1<Self>) -> Array1<Self> {
993 if a.is_empty() {
994 return Array1::zeros(0);
995 }
996 a.mapv(|x| x.ln_1p())
997 }
998
999 #[cfg(feature = "simd")]
1004 fn simd_add_into(a: &[Self], b: &[Self], output: &mut [Self]) {
1005 assert_eq!(a.len(), b.len(), "Input arrays must have same length");
1006 assert_eq!(
1007 a.len(),
1008 output.len(),
1009 "Output buffer must match input length"
1010 );
1011
1012 let len = a.len();
1013
1014 #[cfg(target_arch = "x86_64")]
1015 {
1016 use std::arch::x86_64::*;
1017
1018 if is_x86_feature_detected!("avx2") {
1019 unsafe {
1020 let mut i = 0;
1021 while i + 4 <= len {
1023 let a_vec = _mm256_loadu_pd(a.as_ptr().add(i));
1024 let b_vec = _mm256_loadu_pd(b.as_ptr().add(i));
1025 let result_vec = _mm256_add_pd(a_vec, b_vec);
1026 _mm256_storeu_pd(output.as_mut_ptr().add(i), result_vec);
1027 i += 4;
1028 }
1029 while i < len {
1030 *output.get_unchecked_mut(i) = *a.get_unchecked(i) + *b.get_unchecked(i);
1031 i += 1;
1032 }
1033 }
1034 return;
1035 }
1036 }
1037
1038 #[cfg(target_arch = "aarch64")]
1039 {
1040 use std::arch::aarch64::*;
1041
1042 if std::arch::is_aarch64_feature_detected!("neon") {
1043 unsafe {
1044 let mut i = 0;
1045 while i + 2 <= len {
1047 let a_vec = vld1q_f64(a.as_ptr().add(i));
1048 let b_vec = vld1q_f64(b.as_ptr().add(i));
1049 let result_vec = vaddq_f64(a_vec, b_vec);
1050 vst1q_f64(output.as_mut_ptr().add(i), result_vec);
1051 i += 2;
1052 }
1053 while i < len {
1054 *output.get_unchecked_mut(i) = *a.get_unchecked(i) + *b.get_unchecked(i);
1055 i += 1;
1056 }
1057 }
1058 return;
1059 }
1060 }
1061
1062 for i in 0..len {
1063 output[i] = a[i] + b[i];
1064 }
1065 }
1066
1067 #[cfg(not(feature = "simd"))]
1068 fn simd_add_into(a: &[Self], b: &[Self], output: &mut [Self]) {
1069 assert_eq!(a.len(), b.len(), "Input arrays must have same length");
1070 assert_eq!(
1071 a.len(),
1072 output.len(),
1073 "Output buffer must match input length"
1074 );
1075 for i in 0..a.len() {
1076 output[i] = a[i] + b[i];
1077 }
1078 }
1079
1080 fn simd_sub_into(a: &[Self], b: &[Self], output: &mut [Self]) {
1081 assert_eq!(a.len(), b.len(), "Input arrays must have same length");
1082 assert_eq!(
1083 a.len(),
1084 output.len(),
1085 "Output buffer must match input length"
1086 );
1087 for i in 0..a.len() {
1088 output[i] = a[i] - b[i];
1089 }
1090 }
1091
1092 #[cfg(feature = "simd")]
1093 fn simd_mul_into(a: &[Self], b: &[Self], output: &mut [Self]) {
1094 assert_eq!(a.len(), b.len(), "Input arrays must have same length");
1095 assert_eq!(
1096 a.len(),
1097 output.len(),
1098 "Output buffer must match input length"
1099 );
1100
1101 let len = a.len();
1102
1103 #[cfg(target_arch = "x86_64")]
1104 {
1105 use std::arch::x86_64::*;
1106
1107 if is_x86_feature_detected!("avx2") {
1108 unsafe {
1109 let mut i = 0;
1110 while i + 4 <= len {
1111 let a_vec = _mm256_loadu_pd(a.as_ptr().add(i));
1112 let b_vec = _mm256_loadu_pd(b.as_ptr().add(i));
1113 let result_vec = _mm256_mul_pd(a_vec, b_vec);
1114 _mm256_storeu_pd(output.as_mut_ptr().add(i), result_vec);
1115 i += 4;
1116 }
1117 while i < len {
1118 *output.get_unchecked_mut(i) = *a.get_unchecked(i) * *b.get_unchecked(i);
1119 i += 1;
1120 }
1121 }
1122 return;
1123 }
1124 }
1125
1126 #[cfg(target_arch = "aarch64")]
1127 {
1128 use std::arch::aarch64::*;
1129
1130 if std::arch::is_aarch64_feature_detected!("neon") {
1131 unsafe {
1132 let mut i = 0;
1133 while i + 2 <= len {
1134 let a_vec = vld1q_f64(a.as_ptr().add(i));
1135 let b_vec = vld1q_f64(b.as_ptr().add(i));
1136 let result_vec = vmulq_f64(a_vec, b_vec);
1137 vst1q_f64(output.as_mut_ptr().add(i), result_vec);
1138 i += 2;
1139 }
1140 while i < len {
1141 *output.get_unchecked_mut(i) = *a.get_unchecked(i) * *b.get_unchecked(i);
1142 i += 1;
1143 }
1144 }
1145 return;
1146 }
1147 }
1148
1149 for i in 0..len {
1150 output[i] = a[i] * b[i];
1151 }
1152 }
1153
1154 #[cfg(not(feature = "simd"))]
1155 fn simd_mul_into(a: &[Self], b: &[Self], output: &mut [Self]) {
1156 assert_eq!(a.len(), b.len(), "Input arrays must have same length");
1157 assert_eq!(
1158 a.len(),
1159 output.len(),
1160 "Output buffer must match input length"
1161 );
1162 for i in 0..a.len() {
1163 output[i] = a[i] * b[i];
1164 }
1165 }
1166
1167 fn simd_div_into(a: &[Self], b: &[Self], output: &mut [Self]) {
1168 assert_eq!(a.len(), b.len(), "Input arrays must have same length");
1169 assert_eq!(
1170 a.len(),
1171 output.len(),
1172 "Output buffer must match input length"
1173 );
1174 for i in 0..a.len() {
1175 output[i] = a[i] / b[i];
1176 }
1177 }
1178
1179 fn simd_add_inplace(a: &mut [Self], b: &[Self]) {
1180 assert_eq!(a.len(), b.len(), "Arrays must have same length");
1181 for i in 0..a.len() {
1182 a[i] += b[i];
1183 }
1184 }
1185
1186 fn simd_sub_inplace(a: &mut [Self], b: &[Self]) {
1187 assert_eq!(a.len(), b.len(), "Arrays must have same length");
1188 for i in 0..a.len() {
1189 a[i] -= b[i];
1190 }
1191 }
1192
1193 fn simd_mul_inplace(a: &mut [Self], b: &[Self]) {
1194 assert_eq!(a.len(), b.len(), "Arrays must have same length");
1195 for i in 0..a.len() {
1196 a[i] *= b[i];
1197 }
1198 }
1199
1200 fn simd_div_inplace(a: &mut [Self], b: &[Self]) {
1201 assert_eq!(a.len(), b.len(), "Arrays must have same length");
1202 for i in 0..a.len() {
1203 a[i] /= b[i];
1204 }
1205 }
1206
1207 fn simd_add_scalar_inplace(a: &mut [Self], scalar: Self) {
1208 for x in a.iter_mut() {
1209 *x += scalar;
1210 }
1211 }
1212
1213 fn simd_mul_scalar_inplace(a: &mut [Self], scalar: Self) {
1214 for x in a.iter_mut() {
1215 *x *= scalar;
1216 }
1217 }
1218
1219 fn simd_fma_into(a: &[Self], b: &[Self], c: &[Self], output: &mut [Self]) {
1220 assert_eq!(a.len(), b.len(), "Input arrays must have same length");
1221 assert_eq!(a.len(), c.len(), "Input arrays must have same length");
1222 assert_eq!(
1223 a.len(),
1224 output.len(),
1225 "Output buffer must match input length"
1226 );
1227 for i in 0..a.len() {
1228 output[i] = a[i].mul_add(b[i], c[i]);
1229 }
1230 }
1231}