1use crate::Transcendental;
11use std::ops::{Add, Div, Mul, Neg, Rem, Sub};
12use wide::{f32x4, f32x8, f64x2, f64x4, CmpEq, CmpGe, CmpGt, CmpLe, CmpLt, CmpNe};
13
14use crate::math::vector::traits::{Vector, VectorMask, VectorTranscendental};
15
16#[derive(Copy, Clone, Debug, PartialEq)]
22pub struct F32x4(f32x4);
23
24#[derive(Copy, Clone, Debug, PartialEq)]
26pub struct F32x8(f32x8);
27
28#[derive(Copy, Clone, Debug, PartialEq)]
30pub struct F64x2(f64x2);
31
32#[derive(Copy, Clone, Debug, PartialEq)]
34pub struct F64x4(f64x4);
35
36impl Default for F32x4 {
41 fn default() -> Self {
42 Self(f32x4::splat(0.0))
43 }
44}
45
46impl Default for F32x8 {
47 fn default() -> Self {
48 Self(f32x8::splat(0.0))
49 }
50}
51
52impl Default for F64x2 {
53 fn default() -> Self {
54 Self(f64x2::splat(0.0))
55 }
56}
57
58impl Default for F64x4 {
59 fn default() -> Self {
60 Self(f64x4::splat(0.0))
61 }
62}
63
64impl Vector<f32, 4> for F32x4 {
69 fn splat(value: f32) -> Self {
70 F32x4(f32x4::splat(value))
71 }
72
73 fn load(slice: &[f32]) -> Self {
74 let mut arr = [0.0f32; 4];
75 arr.copy_from_slice(&slice[0..4]);
76 F32x4(f32x4::from(arr))
77 }
78
79 fn store(&self, slice: &mut [f32]) {
80 let arr: [f32; 4] = self.0.into();
81 slice[0..4].copy_from_slice(&arr);
82 }
83
84 fn extract(&self, index: usize) -> f32 {
85 let arr: [f32; 4] = self.0.into();
86 arr[index]
87 }
88
89 fn insert(&self, index: usize, value: f32) -> Self {
90 let mut arr: [f32; 4] = self.0.into();
91 arr[index] = value;
92 F32x4(f32x4::from(arr))
93 }
94
95 fn add(&self, other: &Self) -> Self {
96 F32x4(self.0 + other.0)
97 }
98
99 fn sub(&self, other: &Self) -> Self {
100 F32x4(self.0 - other.0)
101 }
102
103 fn mul(&self, other: &Self) -> Self {
104 F32x4(self.0 * other.0)
105 }
106
107 fn div(&self, other: &Self) -> Self {
108 F32x4(self.0 / other.0)
109 }
110
111 fn rem(&self, other: &Self) -> Self {
112 let a: [f32; 4] = self.0.into();
114 let b: [f32; 4] = other.0.into();
115 let mut arr = [0.0f32; 4];
116 for i in 0..4 {
117 arr[i] = a[i] % b[i];
118 }
119 F32x4(f32x4::from(arr))
120 }
121
122 fn neg(&self) -> Self {
123 F32x4(-self.0)
124 }
125
126 fn abs(&self) -> Self {
127 F32x4(self.0.abs())
128 }
129
130 fn min(&self, other: &Self) -> Self {
131 F32x4(self.0.min(other.0))
132 }
133
134 fn max(&self, other: &Self) -> Self {
135 F32x4(self.0.max(other.0))
136 }
137
138 fn clamp(&self, min: &Self, max: &Self) -> Self {
139 F32x4(self.0.max(min.0).min(max.0))
141 }
142
143}
144
145impl VectorTranscendental<f32, 4> for F32x4 {
146 fn sqrt(&self) -> Self { F32x4(self.0.sqrt()) }
147 fn exp(&self) -> Self { F32x4(self.0.exp()) }
148 fn ln(&self) -> Self { F32x4(self.0.ln()) }
149 fn sin(&self) -> Self { F32x4(self.0.sin()) }
150 fn cos(&self) -> Self { F32x4(self.0.cos()) }
151 fn tan(&self) -> Self { F32x4(self.0.tan()) }
152}
153
154impl Vector<f32, 8> for F32x8 {
159 fn splat(value: f32) -> Self {
160 F32x8(f32x8::splat(value))
161 }
162
163 fn load(slice: &[f32]) -> Self {
164 let mut arr = [0.0f32; 8];
165 arr.copy_from_slice(&slice[0..8]);
166 F32x8(f32x8::from(arr))
167 }
168
169 fn store(&self, slice: &mut [f32]) {
170 let arr: [f32; 8] = self.0.into();
171 slice[0..8].copy_from_slice(&arr);
172 }
173
174 fn extract(&self, index: usize) -> f32 {
175 let arr: [f32; 8] = self.0.into();
176 arr[index]
177 }
178
179 fn insert(&self, index: usize, value: f32) -> Self {
180 let mut arr: [f32; 8] = self.0.into();
181 arr[index] = value;
182 F32x8(f32x8::from(arr))
183 }
184
185 fn add(&self, other: &Self) -> Self {
186 F32x8(self.0 + other.0)
187 }
188
189 fn sub(&self, other: &Self) -> Self {
190 F32x8(self.0 - other.0)
191 }
192
193 fn mul(&self, other: &Self) -> Self {
194 F32x8(self.0 * other.0)
195 }
196
197 fn div(&self, other: &Self) -> Self {
198 F32x8(self.0 / other.0)
199 }
200
201 fn rem(&self, other: &Self) -> Self {
202 let a: [f32; 8] = self.0.into();
203 let b: [f32; 8] = other.0.into();
204 let mut arr = [0.0f32; 8];
205 for i in 0..8 {
206 arr[i] = a[i] % b[i];
207 }
208 F32x8(f32x8::from(arr))
209 }
210
211 fn neg(&self) -> Self {
212 F32x8(-self.0)
213 }
214
215 fn abs(&self) -> Self {
216 F32x8(self.0.abs())
217 }
218
219 fn min(&self, other: &Self) -> Self {
220 F32x8(self.0.min(other.0))
221 }
222
223 fn max(&self, other: &Self) -> Self {
224 F32x8(self.0.max(other.0))
225 }
226
227 fn clamp(&self, min: &Self, max: &Self) -> Self {
228 F32x8(self.0.max(min.0).min(max.0))
229 }
230
231}
232
233impl VectorTranscendental<f32, 8> for F32x8 {
234 fn sqrt(&self) -> Self { F32x8(self.0.sqrt()) }
235 fn exp(&self) -> Self { F32x8(self.0.exp()) }
236 fn ln(&self) -> Self { F32x8(self.0.ln()) }
237 fn sin(&self) -> Self { F32x8(self.0.sin()) }
238 fn cos(&self) -> Self { F32x8(self.0.cos()) }
239 fn tan(&self) -> Self { F32x8(self.0.tan()) }
240}
241
242impl Vector<f64, 2> for F64x2 {
247 fn splat(value: f64) -> Self {
248 F64x2(f64x2::splat(value))
249 }
250
251 fn load(slice: &[f64]) -> Self {
252 let mut arr = [0.0f64; 2];
253 arr.copy_from_slice(&slice[0..2]);
254 F64x2(f64x2::from(arr))
255 }
256
257 fn store(&self, slice: &mut [f64]) {
258 let arr: [f64; 2] = self.0.into();
259 slice[0..2].copy_from_slice(&arr);
260 }
261
262 fn extract(&self, index: usize) -> f64 {
263 let arr: [f64; 2] = self.0.into();
264 arr[index]
265 }
266
267 fn insert(&self, index: usize, value: f64) -> Self {
268 let mut arr: [f64; 2] = self.0.into();
269 arr[index] = value;
270 F64x2(f64x2::from(arr))
271 }
272
273 fn add(&self, other: &Self) -> Self {
274 F64x2(self.0 + other.0)
275 }
276
277 fn sub(&self, other: &Self) -> Self {
278 F64x2(self.0 - other.0)
279 }
280
281 fn mul(&self, other: &Self) -> Self {
282 F64x2(self.0 * other.0)
283 }
284
285 fn div(&self, other: &Self) -> Self {
286 F64x2(self.0 / other.0)
287 }
288
289 fn rem(&self, other: &Self) -> Self {
290 let a: [f64; 2] = self.0.into();
291 let b: [f64; 2] = other.0.into();
292 let mut arr = [0.0f64; 2];
293 for i in 0..2 {
294 arr[i] = a[i] % b[i];
295 }
296 F64x2(f64x2::from(arr))
297 }
298
299 fn neg(&self) -> Self {
300 F64x2(-self.0)
301 }
302
303 fn abs(&self) -> Self {
304 F64x2(self.0.abs())
305 }
306
307 fn min(&self, other: &Self) -> Self {
308 F64x2(self.0.min(other.0))
309 }
310
311 fn max(&self, other: &Self) -> Self {
312 F64x2(self.0.max(other.0))
313 }
314
315 fn clamp(&self, min: &Self, max: &Self) -> Self {
316 F64x2(self.0.max(min.0).min(max.0))
317 }
318
319}
320
321impl VectorTranscendental<f64, 2> for F64x2 {
322 fn sqrt(&self) -> Self { F64x2(self.0.sqrt()) }
323 fn exp(&self) -> Self { F64x2(self.0.exp()) }
324 fn ln(&self) -> Self { F64x2(self.0.ln()) }
325 fn sin(&self) -> Self { F64x2(self.0.sin()) }
326 fn cos(&self) -> Self { F64x2(self.0.cos()) }
327 fn tan(&self) -> Self { F64x2(self.0.tan()) }
328}
329
330impl Vector<f64, 4> for F64x4 {
335 fn splat(value: f64) -> Self {
336 F64x4(f64x4::splat(value))
337 }
338
339 fn load(slice: &[f64]) -> Self {
340 let mut arr = [0.0f64; 4];
341 arr.copy_from_slice(&slice[0..4]);
342 F64x4(f64x4::from(arr))
343 }
344
345 fn store(&self, slice: &mut [f64]) {
346 let arr: [f64; 4] = self.0.into();
347 slice[0..4].copy_from_slice(&arr);
348 }
349
350 fn extract(&self, index: usize) -> f64 {
351 let arr: [f64; 4] = self.0.into();
352 arr[index]
353 }
354
355 fn insert(&self, index: usize, value: f64) -> Self {
356 let mut arr: [f64; 4] = self.0.into();
357 arr[index] = value;
358 F64x4(f64x4::from(arr))
359 }
360
361 fn add(&self, other: &Self) -> Self {
362 F64x4(self.0 + other.0)
363 }
364
365 fn sub(&self, other: &Self) -> Self {
366 F64x4(self.0 - other.0)
367 }
368
369 fn mul(&self, other: &Self) -> Self {
370 F64x4(self.0 * other.0)
371 }
372
373 fn div(&self, other: &Self) -> Self {
374 F64x4(self.0 / other.0)
375 }
376
377 fn rem(&self, other: &Self) -> Self {
378 let a: [f64; 4] = self.0.into();
379 let b: [f64; 4] = other.0.into();
380 let mut arr = [0.0f64; 4];
381 for i in 0..4 {
382 arr[i] = a[i] % b[i];
383 }
384 F64x4(f64x4::from(arr))
385 }
386
387 fn neg(&self) -> Self {
388 F64x4(-self.0)
389 }
390
391 fn abs(&self) -> Self {
392 F64x4(self.0.abs())
393 }
394
395 fn min(&self, other: &Self) -> Self {
396 F64x4(self.0.min(other.0))
397 }
398
399 fn max(&self, other: &Self) -> Self {
400 F64x4(self.0.max(other.0))
401 }
402
403 fn clamp(&self, min: &Self, max: &Self) -> Self {
404 F64x4(self.0.max(min.0).min(max.0))
405 }
406
407}
408
409impl VectorTranscendental<f64, 4> for F64x4 {
410 fn sqrt(&self) -> Self { F64x4(self.0.sqrt()) }
411 fn exp(&self) -> Self { F64x4(self.0.exp()) }
412 fn ln(&self) -> Self { F64x4(self.0.ln()) }
413 fn sin(&self) -> Self { F64x4(self.0.sin()) }
414 fn cos(&self) -> Self { F64x4(self.0.cos()) }
415 fn tan(&self) -> Self { F64x4(self.0.tan()) }
416}
417
418impl VectorMask<f64, 4> for F64x4 {
423 type Mask = F64x4;
426
427 fn eq(&self, other: &Self) -> F64x4 {
428 F64x4(self.0.cmp_eq(other.0))
429 }
430
431 fn ne(&self, other: &Self) -> F64x4 {
432 F64x4(self.0.cmp_ne(other.0))
433 }
434
435 fn gt(&self, other: &Self) -> F64x4 {
436 F64x4(self.0.cmp_gt(other.0))
437 }
438
439 fn ge(&self, other: &Self) -> F64x4 {
440 F64x4(self.0.cmp_ge(other.0))
441 }
442
443 fn lt(&self, other: &Self) -> F64x4 {
444 F64x4(self.0.cmp_lt(other.0))
445 }
446
447 fn le(&self, other: &Self) -> F64x4 {
448 F64x4(self.0.cmp_le(other.0))
449 }
450
451 fn select(&self, other: &Self, mask: F64x4) -> Self {
452 F64x4(mask.0.blend(self.0, other.0))
455 }
456
457 fn all(mask: &F64x4) -> bool {
458 mask.0.move_mask() == 0b1111
461 }
462}
463
464impl Add for F32x4 {
469 type Output = Self;
470 fn add(self, rhs: Self) -> Self {
471 Self(self.0 + rhs.0)
472 }
473}
474
475impl Sub for F32x4 {
476 type Output = Self;
477 fn sub(self, rhs: Self) -> Self {
478 Self(self.0 - rhs.0)
479 }
480}
481
482impl Mul for F32x4 {
483 type Output = Self;
484 fn mul(self, rhs: Self) -> Self {
485 Self(self.0 * rhs.0)
486 }
487}
488
489impl Div for F32x4 {
490 type Output = Self;
491 fn div(self, rhs: Self) -> Self {
492 Self(self.0 / rhs.0)
493 }
494}
495
496impl Rem for F32x4 {
497 type Output = Self;
498 fn rem(self, rhs: Self) -> Self {
499 let a: [f32; 4] = self.0.into();
500 let b: [f32; 4] = rhs.0.into();
501 let mut arr = [0.0f32; 4];
502 for i in 0..4 {
503 arr[i] = a[i] % b[i];
504 }
505 Self(f32x4::from(arr))
506 }
507}
508
509impl Neg for F32x4 {
510 type Output = Self;
511 fn neg(self) -> Self {
512 Self(-self.0)
513 }
514}
515
516impl Add for F32x8 {
519 type Output = Self;
520 fn add(self, rhs: Self) -> Self {
521 Self(self.0 + rhs.0)
522 }
523}
524
525impl Sub for F32x8 {
526 type Output = Self;
527 fn sub(self, rhs: Self) -> Self {
528 Self(self.0 - rhs.0)
529 }
530}
531
532impl Mul for F32x8 {
533 type Output = Self;
534 fn mul(self, rhs: Self) -> Self {
535 Self(self.0 * rhs.0)
536 }
537}
538
539impl Div for F32x8 {
540 type Output = Self;
541 fn div(self, rhs: Self) -> Self {
542 Self(self.0 / rhs.0)
543 }
544}
545
546impl Rem for F32x8 {
547 type Output = Self;
548 fn rem(self, rhs: Self) -> Self {
549 let a: [f32; 8] = self.0.into();
550 let b: [f32; 8] = rhs.0.into();
551 let mut arr = [0.0f32; 8];
552 for i in 0..8 {
553 arr[i] = a[i] % b[i];
554 }
555 Self(f32x8::from(arr))
556 }
557}
558
559impl Neg for F32x8 {
560 type Output = Self;
561 fn neg(self) -> Self {
562 Self(-self.0)
563 }
564}
565
566impl Add for F64x2 {
567 type Output = Self;
568 fn add(self, rhs: Self) -> Self {
569 Self(self.0 + rhs.0)
570 }
571}
572
573impl Sub for F64x2 {
574 type Output = Self;
575 fn sub(self, rhs: Self) -> Self {
576 Self(self.0 - rhs.0)
577 }
578}
579
580impl Mul for F64x2 {
581 type Output = Self;
582 fn mul(self, rhs: Self) -> Self {
583 Self(self.0 * rhs.0)
584 }
585}
586
587impl Div for F64x2 {
588 type Output = Self;
589 fn div(self, rhs: Self) -> Self {
590 Self(self.0 / rhs.0)
591 }
592}
593
594impl Rem for F64x2 {
595 type Output = Self;
596 fn rem(self, rhs: Self) -> Self {
597 let a: [f64; 2] = self.0.into();
598 let b: [f64; 2] = rhs.0.into();
599 let mut arr = [0.0f64; 2];
600 for i in 0..2 {
601 arr[i] = a[i] % b[i];
602 }
603 Self(f64x2::from(arr))
604 }
605}
606
607impl Neg for F64x2 {
608 type Output = Self;
609 fn neg(self) -> Self {
610 Self(-self.0)
611 }
612}
613
614impl Add for F64x4 {
615 type Output = Self;
616 fn add(self, rhs: Self) -> Self {
617 Self(self.0 + rhs.0)
618 }
619}
620
621impl Sub for F64x4 {
622 type Output = Self;
623 fn sub(self, rhs: Self) -> Self {
624 Self(self.0 - rhs.0)
625 }
626}
627
628impl Mul for F64x4 {
629 type Output = Self;
630 fn mul(self, rhs: Self) -> Self {
631 Self(self.0 * rhs.0)
632 }
633}
634
635impl Div for F64x4 {
636 type Output = Self;
637 fn div(self, rhs: Self) -> Self {
638 Self(self.0 / rhs.0)
639 }
640}
641
642impl Rem for F64x4 {
643 type Output = Self;
644 fn rem(self, rhs: Self) -> Self {
645 let a: [f64; 4] = self.0.into();
646 let b: [f64; 4] = rhs.0.into();
647 let mut arr = [0.0f64; 4];
648 for i in 0..4 {
649 arr[i] = a[i] % b[i];
650 }
651 Self(f64x4::from(arr))
652 }
653}
654
655impl Neg for F64x4 {
656 type Output = Self;
657 fn neg(self) -> Self {
658 Self(-self.0)
659 }
660}
661
#[cfg(test)]
mod tests {
    use super::*;
    use crate::math::vector::traits::VectorMask;

    // `+` and `*` on four `f32` lanes, read back through `store`.
    #[test]
    fn test_f32x4_basic() {
        let lhs = F32x4::load(&[1.0, 2.0, 3.0, 4.0]);
        let rhs = F32x4::load(&[5.0, 6.0, 7.0, 8.0]);
        let mut out = [0.0f32; 4];

        (lhs + rhs).store(&mut out);
        assert_eq!(out, [6.0, 8.0, 10.0, 12.0]);

        (lhs * rhs).store(&mut out);
        assert_eq!(out, [5.0, 12.0, 21.0, 32.0]);
    }

    // Vector `sin` must agree with the scalar `f32::sin` to within 1e-5 per lane.
    #[test]
    fn test_f32x4_math() {
        let input = F32x4::load(&[0.0, 0.5, 1.0, 2.0]);
        let mut out = [0.0f32; 4];
        input.sin().store(&mut out);

        let expected = [0.0f32.sin(), 0.5f32.sin(), 1.0f32.sin(), 2.0f32.sin()];
        for (got, want) in out.iter().zip(expected.iter()) {
            assert!((got - want).abs() < 1e-5);
        }
    }

    // `+` on two `f64` lanes.
    #[test]
    fn test_f64x2_basic() {
        let lhs = F64x2::load(&[1.0, 2.0]);
        let rhs = F64x2::load(&[3.0, 4.0]);
        let mut out = [0.0f64; 2];

        (lhs + rhs).store(&mut out);
        assert_eq!(out, [4.0, 6.0]);
    }

    // `+` and `*` on four `f64` lanes.
    #[test]
    fn test_f64x4_basic() {
        let lhs = F64x4::load(&[1.0, 2.0, 3.0, 4.0]);
        let rhs = F64x4::load(&[5.0, 6.0, 7.0, 8.0]);
        let mut out = [0.0f64; 4];

        (lhs + rhs).store(&mut out);
        assert_eq!(out, [6.0, 8.0, 10.0, 12.0]);

        (lhs * rhs).store(&mut out);
        assert_eq!(out, [5.0, 12.0, 21.0, 32.0]);
    }

    // Vector `sqrt` and `exp` must agree with the scalar `f64` versions to within 1e-12.
    #[test]
    fn test_f64x4_math() {
        let input = F64x4::load(&[0.0, 0.5, 1.0, 2.0]);
        let mut out = [0.0f64; 4];

        input.sqrt().store(&mut out);
        let expected = [0.0f64.sqrt(), 0.5f64.sqrt(), 1.0f64.sqrt(), 2.0f64.sqrt()];
        for (got, want) in out.iter().zip(expected.iter()) {
            assert!((got - want).abs() < 1e-12);
        }

        input.exp().store(&mut out);
        let expected = [0.0f64.exp(), 0.5f64.exp(), 1.0f64.exp(), 2.0f64.exp()];
        for (got, want) in out.iter().zip(expected.iter()) {
            assert!((got - want).abs() < 1e-12);
        }
    }

    // Only lanes 0 and 1 (values 1.0 and 2.0) are strictly below 3.0.
    #[test]
    fn test_f64x4_vector_mask_lt() {
        let values = F64x4::load(&[1.0, 2.0, 3.0, 4.0]);
        let bound = F64x4::load(&[3.0, 3.0, 3.0, 3.0]);
        let mask = <F64x4 as VectorMask<f64, 4>>::lt(&values, &bound);
        assert_eq!(mask.0.move_mask() & 0b1111, 0b0011);
    }

    // Only lanes 2 and 3 (values 3.0 and 4.0) are strictly above 2.0.
    #[test]
    fn test_f64x4_vector_mask_gt() {
        let values = F64x4::load(&[1.0, 2.0, 3.0, 4.0]);
        let bound = F64x4::load(&[2.0, 2.0, 2.0, 2.0]);
        let mask = <F64x4 as VectorMask<f64, 4>>::gt(&values, &bound);
        assert_eq!(mask.0.move_mask() & 0b1111, 0b1100);
    }

    // Lanes 0 and 2 hold equal values; lanes 1 and 3 differ.
    #[test]
    fn test_f64x4_vector_mask_eq() {
        let left = F64x4::load(&[1.0, 2.0, 3.0, 4.0]);
        let right = F64x4::load(&[1.0, 0.0, 3.0, 5.0]);
        let mask = <F64x4 as VectorMask<f64, 4>>::eq(&left, &right);
        assert_eq!(mask.0.move_mask() & 0b1111, 0b0101);
    }

    // `all` is true only when every lane of the mask is set.
    #[test]
    fn test_f64x4_vector_mask_all() {
        let every_lane = <F64x4 as VectorMask<f64, 4>>::lt(&F64x4::splat(1.0), &F64x4::splat(2.0));
        assert!(<F64x4 as VectorMask<f64, 4>>::all(&every_lane));

        let some_lanes = <F64x4 as VectorMask<f64, 4>>::lt(
            &F64x4::load(&[1.0, 2.0, 3.0, 4.0]),
            &F64x4::splat(3.0),
        );
        assert!(!<F64x4 as VectorMask<f64, 4>>::all(&some_lanes));
    }

    // Only lane 1 (20.0 < 25.0) is set, so only that lane comes from `true_vals`.
    #[test]
    fn test_f64x4_vector_mask_select() {
        let true_vals = F64x4::load(&[10.0, 20.0, 30.0, 40.0]);
        let false_vals = F64x4::load(&[1.0, 2.0, 3.0, 4.0]);
        let threshold = F64x4::load(&[5.0, 25.0, 25.0, 25.0]);

        let mask = <F64x4 as VectorMask<f64, 4>>::lt(&true_vals, &threshold);
        let selected = <F64x4 as VectorMask<f64, 4>>::select(&true_vals, &false_vals, mask);
        assert_eq!(mask.0.move_mask() & 0b1111, 0b0010);

        let mut out = [0.0; 4];
        selected.store(&mut out);
        assert!((out[0] - 1.0).abs() < 1e-15);
        assert!((out[1] - 20.0).abs() < 1e-15);
        assert!((out[2] - 3.0).abs() < 1e-15);
        assert!((out[3] - 4.0).abs() < 1e-15);
    }
}