1use crate::Transcendental;
11use std::ops::{Add, Div, Mul, Neg, Rem, Sub};
12use wide::{f32x4, f32x8, f64x2, f64x4, CmpEq, CmpGe, CmpGt, CmpLe, CmpLt, CmpNe};
13
14use crate::math::vector::traits::{Vector, VectorMask, VectorTranscendental};
15
16#[derive(Copy, Clone, Debug, PartialEq)]
22pub struct F32x4(f32x4);
23
24#[derive(Copy, Clone, Debug, PartialEq)]
26pub struct F32x8(f32x8);
27
28#[derive(Copy, Clone, Debug, PartialEq)]
30pub struct F64x2(f64x2);
31
32#[derive(Copy, Clone, Debug, PartialEq)]
34pub struct F64x4(f64x4);
35
36impl Default for F32x4 {
41 fn default() -> Self {
42 Self(f32x4::splat(0.0))
43 }
44}
45
46impl Default for F32x8 {
47 fn default() -> Self {
48 Self(f32x8::splat(0.0))
49 }
50}
51
52impl Default for F64x2 {
53 fn default() -> Self {
54 Self(f64x2::splat(0.0))
55 }
56}
57
58impl Default for F64x4 {
59 fn default() -> Self {
60 Self(f64x4::splat(0.0))
61 }
62}
63
64impl Vector<f32, 4> for F32x4 {
69 fn splat(value: f32) -> Self {
70 F32x4(f32x4::splat(value))
71 }
72
73 fn load(slice: &[f32]) -> Self {
74 let mut arr = [0.0f32; 4];
75 arr.copy_from_slice(&slice[0..4]);
76 F32x4(f32x4::from(arr))
77 }
78
79 fn store(&self, slice: &mut [f32]) {
80 let arr: [f32; 4] = self.0.into();
81 slice[0..4].copy_from_slice(&arr);
82 }
83
84 fn extract(&self, index: usize) -> f32 {
85 let arr: [f32; 4] = self.0.into();
86 arr[index]
87 }
88
89 fn insert(&self, index: usize, value: f32) -> Self {
90 let mut arr: [f32; 4] = self.0.into();
91 arr[index] = value;
92 F32x4(f32x4::from(arr))
93 }
94
95 fn add(&self, other: &Self) -> Self {
96 F32x4(self.0 + other.0)
97 }
98
99 fn sub(&self, other: &Self) -> Self {
100 F32x4(self.0 - other.0)
101 }
102
103 fn mul(&self, other: &Self) -> Self {
104 F32x4(self.0 * other.0)
105 }
106
107 fn div(&self, other: &Self) -> Self {
108 F32x4(self.0 / other.0)
109 }
110
111 fn rem(&self, other: &Self) -> Self {
112 let a: [f32; 4] = self.0.into();
114 let b: [f32; 4] = other.0.into();
115 let mut arr = [0.0f32; 4];
116 for i in 0..4 {
117 arr[i] = a[i] % b[i];
118 }
119 F32x4(f32x4::from(arr))
120 }
121
122 fn neg(&self) -> Self {
123 F32x4(-self.0)
124 }
125
126 fn abs(&self) -> Self {
127 F32x4(self.0.abs())
128 }
129
130 fn min(&self, other: &Self) -> Self {
131 F32x4(self.0.min(other.0))
132 }
133
134 fn max(&self, other: &Self) -> Self {
135 F32x4(self.0.max(other.0))
136 }
137
138 fn clamp(&self, min: &Self, max: &Self) -> Self {
139 F32x4(self.0.max(min.0).min(max.0))
141 }
142}
143
144impl VectorTranscendental<f32, 4> for F32x4 {
145 fn sqrt(&self) -> Self {
146 F32x4(self.0.sqrt())
147 }
148 fn exp(&self) -> Self {
149 F32x4(self.0.exp())
150 }
151 fn ln(&self) -> Self {
152 F32x4(self.0.ln())
153 }
154 fn sin(&self) -> Self {
155 F32x4(self.0.sin())
156 }
157 fn cos(&self) -> Self {
158 F32x4(self.0.cos())
159 }
160 fn tan(&self) -> Self {
161 F32x4(self.0.tan())
162 }
163}
164
165impl Vector<f32, 8> for F32x8 {
170 fn splat(value: f32) -> Self {
171 F32x8(f32x8::splat(value))
172 }
173
174 fn load(slice: &[f32]) -> Self {
175 let mut arr = [0.0f32; 8];
176 arr.copy_from_slice(&slice[0..8]);
177 F32x8(f32x8::from(arr))
178 }
179
180 fn store(&self, slice: &mut [f32]) {
181 let arr: [f32; 8] = self.0.into();
182 slice[0..8].copy_from_slice(&arr);
183 }
184
185 fn extract(&self, index: usize) -> f32 {
186 let arr: [f32; 8] = self.0.into();
187 arr[index]
188 }
189
190 fn insert(&self, index: usize, value: f32) -> Self {
191 let mut arr: [f32; 8] = self.0.into();
192 arr[index] = value;
193 F32x8(f32x8::from(arr))
194 }
195
196 fn add(&self, other: &Self) -> Self {
197 F32x8(self.0 + other.0)
198 }
199
200 fn sub(&self, other: &Self) -> Self {
201 F32x8(self.0 - other.0)
202 }
203
204 fn mul(&self, other: &Self) -> Self {
205 F32x8(self.0 * other.0)
206 }
207
208 fn div(&self, other: &Self) -> Self {
209 F32x8(self.0 / other.0)
210 }
211
212 fn rem(&self, other: &Self) -> Self {
213 let a: [f32; 8] = self.0.into();
214 let b: [f32; 8] = other.0.into();
215 let mut arr = [0.0f32; 8];
216 for i in 0..8 {
217 arr[i] = a[i] % b[i];
218 }
219 F32x8(f32x8::from(arr))
220 }
221
222 fn neg(&self) -> Self {
223 F32x8(-self.0)
224 }
225
226 fn abs(&self) -> Self {
227 F32x8(self.0.abs())
228 }
229
230 fn min(&self, other: &Self) -> Self {
231 F32x8(self.0.min(other.0))
232 }
233
234 fn max(&self, other: &Self) -> Self {
235 F32x8(self.0.max(other.0))
236 }
237
238 fn clamp(&self, min: &Self, max: &Self) -> Self {
239 F32x8(self.0.max(min.0).min(max.0))
240 }
241}
242
243impl VectorTranscendental<f32, 8> for F32x8 {
244 fn sqrt(&self) -> Self {
245 F32x8(self.0.sqrt())
246 }
247 fn exp(&self) -> Self {
248 F32x8(self.0.exp())
249 }
250 fn ln(&self) -> Self {
251 F32x8(self.0.ln())
252 }
253 fn sin(&self) -> Self {
254 F32x8(self.0.sin())
255 }
256 fn cos(&self) -> Self {
257 F32x8(self.0.cos())
258 }
259 fn tan(&self) -> Self {
260 F32x8(self.0.tan())
261 }
262}
263
264impl Vector<f64, 2> for F64x2 {
269 fn splat(value: f64) -> Self {
270 F64x2(f64x2::splat(value))
271 }
272
273 fn load(slice: &[f64]) -> Self {
274 let mut arr = [0.0f64; 2];
275 arr.copy_from_slice(&slice[0..2]);
276 F64x2(f64x2::from(arr))
277 }
278
279 fn store(&self, slice: &mut [f64]) {
280 let arr: [f64; 2] = self.0.into();
281 slice[0..2].copy_from_slice(&arr);
282 }
283
284 fn extract(&self, index: usize) -> f64 {
285 let arr: [f64; 2] = self.0.into();
286 arr[index]
287 }
288
289 fn insert(&self, index: usize, value: f64) -> Self {
290 let mut arr: [f64; 2] = self.0.into();
291 arr[index] = value;
292 F64x2(f64x2::from(arr))
293 }
294
295 fn add(&self, other: &Self) -> Self {
296 F64x2(self.0 + other.0)
297 }
298
299 fn sub(&self, other: &Self) -> Self {
300 F64x2(self.0 - other.0)
301 }
302
303 fn mul(&self, other: &Self) -> Self {
304 F64x2(self.0 * other.0)
305 }
306
307 fn div(&self, other: &Self) -> Self {
308 F64x2(self.0 / other.0)
309 }
310
311 fn rem(&self, other: &Self) -> Self {
312 let a: [f64; 2] = self.0.into();
313 let b: [f64; 2] = other.0.into();
314 let mut arr = [0.0f64; 2];
315 for i in 0..2 {
316 arr[i] = a[i] % b[i];
317 }
318 F64x2(f64x2::from(arr))
319 }
320
321 fn neg(&self) -> Self {
322 F64x2(-self.0)
323 }
324
325 fn abs(&self) -> Self {
326 F64x2(self.0.abs())
327 }
328
329 fn min(&self, other: &Self) -> Self {
330 F64x2(self.0.min(other.0))
331 }
332
333 fn max(&self, other: &Self) -> Self {
334 F64x2(self.0.max(other.0))
335 }
336
337 fn clamp(&self, min: &Self, max: &Self) -> Self {
338 F64x2(self.0.max(min.0).min(max.0))
339 }
340}
341
342impl VectorTranscendental<f64, 2> for F64x2 {
343 fn sqrt(&self) -> Self {
344 F64x2(self.0.sqrt())
345 }
346 fn exp(&self) -> Self {
347 F64x2(self.0.exp())
348 }
349 fn ln(&self) -> Self {
350 F64x2(self.0.ln())
351 }
352 fn sin(&self) -> Self {
353 F64x2(self.0.sin())
354 }
355 fn cos(&self) -> Self {
356 F64x2(self.0.cos())
357 }
358 fn tan(&self) -> Self {
359 F64x2(self.0.tan())
360 }
361}
362
363impl Vector<f64, 4> for F64x4 {
368 fn splat(value: f64) -> Self {
369 F64x4(f64x4::splat(value))
370 }
371
372 fn load(slice: &[f64]) -> Self {
373 let mut arr = [0.0f64; 4];
374 arr.copy_from_slice(&slice[0..4]);
375 F64x4(f64x4::from(arr))
376 }
377
378 fn store(&self, slice: &mut [f64]) {
379 let arr: [f64; 4] = self.0.into();
380 slice[0..4].copy_from_slice(&arr);
381 }
382
383 fn extract(&self, index: usize) -> f64 {
384 let arr: [f64; 4] = self.0.into();
385 arr[index]
386 }
387
388 fn insert(&self, index: usize, value: f64) -> Self {
389 let mut arr: [f64; 4] = self.0.into();
390 arr[index] = value;
391 F64x4(f64x4::from(arr))
392 }
393
394 fn add(&self, other: &Self) -> Self {
395 F64x4(self.0 + other.0)
396 }
397
398 fn sub(&self, other: &Self) -> Self {
399 F64x4(self.0 - other.0)
400 }
401
402 fn mul(&self, other: &Self) -> Self {
403 F64x4(self.0 * other.0)
404 }
405
406 fn div(&self, other: &Self) -> Self {
407 F64x4(self.0 / other.0)
408 }
409
410 fn rem(&self, other: &Self) -> Self {
411 let a: [f64; 4] = self.0.into();
412 let b: [f64; 4] = other.0.into();
413 let mut arr = [0.0f64; 4];
414 for i in 0..4 {
415 arr[i] = a[i] % b[i];
416 }
417 F64x4(f64x4::from(arr))
418 }
419
420 fn neg(&self) -> Self {
421 F64x4(-self.0)
422 }
423
424 fn abs(&self) -> Self {
425 F64x4(self.0.abs())
426 }
427
428 fn min(&self, other: &Self) -> Self {
429 F64x4(self.0.min(other.0))
430 }
431
432 fn max(&self, other: &Self) -> Self {
433 F64x4(self.0.max(other.0))
434 }
435
436 fn clamp(&self, min: &Self, max: &Self) -> Self {
437 F64x4(self.0.max(min.0).min(max.0))
438 }
439}
440
441impl VectorTranscendental<f64, 4> for F64x4 {
442 fn sqrt(&self) -> Self {
443 F64x4(self.0.sqrt())
444 }
445 fn exp(&self) -> Self {
446 F64x4(self.0.exp())
447 }
448 fn ln(&self) -> Self {
449 F64x4(self.0.ln())
450 }
451 fn sin(&self) -> Self {
452 F64x4(self.0.sin())
453 }
454 fn cos(&self) -> Self {
455 F64x4(self.0.cos())
456 }
457 fn tan(&self) -> Self {
458 F64x4(self.0.tan())
459 }
460}
461
462impl VectorMask<f64, 4> for F64x4 {
467 type Mask = F64x4;
470
471 fn eq(&self, other: &Self) -> F64x4 {
472 F64x4(self.0.cmp_eq(other.0))
473 }
474
475 fn ne(&self, other: &Self) -> F64x4 {
476 F64x4(self.0.cmp_ne(other.0))
477 }
478
479 fn gt(&self, other: &Self) -> F64x4 {
480 F64x4(self.0.cmp_gt(other.0))
481 }
482
483 fn ge(&self, other: &Self) -> F64x4 {
484 F64x4(self.0.cmp_ge(other.0))
485 }
486
487 fn lt(&self, other: &Self) -> F64x4 {
488 F64x4(self.0.cmp_lt(other.0))
489 }
490
491 fn le(&self, other: &Self) -> F64x4 {
492 F64x4(self.0.cmp_le(other.0))
493 }
494
495 fn select(&self, other: &Self, mask: F64x4) -> Self {
496 F64x4(mask.0.blend(self.0, other.0))
499 }
500
501 fn all(mask: &F64x4) -> bool {
502 mask.0.move_mask() == 0b1111
505 }
506}
507
508impl Add for F32x4 {
513 type Output = Self;
514 fn add(self, rhs: Self) -> Self {
515 Self(self.0 + rhs.0)
516 }
517}
518
519impl Sub for F32x4 {
520 type Output = Self;
521 fn sub(self, rhs: Self) -> Self {
522 Self(self.0 - rhs.0)
523 }
524}
525
526impl Mul for F32x4 {
527 type Output = Self;
528 fn mul(self, rhs: Self) -> Self {
529 Self(self.0 * rhs.0)
530 }
531}
532
533impl Div for F32x4 {
534 type Output = Self;
535 fn div(self, rhs: Self) -> Self {
536 Self(self.0 / rhs.0)
537 }
538}
539
540impl Rem for F32x4 {
541 type Output = Self;
542 fn rem(self, rhs: Self) -> Self {
543 let a: [f32; 4] = self.0.into();
544 let b: [f32; 4] = rhs.0.into();
545 let mut arr = [0.0f32; 4];
546 for i in 0..4 {
547 arr[i] = a[i] % b[i];
548 }
549 Self(f32x4::from(arr))
550 }
551}
552
553impl Neg for F32x4 {
554 type Output = Self;
555 fn neg(self) -> Self {
556 Self(-self.0)
557 }
558}
559
560impl Add for F32x8 {
563 type Output = Self;
564 fn add(self, rhs: Self) -> Self {
565 Self(self.0 + rhs.0)
566 }
567}
568
569impl Sub for F32x8 {
570 type Output = Self;
571 fn sub(self, rhs: Self) -> Self {
572 Self(self.0 - rhs.0)
573 }
574}
575
576impl Mul for F32x8 {
577 type Output = Self;
578 fn mul(self, rhs: Self) -> Self {
579 Self(self.0 * rhs.0)
580 }
581}
582
583impl Div for F32x8 {
584 type Output = Self;
585 fn div(self, rhs: Self) -> Self {
586 Self(self.0 / rhs.0)
587 }
588}
589
590impl Rem for F32x8 {
591 type Output = Self;
592 fn rem(self, rhs: Self) -> Self {
593 let a: [f32; 8] = self.0.into();
594 let b: [f32; 8] = rhs.0.into();
595 let mut arr = [0.0f32; 8];
596 for i in 0..8 {
597 arr[i] = a[i] % b[i];
598 }
599 Self(f32x8::from(arr))
600 }
601}
602
603impl Neg for F32x8 {
604 type Output = Self;
605 fn neg(self) -> Self {
606 Self(-self.0)
607 }
608}
609
610impl Add for F64x2 {
611 type Output = Self;
612 fn add(self, rhs: Self) -> Self {
613 Self(self.0 + rhs.0)
614 }
615}
616
617impl Sub for F64x2 {
618 type Output = Self;
619 fn sub(self, rhs: Self) -> Self {
620 Self(self.0 - rhs.0)
621 }
622}
623
624impl Mul for F64x2 {
625 type Output = Self;
626 fn mul(self, rhs: Self) -> Self {
627 Self(self.0 * rhs.0)
628 }
629}
630
631impl Div for F64x2 {
632 type Output = Self;
633 fn div(self, rhs: Self) -> Self {
634 Self(self.0 / rhs.0)
635 }
636}
637
638impl Rem for F64x2 {
639 type Output = Self;
640 fn rem(self, rhs: Self) -> Self {
641 let a: [f64; 2] = self.0.into();
642 let b: [f64; 2] = rhs.0.into();
643 let mut arr = [0.0f64; 2];
644 for i in 0..2 {
645 arr[i] = a[i] % b[i];
646 }
647 Self(f64x2::from(arr))
648 }
649}
650
651impl Neg for F64x2 {
652 type Output = Self;
653 fn neg(self) -> Self {
654 Self(-self.0)
655 }
656}
657
658impl Add for F64x4 {
659 type Output = Self;
660 fn add(self, rhs: Self) -> Self {
661 Self(self.0 + rhs.0)
662 }
663}
664
665impl Sub for F64x4 {
666 type Output = Self;
667 fn sub(self, rhs: Self) -> Self {
668 Self(self.0 - rhs.0)
669 }
670}
671
672impl Mul for F64x4 {
673 type Output = Self;
674 fn mul(self, rhs: Self) -> Self {
675 Self(self.0 * rhs.0)
676 }
677}
678
679impl Div for F64x4 {
680 type Output = Self;
681 fn div(self, rhs: Self) -> Self {
682 Self(self.0 / rhs.0)
683 }
684}
685
686impl Rem for F64x4 {
687 type Output = Self;
688 fn rem(self, rhs: Self) -> Self {
689 let a: [f64; 4] = self.0.into();
690 let b: [f64; 4] = rhs.0.into();
691 let mut arr = [0.0f64; 4];
692 for i in 0..4 {
693 arr[i] = a[i] % b[i];
694 }
695 Self(f64x4::from(arr))
696 }
697}
698
699impl Neg for F64x4 {
700 type Output = Self;
701 fn neg(self) -> Self {
702 Self(-self.0)
703 }
704}
705
#[cfg(test)]
mod tests {
    use super::*;
    use crate::math::vector::traits::VectorMask;

    /// `+` and `*` on `F32x4`, checked against hand-computed lanes.
    #[test]
    fn test_f32x4_basic() {
        let lhs = F32x4::load(&[1.0, 2.0, 3.0, 4.0]);
        let rhs = F32x4::load(&[5.0, 6.0, 7.0, 8.0]);
        let mut out = [0.0f32; 4];

        let sum = lhs + rhs;
        sum.store(&mut out);
        assert_eq!(out, [6.0, 8.0, 10.0, 12.0]);

        let product = lhs * rhs;
        product.store(&mut out);
        assert_eq!(out, [5.0, 12.0, 21.0, 32.0]);
    }

    /// `sin` on `F32x4` agrees with the scalar `f32::sin` to within `1e-5`.
    #[test]
    fn test_f32x4_math() {
        let inputs = [0.0f32, 0.5, 1.0, 2.0];
        let vector = F32x4::load(&inputs);
        let mut out = [0.0f32; 4];
        vector.sin().store(&mut out);

        for (&got, &x) in out.iter().zip(inputs.iter()) {
            assert!((got - x.sin()).abs() < 1e-5);
        }
    }

    /// `+` on `F64x2`, checked against hand-computed lanes.
    #[test]
    fn test_f64x2_basic() {
        let lhs = F64x2::load(&[1.0, 2.0]);
        let rhs = F64x2::load(&[3.0, 4.0]);
        let mut out = [0.0f64; 2];

        let sum = lhs + rhs;
        sum.store(&mut out);
        assert_eq!(out, [4.0, 6.0]);
    }

    /// `+` and `*` on `F64x4`, checked against hand-computed lanes.
    #[test]
    fn test_f64x4_basic() {
        let lhs = F64x4::load(&[1.0, 2.0, 3.0, 4.0]);
        let rhs = F64x4::load(&[5.0, 6.0, 7.0, 8.0]);
        let mut out = [0.0f64; 4];

        let sum = lhs + rhs;
        sum.store(&mut out);
        assert_eq!(out, [6.0, 8.0, 10.0, 12.0]);

        let product = lhs * rhs;
        product.store(&mut out);
        assert_eq!(out, [5.0, 12.0, 21.0, 32.0]);
    }

    /// `sqrt` and `exp` on `F64x4` agree with the scalar functions to `1e-12`.
    #[test]
    fn test_f64x4_math() {
        let inputs = [0.0f64, 0.5, 1.0, 2.0];
        let vector = F64x4::load(&inputs);
        let mut out = [0.0f64; 4];

        vector.sqrt().store(&mut out);
        for (&got, &x) in out.iter().zip(inputs.iter()) {
            assert!((got - x.sqrt()).abs() < 1e-12);
        }

        vector.exp().store(&mut out);
        for (&got, &x) in out.iter().zip(inputs.iter()) {
            assert!((got - x.exp()).abs() < 1e-12);
        }
    }

    /// `lt` sets the mask bits of the lanes that are below the bound.
    #[test]
    fn test_f64x4_vector_mask_lt() {
        let values = F64x4::load(&[1.0, 2.0, 3.0, 4.0]);
        let bound = F64x4::load(&[3.0, 3.0, 3.0, 3.0]);
        let mask = <F64x4 as VectorMask<f64, 4>>::lt(&values, &bound);
        let bits = mask.0.move_mask() & 0b1111;
        assert_eq!(bits, 0b0011);
    }

    /// `gt` sets the mask bits of the lanes that are above the bound.
    #[test]
    fn test_f64x4_vector_mask_gt() {
        let values = F64x4::load(&[1.0, 2.0, 3.0, 4.0]);
        let bound = F64x4::load(&[2.0, 2.0, 2.0, 2.0]);
        let mask = <F64x4 as VectorMask<f64, 4>>::gt(&values, &bound);
        let bits = mask.0.move_mask() & 0b1111;
        assert_eq!(bits, 0b1100);
    }

    /// `eq` sets the mask bits of the lanes that compare equal.
    #[test]
    fn test_f64x4_vector_mask_eq() {
        let left = F64x4::load(&[1.0, 2.0, 3.0, 4.0]);
        let right = F64x4::load(&[1.0, 0.0, 3.0, 5.0]);
        let mask = <F64x4 as VectorMask<f64, 4>>::eq(&left, &right);
        let bits = mask.0.move_mask() & 0b1111;
        assert_eq!(bits, 0b0101);
    }

    /// `all` is true only when every lane of the mask is set.
    #[test]
    fn test_f64x4_vector_mask_all() {
        let ones = F64x4::splat(1.0);
        let twos = F64x4::splat(2.0);
        let every_lane = <F64x4 as VectorMask<f64, 4>>::lt(&ones, &twos);
        assert!(<F64x4 as VectorMask<f64, 4>>::all(&every_lane));

        let ramp = F64x4::load(&[1.0, 2.0, 3.0, 4.0]);
        let threes = F64x4::splat(3.0);
        let some_lanes = <F64x4 as VectorMask<f64, 4>>::lt(&ramp, &threes);
        assert!(!<F64x4 as VectorMask<f64, 4>>::all(&some_lanes));
    }

    /// `select` takes lanes of `self` where the mask is set and lanes of
    /// `other` everywhere else.
    #[test]
    fn test_f64x4_vector_mask_select() {
        let true_vals = F64x4::load(&[10.0, 20.0, 30.0, 40.0]);
        let false_vals = F64x4::load(&[1.0, 2.0, 3.0, 4.0]);
        let threshold = F64x4::load(&[5.0, 25.0, 25.0, 25.0]);

        // Only lane 1 (20.0 < 25.0) satisfies the comparison.
        let mask = <F64x4 as VectorMask<f64, 4>>::lt(&true_vals, &threshold);
        assert_eq!(mask.0.move_mask() & 0b1111, 0b0010);

        let selected = <F64x4 as VectorMask<f64, 4>>::select(&true_vals, &false_vals, mask);
        let mut out = [0.0f64; 4];
        selected.store(&mut out);

        let expected = [1.0f64, 20.0, 3.0, 4.0];
        for (&got, &want) in out.iter().zip(expected.iter()) {
            assert!((got - want).abs() < 1e-15);
        }
    }
}