1use super::*;
2
pick! {
  if #[cfg(target_feature="avx")] {
    // With AVX available, all eight lanes live in a single 256-bit register.
    #[derive(Default, Clone, Copy, PartialEq)]
    #[repr(C, align(32))]
    pub struct f32x8 { avx: m256 }
  } else {
    // Without AVX, the eight lanes are emulated as two f32x4 halves
    // (`a` = lanes 0..4, `b` = lanes 4..8). Layout and alignment match the
    // AVX variant so casts between the two representations stay valid.
    #[derive(Default, Clone, Copy, PartialEq)]
    #[repr(C, align(32))]
    pub struct f32x8 { a : f32x4, b : f32x4 }
  }
}
14
/// Declares a `pub const` of type `f32x8` with all eight lanes set to the
/// given `f32` expression.
macro_rules! const_f32_as_f32x8 {
  ($i:ident, $f:expr) => {
    #[allow(non_upper_case_globals)]
    pub const $i: f32x8 = f32x8::new([$f; 8]);
  };
}
21
impl f32x8 {
  // Common mathematical constants, pre-splatted across all eight lanes.
  const_f32_as_f32x8!(ONE, 1.0);
  const_f32_as_f32x8!(HALF, 0.5);
  const_f32_as_f32x8!(ZERO, 0.0);
  const_f32_as_f32x8!(E, core::f32::consts::E);
  const_f32_as_f32x8!(FRAC_1_PI, core::f32::consts::FRAC_1_PI);
  const_f32_as_f32x8!(FRAC_2_PI, core::f32::consts::FRAC_2_PI);
  const_f32_as_f32x8!(FRAC_2_SQRT_PI, core::f32::consts::FRAC_2_SQRT_PI);
  const_f32_as_f32x8!(FRAC_1_SQRT_2, core::f32::consts::FRAC_1_SQRT_2);
  const_f32_as_f32x8!(FRAC_PI_2, core::f32::consts::FRAC_PI_2);
  const_f32_as_f32x8!(FRAC_PI_3, core::f32::consts::FRAC_PI_3);
  const_f32_as_f32x8!(FRAC_PI_4, core::f32::consts::FRAC_PI_4);
  const_f32_as_f32x8!(FRAC_PI_6, core::f32::consts::FRAC_PI_6);
  const_f32_as_f32x8!(FRAC_PI_8, core::f32::consts::FRAC_PI_8);
  const_f32_as_f32x8!(LN_2, core::f32::consts::LN_2);
  const_f32_as_f32x8!(LN_10, core::f32::consts::LN_10);
  const_f32_as_f32x8!(LOG2_E, core::f32::consts::LOG2_E);
  const_f32_as_f32x8!(LOG10_E, core::f32::consts::LOG10_E);
  const_f32_as_f32x8!(LOG10_2, core::f32::consts::LOG10_2);
  const_f32_as_f32x8!(LOG2_10, core::f32::consts::LOG2_10);
  const_f32_as_f32x8!(PI, core::f32::consts::PI);
  const_f32_as_f32x8!(SQRT_2, core::f32::consts::SQRT_2);
  const_f32_as_f32x8!(TAU, core::f32::consts::TAU);
}
46
// SAFETY: f32x8 is `repr(C)` and consists entirely of `f32` lane data (or
// the `m256` wrapper over the same bytes), so the all-zero bit pattern is a
// valid value.
unsafe impl Zeroable for f32x8 {}
// SAFETY: 32 bytes of `f32` lanes with no padding and no invalid bit
// patterns; both cfg variants have identical size and alignment.
unsafe impl Pod for f32x8 {}

impl AlignTo for f32x8 {
  // One f32x8 covers eight `f32` elements at 32-byte alignment.
  type Elem = f32;
}
53
54impl Add for f32x8 {
55 type Output = Self;
56 #[inline]
57 fn add(self, rhs: Self) -> Self::Output {
58 pick! {
59 if #[cfg(target_feature="avx")] {
60 Self { avx: add_m256(self.avx, rhs.avx) }
61 } else {
62 Self {
63 a : self.a.add(rhs.a),
64 b : self.b.add(rhs.b),
65 }
66 }
67 }
68 }
69}
70
71impl Sub for f32x8 {
72 type Output = Self;
73 #[inline]
74 fn sub(self, rhs: Self) -> Self::Output {
75 pick! {
76 if #[cfg(target_feature="avx")] {
77 Self { avx: sub_m256(self.avx, rhs.avx) }
78 } else {
79 Self {
80 a : self.a.sub(rhs.a),
81 b : self.b.sub(rhs.b),
82 }
83 }
84 }
85 }
86}
87
88impl Mul for f32x8 {
89 type Output = Self;
90 #[inline]
91 fn mul(self, rhs: Self) -> Self::Output {
92 pick! {
93 if #[cfg(target_feature="avx")] {
94 Self { avx: mul_m256(self.avx, rhs.avx) }
95 } else {
96 Self {
97 a : self.a.mul(rhs.a),
98 b : self.b.mul(rhs.b),
99 }
100 }
101 }
102 }
103}
104
105impl Div for f32x8 {
106 type Output = Self;
107 #[inline]
108 fn div(self, rhs: Self) -> Self::Output {
109 pick! {
110 if #[cfg(target_feature="avx")] {
111 Self { avx: div_m256(self.avx, rhs.avx) }
112 } else {
113 Self {
114 a : self.a.div(rhs.a),
115 b : self.b.div(rhs.b),
116 }
117 }
118 }
119 }
120}
121
122impl Neg for f32x8 {
123 type Output = Self;
124 #[inline]
125 fn neg(self) -> Self::Output {
126 pick! {
127 if #[cfg(target_feature="avx")] {
128 Self { avx: bitxor_m256(self.avx, Self::splat(-0.0).avx) }
129 } else {
130 Self {
131 a : self.a.neg(),
132 b : self.b.neg(),
133 }
134 }
135 }
136 }
137}
138
139impl Add<f32> for f32x8 {
140 type Output = Self;
141 #[inline]
142 fn add(self, rhs: f32) -> Self::Output {
143 self.add(Self::splat(rhs))
144 }
145}
146
147impl Sub<f32> for f32x8 {
148 type Output = Self;
149 #[inline]
150 fn sub(self, rhs: f32) -> Self::Output {
151 self.sub(Self::splat(rhs))
152 }
153}
154
155impl Mul<f32> for f32x8 {
156 type Output = Self;
157 #[inline]
158 fn mul(self, rhs: f32) -> Self::Output {
159 self.mul(Self::splat(rhs))
160 }
161}
162
163impl Div<f32> for f32x8 {
164 type Output = Self;
165 #[inline]
166 fn div(self, rhs: f32) -> Self::Output {
167 self.div(Self::splat(rhs))
168 }
169}
170
171impl Add<f32x8> for f32 {
172 type Output = f32x8;
173 #[inline]
174 fn add(self, rhs: f32x8) -> Self::Output {
175 f32x8::splat(self).add(rhs)
176 }
177}
178
179impl Sub<f32x8> for f32 {
180 type Output = f32x8;
181 #[inline]
182 fn sub(self, rhs: f32x8) -> Self::Output {
183 f32x8::splat(self).sub(rhs)
184 }
185}
186
187impl Mul<f32x8> for f32 {
188 type Output = f32x8;
189 #[inline]
190 fn mul(self, rhs: f32x8) -> Self::Output {
191 f32x8::splat(self).mul(rhs)
192 }
193}
194
195impl Div<f32x8> for f32 {
196 type Output = f32x8;
197 #[inline]
198 fn div(self, rhs: f32x8) -> Self::Output {
199 f32x8::splat(self).div(rhs)
200 }
201}
202
203impl BitAnd for f32x8 {
204 type Output = Self;
205 #[inline]
206 fn bitand(self, rhs: Self) -> Self::Output {
207 pick! {
208 if #[cfg(target_feature="avx")] {
209 Self { avx: bitand_m256(self.avx, rhs.avx) }
210 } else {
211 Self {
212 a : self.a.bitand(rhs.a),
213 b : self.b.bitand(rhs.b),
214 }
215 }
216 }
217 }
218}
219
220impl BitOr for f32x8 {
221 type Output = Self;
222 #[inline]
223 fn bitor(self, rhs: Self) -> Self::Output {
224 pick! {
225 if #[cfg(target_feature="avx")] {
226 Self { avx: bitor_m256(self.avx, rhs.avx) }
227 } else {
228 Self {
229 a : self.a.bitor(rhs.a),
230 b : self.b.bitor(rhs.b),
231 }
232 }
233 }
234 }
235}
236
237impl BitXor for f32x8 {
238 type Output = Self;
239 #[inline]
240 fn bitxor(self, rhs: Self) -> Self::Output {
241 pick! {
242 if #[cfg(target_feature="avx")] {
243 Self { avx: bitxor_m256(self.avx, rhs.avx) }
244 } else {
245 Self {
246 a : self.a.bitxor(rhs.a),
247 b : self.b.bitxor(rhs.b),
248 }
249 }
250 }
251 }
252}
253
// Lanewise comparisons. Each result lane is all-ones bits when the predicate
// holds for that lane and all-zeros otherwise, so the result works as a mask
// for `blend` and the bitwise operators. The AVX predicates are the
// "ordered" variants: a lane involving NaN compares false.
impl CmpEq for f32x8 {
  type Output = Self;
  #[inline]
  fn simd_eq(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256::<{cmp_op!(EqualOrdered)}>(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.simd_eq(rhs.a),
          b : self.b.simd_eq(rhs.b),
        }
      }
    }
  }
}

impl CmpGe for f32x8 {
  type Output = Self;
  #[inline]
  fn simd_ge(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256::<{cmp_op!(GreaterEqualOrdered)}>(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.simd_ge(rhs.a),
          b : self.b.simd_ge(rhs.b),
        }
      }
    }
  }
}

impl CmpGt for f32x8 {
  type Output = Self;
  #[inline]
  fn simd_gt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256::<{cmp_op!(GreaterThanOrdered)}>(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.simd_gt(rhs.a),
          b : self.b.simd_gt(rhs.b),
        }
      }
    }
  }
}

impl CmpNe for f32x8 {
  type Output = Self;
  // NOTE(review): `NotEqualOrdered` is false when either operand is NaN,
  // i.e. NaN lanes report "not not-equal" -- confirm callers expect the
  // ordered (not the more common unordered-NEQ) semantics here.
  #[inline]
  fn simd_ne(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256::<{cmp_op!(NotEqualOrdered)}>(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.simd_ne(rhs.a),
          b : self.b.simd_ne(rhs.b),
        }
      }
    }
  }
}

impl CmpLe for f32x8 {
  type Output = Self;
  #[inline]
  fn simd_le(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256::<{cmp_op!(LessEqualOrdered)}>(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.simd_le(rhs.a),
          b : self.b.simd_le(rhs.b),
        }
      }
    }
  }
}

impl CmpLt for f32x8 {
  type Output = Self;
  #[inline]
  fn simd_lt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256::<{cmp_op!(LessThanOrdered)}>(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.simd_lt(rhs.a),
          b : self.b.simd_lt(rhs.b),
        }
      }
    }
  }
}
355
356impl f32x8 {
  /// Builds an `f32x8` from eight lanes, in order.
  #[inline]
  #[must_use]
  pub const fn new(array: [f32; 8]) -> Self {
    // SAFETY: both cfg variants of `f32x8` are exactly 32 bytes of `f32`
    // data, the same size and layout as `[f32; 8]`, and every bit pattern is
    // a valid `f32`.
    unsafe { core::mem::transmute(array) }
  }

  /// Merge: picks from `t` where `self` (the mask) is set, else from `f`.
  /// Intended for full-lane masks such as those from the `simd_*`
  /// comparisons.
  /// NOTE(review): the AVX path (`blend_varying_m256`) selects each lane by
  /// its *sign bit* only, so partial-lane masks may behave differently on
  /// the two paths -- always pass all-ones/all-zeros lanes.
  #[inline]
  #[must_use]
  pub fn blend(self, t: Self, f: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: blend_varying_m256(f.avx, t.avx, self.avx) }
      } else {
        Self {
          a : self.a.blend(t.a, f.a),
          b : self.b.blend(t.b, f.b),
        }
      }
    }
  }

  /// Lanewise absolute value.
  #[inline]
  #[must_use]
  pub fn abs(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        // Clear the sign bit: AND every lane with 0x7FFF_FFFF.
        let non_sign_bits = f32x8::from(f32::from_bits(i32::MAX as u32));
        self & non_sign_bits
      } else {
        Self {
          a : self.a.abs(),
          b : self.b.abs(),
        }
      }
    }
  }

  /// Lanewise round toward negative infinity.
  #[inline]
  #[must_use]
  pub fn floor(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: floor_m256(self.avx) }
      } else {
        Self {
          a : self.a.floor(),
          b : self.b.floor(),
        }
      }
    }
  }

  /// Lanewise round toward positive infinity.
  #[inline]
  #[must_use]
  pub fn ceil(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: ceil_m256(self.avx) }
      } else {
        Self {
          a : self.a.ceil(),
          b : self.b.ceil(),
        }
      }
    }
  }
419
  /// Lanewise maximum using the raw underlying `max` operation. Fast, but
  /// NaN handling is whatever the platform op does -- NOTE(review): on x86
  /// the second operand is returned for unordered lanes; use [`f32x8::max`]
  /// for defined NaN behavior.
  #[inline]
  #[must_use]
  pub fn fast_max(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: max_m256(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.fast_max(rhs.a),
          b : self.b.fast_max(rhs.b),
        }
      }
    }
  }

  /// Lanewise maximum with defined NaN handling: wherever `rhs` is NaN, the
  /// lane from `self` is kept instead of the raw `max` result.
  #[inline]
  #[must_use]
  pub fn max(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        rhs.is_nan().blend(self, Self { avx: max_m256(self.avx, rhs.avx) })
      } else {
        Self {
          a : self.a.max(rhs.a),
          b : self.b.max(rhs.b),
        }
      }

    }
  }

  /// Lanewise minimum using the raw underlying `min` operation; NaN
  /// handling follows the platform op (see [`f32x8::fast_max`]).
  #[inline]
  #[must_use]
  pub fn fast_min(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: min_m256(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.fast_min(rhs.a),
          b : self.b.fast_min(rhs.b),
        }
      }
    }
  }

  /// Lanewise minimum with defined NaN handling: wherever `rhs` is NaN, the
  /// lane from `self` is kept.
  #[inline]
  #[must_use]
  pub fn min(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        rhs.is_nan().blend(self, Self { avx: min_m256(self.avx, rhs.avx) })
      } else {
        Self {
          a : self.a.min(rhs.a),
          b : self.b.min(rhs.b),
        }
      }
    }
  }
  /// Mask of lanes that hold NaN. Uses the unordered self-comparison,
  /// which is true only when a lane does not equal itself (i.e. NaN).
  #[inline]
  #[must_use]
  pub fn is_nan(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256::<{cmp_op!(Unordered)}>(self.avx, self.avx) }
      } else {
        Self {
          a : self.a.is_nan(),
          b : self.b.is_nan(),
        }
      }
    }
  }

  /// Mask of finite lanes (neither infinity nor NaN): shifts the sign bit
  /// out and tests whether the exponent field is all ones.
  #[inline]
  #[must_use]
  pub fn is_finite(self) -> Self {
    // After `<< 1` discards the sign, the 8 exponent bits sit at the top.
    let shifted_exp_mask = u32x8::from(0xFF000000);
    let u: u32x8 = cast(self);
    let shift_u = u << 1_u64;
    let out = !(shift_u & shifted_exp_mask).simd_eq(shifted_exp_mask);
    cast(out)
  }

  /// Mask of lanes that are +/- infinity: with the sign shifted out, an
  /// infinity is exactly an all-ones exponent and a zero mantissa.
  #[inline]
  #[must_use]
  pub fn is_inf(self) -> Self {
    let shifted_inf = u32x8::from(0xFF000000);
    let u: u32x8 = cast(self);
    let shift_u = u << 1_u64;
    let out = (shift_u).simd_eq(shifted_inf);
    cast(out)
  }
529
  /// Lanewise round to the nearest integer value (the AVX path uses the
  /// `Nearest` rounding op, i.e. ties to even).
  #[inline]
  #[must_use]
  pub fn round(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: round_m256::<{round_op!(Nearest)}>(self.avx) }
      } else {
        Self {
          a : self.a.round(),
          b : self.b.round(),
        }
      }
    }
  }

  /// Rounds each lane to `i32` using the CPU's native conversion. NaN and
  /// out-of-range lanes produce whatever that conversion yields
  /// (NOTE(review): the x86 "integer indefinite", `i32::MIN`) -- use
  /// [`f32x8::round_int`] for defined edge cases.
  #[inline]
  #[must_use]
  pub fn fast_round_int(self) -> i32x8 {
    pick! {
      if #[cfg(target_feature="avx")] {
        cast(convert_to_i32_m256i_from_m256(self.avx))
      } else {
        cast([
          self.a.fast_round_int(),
          self.b.fast_round_int()])
      }
    }
  }

  /// Rounds each lane to `i32` with defined edge cases: NaN lanes become 0
  /// and lanes >= 2^31 become `i32::MAX`.
  #[inline]
  #[must_use]
  pub fn round_int(self) -> i32x8 {
    pick! {
      if #[cfg(target_feature="avx")] {
        // NaN != NaN, so NaN lanes are zeroed before the conversion.
        let non_nan_mask = self.simd_eq(self);
        let non_nan = self & non_nan_mask;
        // Lanes >= 2^31 convert to i32::MIN; XOR with this all-ones mask
        // flips that to i32::MAX.
        let flip_to_max: i32x8 = cast(self.simd_ge(Self::splat(2147483648.0)));
        let cast: i32x8 = cast(convert_to_i32_m256i_from_m256(non_nan.avx));
        flip_to_max ^ cast
      } else {
        cast([
          self.a.round_int(),
          self.b.round_int(),
        ])
      }
    }
  }

  /// Truncates each lane to `i32` (round toward zero) using the CPU's
  /// native conversion; NaN and out-of-range handling is platform-defined,
  /// same as [`f32x8::fast_round_int`].
  #[inline]
  #[must_use]
  pub fn fast_trunc_int(self) -> i32x8 {
    pick! {
      if #[cfg(all(target_feature="avx"))] {
        cast(convert_truncate_to_i32_m256i_from_m256(self.avx))
      } else {
        cast([
          self.a.fast_trunc_int(),
          self.b.fast_trunc_int(),
        ])
      }
    }
  }

  /// Truncates each lane to `i32` with defined edge cases: NaN lanes become
  /// 0 and lanes >= 2^31 become `i32::MAX` (same masking/XOR scheme as
  /// [`f32x8::round_int`]).
  #[inline]
  #[must_use]
  pub fn trunc_int(self) -> i32x8 {
    pick! {
      if #[cfg(target_feature="avx")] {
        let non_nan_mask = self.simd_eq(self);
        let non_nan = self & non_nan_mask;
        let flip_to_max: i32x8 = cast(self.simd_ge(Self::splat(2147483648.0)));
        let cast: i32x8 = cast(convert_truncate_to_i32_m256i_from_m256(non_nan.avx));
        flip_to_max ^ cast
      } else {
        cast([
          self.a.trunc_int(),
          self.b.trunc_int(),
        ])
      }
    }
  }
  /// `(self * m) + a`. A true fused multiply-add (single rounding) when
  /// compiled with both AVX and FMA; otherwise a separate multiply and add.
  #[inline]
  #[must_use]
  pub fn mul_add(self, m: Self, a: Self) -> Self {
    pick! {
      if #[cfg(all(target_feature="avx",target_feature="fma"))] {
        Self { avx: fused_mul_add_m256(self.avx, m.avx, a.avx) }
      } else if #[cfg(target_feature="avx")] {
        (self * m) + a
      } else {
        Self {
          a : self.a.mul_add(m.a, a.a),
          b : self.b.mul_add(m.b, a.b),
        }
      }
    }
  }

  /// `(self * m) - s`, fused when AVX+FMA are available.
  #[inline]
  #[must_use]
  pub fn mul_sub(self, m: Self, s: Self) -> Self {
    pick! {
      if #[cfg(all(target_feature="avx",target_feature="fma"))] {
        Self { avx: fused_mul_sub_m256(self.avx, m.avx, s.avx) }
      } else if #[cfg(target_feature="avx")] {
        (self * m) - s
      } else {
        Self {
          a : self.a.mul_sub(m.a, s.a),
          b : self.b.mul_sub(m.b, s.b),
        }
      }
    }
  }

  /// `a - (self * m)`, fused when AVX+FMA are available.
  #[inline]
  #[must_use]
  pub fn mul_neg_add(self, m: Self, a: Self) -> Self {
    pick! {
      if #[cfg(all(target_feature="avx",target_feature="fma"))] {
        Self { avx: fused_mul_neg_add_m256(self.avx, m.avx, a.avx) }
      } else if #[cfg(target_feature="avx")] {
        a - (self * m)
      } else {
        Self {
          a : self.a.mul_neg_add(m.a, a.a),
          b : self.b.mul_neg_add(m.b, a.b),
        }
      }
    }
  }

  /// `-(self * m) - s`, fused when AVX+FMA are available.
  #[inline]
  #[must_use]
  pub fn mul_neg_sub(self, m: Self, s: Self) -> Self {
    pick! {
      if #[cfg(all(target_feature="avx",target_feature="fma"))] {
        Self { avx: fused_mul_neg_sub_m256(self.avx, m.avx, s.avx) }
      } else if #[cfg(target_feature="avx")] {
        -(self * m) - s
      } else {
        Self {
          a : self.a.mul_neg_sub(m.a, s.a),
          b : self.b.mul_neg_sub(m.b, s.b),
        }
      }
    }
  }

  /// Flips the sign of each lane of `self` wherever the matching lane of
  /// `signs` is negative (XOR with `signs`' sign bits).
  #[inline]
  #[must_use]
  pub fn flip_signs(self, signs: Self) -> Self {
    self ^ (signs & Self::from(-0.0))
  }

  /// IEEE `copysign`: the magnitude of `self` with the sign of `sign`.
  #[inline]
  #[must_use]
  pub fn copysign(self, sign: Self) -> Self {
    // `u32::MAX >> 1` keeps everything except the sign bit.
    let magnitude_mask = Self::from(f32::from_bits(u32::MAX >> 1));
    (self & magnitude_mask) | (sign & Self::from(-0.0))
  }
806
  /// Computes lanewise `(asin(self), acos(self))` in one pass, sharing the
  /// polynomial evaluation between the two results. Inputs are expected in
  /// `[-1, 1]`.
  #[inline]
  pub fn asin_acos(self) -> (Self, Self) {
    // Minimax polynomial coefficients for asin on the reduced interval.
    const_f32_as_f32x8!(P4asinf, 4.2163199048E-2);
    const_f32_as_f32x8!(P3asinf, 2.4181311049E-2);
    const_f32_as_f32x8!(P2asinf, 4.5470025998E-2);
    const_f32_as_f32x8!(P1asinf, 7.4953002686E-2);
    const_f32_as_f32x8!(P0asinf, 1.6666752422E-1);

    let xa = self.abs();
    // |x| >= 0.5 uses the reduction asin(x) = pi/2 - 2*asin(sqrt((1-x)/2)).
    let big = xa.simd_ge(f32x8::splat(0.5));

    let x1 = f32x8::splat(0.5) * (f32x8::ONE - xa);
    let x2 = xa * xa;
    let x3 = big.blend(x1, x2);

    let xb = x1.sqrt();

    let x4 = big.blend(xb, xa);

    // z ~= asin of the reduced argument.
    let z = polynomial_4!(x3, P0asinf, P1asinf, P2asinf, P3asinf, P4asinf);
    let z = z.mul_add(x3 * x4, x4);

    let z1 = z + z;

    // acos: undo the reduction and fold in the input's sign.
    let z3 = self.simd_lt(f32x8::ZERO).blend(f32x8::PI - z1, z1);
    let z4 = f32x8::FRAC_PI_2 - z.flip_signs(self);
    let acos = big.blend(z3, z4);

    // asin: likewise.
    let z3 = f32x8::FRAC_PI_2 - z1;
    let asin = big.blend(z3, z);
    let asin = asin.flip_signs(self);

    (asin, acos)
  }

  /// Lanewise arcsine for inputs in `[-1, 1]`; same algorithm as
  /// [`f32x8::asin_acos`] without the acos half.
  #[inline]
  #[must_use]
  pub fn asin(self) -> Self {
    const_f32_as_f32x8!(P4asinf, 4.2163199048E-2);
    const_f32_as_f32x8!(P3asinf, 2.4181311049E-2);
    const_f32_as_f32x8!(P2asinf, 4.5470025998E-2);
    const_f32_as_f32x8!(P1asinf, 7.4953002686E-2);
    const_f32_as_f32x8!(P0asinf, 1.6666752422E-1);

    let xa = self.abs();
    let big = xa.simd_ge(f32x8::splat(0.5));

    let x1 = f32x8::splat(0.5) * (f32x8::ONE - xa);
    let x2 = xa * xa;
    let x3 = big.blend(x1, x2);

    let xb = x1.sqrt();

    let x4 = big.blend(xb, xa);

    let z = polynomial_4!(x3, P0asinf, P1asinf, P2asinf, P3asinf, P4asinf);
    let z = z.mul_add(x3 * x4, x4);

    let z1 = z + z;

    let z3 = f32x8::FRAC_PI_2 - z1;
    let asin = big.blend(z3, z);
    let asin = asin.flip_signs(self);

    asin
  }

  /// Lanewise arccosine for inputs in `[-1, 1]`; same algorithm as
  /// [`f32x8::asin_acos`] without the asin half.
  #[inline]
  #[must_use]
  pub fn acos(self) -> Self {
    const_f32_as_f32x8!(P4asinf, 4.2163199048E-2);
    const_f32_as_f32x8!(P3asinf, 2.4181311049E-2);
    const_f32_as_f32x8!(P2asinf, 4.5470025998E-2);
    const_f32_as_f32x8!(P1asinf, 7.4953002686E-2);
    const_f32_as_f32x8!(P0asinf, 1.6666752422E-1);

    let xa = self.abs();
    let big = xa.simd_ge(f32x8::splat(0.5));

    let x1 = f32x8::splat(0.5) * (f32x8::ONE - xa);
    let x2 = xa * xa;
    let x3 = big.blend(x1, x2);

    let xb = x1.sqrt();

    let x4 = big.blend(xb, xa);

    let z = polynomial_4!(x3, P0asinf, P1asinf, P2asinf, P3asinf, P4asinf);
    let z = z.mul_add(x3 * x4, x4);

    let z1 = z + z;

    let z3 = self.simd_lt(f32x8::ZERO).blend(f32x8::PI - z1, z1);
    let z4 = f32x8::FRAC_PI_2 - z.flip_signs(self);
    let acos = big.blend(z3, z4);

    acos
  }

  /// Lanewise arctangent, returning radians in `(-pi/2, pi/2)`.
  #[inline]
  pub fn atan(self) -> Self {
    // Minimax polynomial coefficients for atan on the reduced interval.
    const_f32_as_f32x8!(P3atanf, 8.05374449538E-2);
    const_f32_as_f32x8!(P2atanf, -1.38776856032E-1);
    const_f32_as_f32x8!(P1atanf, 1.99777106478E-1);
    const_f32_as_f32x8!(P0atanf, -3.33329491539E-1);

    let t = self.abs();

    // Interval reduction around sqrt(2)-1 and sqrt(2)+1; `s` is the angle
    // (0, pi/4, or pi/2) added back after the polynomial.
    let notsmal = t.simd_ge(Self::SQRT_2 - Self::ONE);
    let notbig = t.simd_le(Self::SQRT_2 + Self::ONE);

    let mut s = notbig.blend(Self::FRAC_PI_4, Self::FRAC_PI_2);
    s = notsmal & s;

    // Build the reduced argument z = a/b for the chosen interval.
    let mut a = notbig & t;
    a = notsmal.blend(a - Self::ONE, a);
    let mut b = notbig & Self::ONE;
    b = notsmal.blend(b + t, b);
    let z = a / b;

    let zz = z * z;

    let mut re = polynomial_3!(zz, P0atanf, P1atanf, P2atanf, P3atanf);
    re = re.mul_add(zz * z, z) + s;

    // Restore the original argument's sign.
    re = (self.sign_bit()).blend(-re, re);

    re
  }

  /// Lanewise `atan2(self, x)`: the angle of the point `(x, self)` where
  /// `self` is the y coordinate.
  #[inline]
  pub fn atan2(self, x: Self) -> Self {
    const_f32_as_f32x8!(P3atanf, 8.05374449538E-2);
    const_f32_as_f32x8!(P2atanf, -1.38776856032E-1);
    const_f32_as_f32x8!(P1atanf, 1.99777106478E-1);
    const_f32_as_f32x8!(P0atanf, -3.33329491539E-1);

    let y = self;

    // Work with |x| >= |y|; remember which lanes were swapped.
    let x1 = x.abs();
    let y1 = y.abs();
    let swapxy = y1.simd_gt(x1);
    let mut x2 = swapxy.blend(y1, x1);
    let mut y2 = swapxy.blend(x1, y1);

    // inf/inf would be NaN below; ANDing +/-inf bits with -1.0's bit
    // pattern turns both operands into +/-1.0 instead.
    let both_infinite = x.is_inf() & y.is_inf();
    if both_infinite.any() {
      let minus_one = -Self::ONE;
      x2 = both_infinite.blend(x2 & minus_one, x2);
      y2 = both_infinite.blend(y2 & minus_one, y2);
    }

    let t = y2 / x2;

    // Same interval reduction as `atan`.
    let notsmal = t.simd_ge(Self::SQRT_2 - Self::ONE);

    let a = notsmal.blend(t - Self::ONE, t);
    let b = notsmal.blend(t + Self::ONE, Self::ONE);
    let s = notsmal & Self::FRAC_PI_4;
    let z = a / b;

    let zz = z * z;

    let mut re = polynomial_3!(zz, P0atanf, P1atanf, P2atanf, P3atanf);
    re = re.mul_add(zz * z, z) + s;

    // Quadrant fixups: undo the swap, zero when both inputs are +0, and
    // reflect across the y axis for negative x.
    re = swapxy.blend(Self::FRAC_PI_2 - re, re);
    re = ((x | y).simd_eq(Self::ZERO)).blend(Self::ZERO, re);
    re = (x.sign_bit()).blend(Self::PI - re, re);

    // The result carries the sign of y.
    re = (y.sign_bit()).blend(-re, re);

    re
  }
1009
  /// Computes lanewise `(sin(self), cos(self))` in one pass; arguments are
  /// in radians.
  #[inline]
  #[must_use]
  pub fn sin_cos(self) -> (Self, Self) {
    // pi/2 split into three parts (Cody-Waite style) so that the reduction
    // `x - y*(pi/2)` loses as little precision as possible.
    const_f32_as_f32x8!(DP1F, 0.78515625_f32 * 2.0);
    const_f32_as_f32x8!(DP2F, 2.4187564849853515625E-4_f32 * 2.0);
    const_f32_as_f32x8!(DP3F, 3.77489497744594108E-8_f32 * 2.0);

    // Minimax polynomials for sin and cos on the reduced interval.
    const_f32_as_f32x8!(P0sinf, -1.6666654611E-1);
    const_f32_as_f32x8!(P1sinf, 8.3321608736E-3);
    const_f32_as_f32x8!(P2sinf, -1.9515295891E-4);

    const_f32_as_f32x8!(P0cosf, 4.166664568298827E-2);
    const_f32_as_f32x8!(P1cosf, -1.388731625493765E-3);
    const_f32_as_f32x8!(P2cosf, 2.443315711809948E-5);

    const_f32_as_f32x8!(TWO_OVER_PI, 2.0 / core::f32::consts::PI);

    let xa = self.abs();

    // Quadrant index q = round(|x| / (pi/2)).
    let y = (xa * TWO_OVER_PI).round();
    let q: i32x8 = y.round_int();

    // Reduced argument: x = |x| - y*pi/2, subtracted in three steps.
    let x = y.mul_neg_add(DP3F, y.mul_neg_add(DP2F, y.mul_neg_add(DP1F, xa)));

    let x2 = x * x;
    let mut s = polynomial_2!(x2, P0sinf, P1sinf, P2sinf) * (x * x2) + x;
    let mut c = polynomial_2!(x2, P0cosf, P1cosf, P2cosf) * (x2 * x2)
      + f32x8::from(0.5).mul_neg_add(x2, f32x8::from(1.0));

    // Odd quadrants swap the roles of sin and cos.
    let swap = !(q & i32x8::from(1)).simd_eq(i32x8::from(0));

    // Arguments too large for the reduction (or non-finite) fall back to
    // sin = 0, cos = 1.
    let mut overflow: f32x8 = cast(q.simd_gt(i32x8::from(0x2000000)));
    overflow &= xa.is_finite();
    s = overflow.blend(f32x8::from(0.0), s);
    c = overflow.blend(f32x8::from(1.0), c);

    // Select and sign-correct sin: quadrant bit 1 plus the input's own sign.
    let mut sin1 = cast::<_, f32x8>(swap).blend(c, s);
    let sign_sin: i32x8 = (q << 30) ^ cast::<_, i32x8>(self);
    sin1 = sin1.flip_signs(cast(sign_sin));

    // Select and sign-correct cos from quadrant bits alone.
    let mut cos1 = cast::<_, f32x8>(swap).blend(s, c);
    let sign_cos: i32x8 = ((q + i32x8::from(1)) & i32x8::from(2)) << 30;
    cos1 ^= cast::<_, f32x8>(sign_cos);

    (sin1, cos1)
  }
1062 #[inline]
1063 #[must_use]
1064 pub fn sin(self) -> Self {
1065 let (s, _) = self.sin_cos();
1066 s
1067 }
1068 #[inline]
1069 #[must_use]
1070 pub fn cos(self) -> Self {
1071 let (_, c) = self.sin_cos();
1072 c
1073 }
1074 #[inline]
1075 #[must_use]
1076 pub fn tan(self) -> Self {
1077 let (s, c) = self.sin_cos();
1078 s / c
1079 }
  /// Converts each lane from radians to degrees.
  #[inline]
  #[must_use]
  pub fn to_degrees(self) -> Self {
    const_f32_as_f32x8!(RAD_TO_DEG_RATIO, 180.0_f32 / core::f32::consts::PI);
    self * RAD_TO_DEG_RATIO
  }
  /// Converts each lane from degrees to radians.
  #[inline]
  #[must_use]
  pub fn to_radians(self) -> Self {
    const_f32_as_f32x8!(DEG_TO_RAD_RATIO, core::f32::consts::PI / 180.0_f32);
    self * DEG_TO_RAD_RATIO
  }
  /// Lanewise reciprocal `1/x`.
  /// NOTE(review): the AVX path uses the hardware reciprocal approximation
  /// (`reciprocal_m256`), which is low precision (~12 bits) -- don't rely on
  /// exact results, and confirm the non-AVX half matches in accuracy.
  #[inline]
  #[must_use]
  pub fn recip(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: reciprocal_m256(self.avx) }
      } else {
        Self {
          a : self.a.recip(),
          b : self.b.recip(),
        }
      }
    }
  }
  /// Lanewise reciprocal square root `1/sqrt(x)`.
  /// NOTE(review): like `recip`, the AVX path is a low-precision hardware
  /// approximation.
  #[inline]
  #[must_use]
  pub fn recip_sqrt(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: reciprocal_sqrt_m256(self.avx) }
      } else {
        Self {
          a : self.a.recip_sqrt(),
          b : self.b.recip_sqrt(),
        }
      }
    }
  }
  /// Lanewise square root (full precision).
  #[inline]
  #[must_use]
  pub fn sqrt(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: sqrt_m256(self.avx) }
      } else {
        Self {
          a : self.a.sqrt(),
          b : self.b.sqrt(),
        }
      }
    }
  }
  /// Packs each lane's sign bit into the low 8 bits of a `u32`
  /// (lane 0 -> bit 0).
  #[inline]
  #[must_use]
  #[doc(alias("movemask", "move_mask"))]
  pub fn to_bitmask(self) -> u32 {
    pick! {
      if #[cfg(target_feature="avx")] {
        move_mask_m256(self.avx) as u32
      } else {
        // Low half `a` supplies bits 0..4, high half `b` bits 4..8.
        (self.b.to_bitmask() << 4) | self.a.to_bitmask()
      }
    }
  }
  /// `true` if any lane's sign bit is set (i.e. any mask lane is "true").
  #[inline]
  #[must_use]
  pub fn any(self) -> bool {
    pick! {
      if #[cfg(target_feature="avx")] {
        move_mask_m256(self.avx) != 0
      } else {
        self.a.any() || self.b.any()
      }
    }
  }
  /// `true` if all eight lanes have their sign bit set.
  #[inline]
  #[must_use]
  pub fn all(self) -> bool {
    pick! {
      if #[cfg(target_feature="avx")] {
        move_mask_m256(self.avx) == 0b11111111
      } else {
        self.a.all() && self.b.all()
      }
    }
  }
  /// `true` if no lane's sign bit is set.
  #[inline]
  #[must_use]
  pub fn none(self) -> bool {
    !self.any()
  }
1173
  /// Computes `2.0^n` for integer-valued lanes by constructing the exponent
  /// bits directly: adding `bias + 2^23` parks the integer in the low
  /// mantissa bits, and `<< 23` then slides it into the exponent field.
  /// No range or NaN checking.
  #[inline]
  fn vm_pow2n(self) -> Self {
    const_f32_as_f32x8!(pow2_23, 8388608.0);
    const_f32_as_f32x8!(bias, 127.0);
    let a = self + (bias + pow2_23);
    let c = cast::<_, i32x8>(a) << 23;
    cast::<_, f32x8>(c)
  }

  /// Lanewise `e^x`, computed as `2^r * p(x - r*ln 2)` with
  /// `r = round(x * log2(e))`. Lanes with `|x| >= 87.3` or non-finite input
  /// produce `0.0` (as implemented -- including +inf input).
  #[inline]
  #[must_use]
  pub fn exp(self) -> Self {
    // Taylor coefficients 1/2! .. 1/7!.
    const_f32_as_f32x8!(P0, 1.0 / 2.0);
    const_f32_as_f32x8!(P1, 1.0 / 6.0);
    const_f32_as_f32x8!(P2, 1. / 24.);
    const_f32_as_f32x8!(P3, 1. / 120.);
    const_f32_as_f32x8!(P4, 1. / 720.);
    const_f32_as_f32x8!(P5, 1. / 5040.);
    // ln(2) split in two for extended-precision argument reduction.
    const_f32_as_f32x8!(LN2D_HI, 0.693359375);
    const_f32_as_f32x8!(LN2D_LO, -2.12194440e-4);
    let max_x = f32x8::from(87.3);
    let r = (self * Self::LOG2_E).round();
    let x = r.mul_neg_add(LN2D_HI, self);
    let x = r.mul_neg_add(LN2D_LO, x);
    let z = polynomial_5!(x, P0, P1, P2, P3, P4, P5);
    let x2 = x * x;
    let z = z.mul_add(x2, x);
    let n2 = Self::vm_pow2n(r);
    let z = (z + Self::ONE) * n2;
    let in_range = self.abs().simd_lt(max_x);
    let in_range = in_range & self.is_finite();
    in_range.blend(z, Self::ZERO)
  }

  /// Extracts each lane's unbiased binary exponent as a float.
  /// NOTE(review): the sign bit leaks into the shifted exponent for
  /// negative inputs -- callers in this file pass absolute values or
  /// overwrite those lanes afterwards; confirm before reusing.
  #[inline]
  fn exponent(self) -> f32x8 {
    const_f32_as_f32x8!(pow2_23, 8388608.0);
    const_f32_as_f32x8!(bias, 127.0);
    let a = cast::<_, u32x8>(self);
    let b = a >> 23;
    // OR into 2^23's bit pattern, then subtract it back out along with the
    // exponent bias -- the inverse of `vm_pow2n`.
    let c = b | cast::<_, u32x8>(pow2_23);
    let d = cast::<_, f32x8>(c);
    let e = d - (pow2_23 + bias);
    e
  }

  /// Returns each lane's mantissa scaled into `[0.5, 1.0)` by overwriting
  /// the exponent field with 0.5's exponent (0x3F000000).
  #[inline]
  fn fraction_2(self) -> Self {
    let t1 = cast::<_, u32x8>(self);
    let t2 = cast::<_, u32x8>(
      (t1 & u32x8::from(0x007FFFFF)) | u32x8::from(0x3F000000),
    );
    cast::<_, f32x8>(t2)
  }
  /// Mask-like result marking lanes whose exponent field is all zero, i.e.
  /// exactly zero or subnormal.
  /// NOTE(review): true lanes come back as -1.0 (via `round_float` of the
  /// integer mask), not all-ones -- fine for sign-bit based blends.
  #[inline]
  fn is_zero_or_subnormal(self) -> Self {
    let t = cast::<_, i32x8>(self);
    let t = t & i32x8::splat(0x7F800000);
    i32x8::round_float(t.simd_eq(i32x8::splat(0)))
  }
  /// All lanes set to the +infinity bit pattern.
  #[inline]
  fn infinity() -> Self {
    cast::<_, f32x8>(i32x8::splat(0x7F800000))
  }
  /// Quiet NaN (payload 0x101) returned by `ln` for domain errors.
  #[inline]
  fn nan_log() -> Self {
    cast::<_, f32x8>(i32x8::splat(0x7FC00000 | 0x101 & 0x003FFFFF))
  }
  /// Quiet NaN (payload 0x101) returned by `pow_f32x8` for domain errors.
  #[inline]
  fn nan_pow() -> Self {
    cast::<_, f32x8>(i32x8::splat(0x7FC00000 | 0x101 & 0x003FFFFF))
  }
  /// Mask of lanes whose sign bit is set (includes -0.0 and negative NaN).
  /// The arithmetic `>> 31` smears the sign bit across the lane; lanes that
  /// then no longer compare equal to 0.0 had the sign bit set.
  #[inline]
  pub fn sign_bit(self) -> Self {
    let t1 = cast::<_, i32x8>(self);
    let t2 = t1 >> 31;
    !cast::<_, f32x8>(t2).simd_eq(f32x8::ZERO)
  }

  /// Horizontal sum of all eight lanes.
  #[inline]
  #[must_use]
  pub fn reduce_add(self) -> f32 {
    pick! {
      if #[cfg(target_feature="avx")]{
        // Fold 256 -> 128 -> 64 -> 32 bits, adding pairwise at each step.
        let hi_quad = extract_m128_from_m256::<1>(self.avx);
        let lo_quad = cast_to_m128_from_m256(self.avx);
        let sum_quad = add_m128(lo_quad,hi_quad);
        let lo_dual = sum_quad;
        let hi_dual = move_high_low_m128(sum_quad,sum_quad);
        let sum_dual = add_m128(lo_dual,hi_dual);
        let lo = sum_dual;
        let hi = shuffle_abi_f32_all_m128::<0b_01>(sum_dual, sum_dual);
        let sum = add_m128_s(lo, hi);
        get_f32_from_m128_s(sum)
      } else {
        self.a.reduce_add() + self.b.reduce_add()
      }
    }
  }
1277
  /// Lanewise natural logarithm via mantissa/exponent split plus a
  /// polynomial on the centered mantissa. Non-finite lanes pass through
  /// unchanged; lanes below the smallest normal (including negatives)
  /// produce NaN.
  /// NOTE(review): exactly-zero/subnormal lanes are blended to the
  /// `infinity()` bit pattern (+inf), where ln(0) conventionally yields
  /// -inf -- confirm this matches the intended semantics.
  #[inline]
  #[must_use]
  pub fn ln(self) -> Self {
    const_f32_as_f32x8!(HALF, 0.5);
    // Minimax polynomial coefficients for ln(1+x) on the reduced interval.
    const_f32_as_f32x8!(P0, 3.3333331174E-1);
    const_f32_as_f32x8!(P1, -2.4999993993E-1);
    const_f32_as_f32x8!(P2, 2.0000714765E-1);
    const_f32_as_f32x8!(P3, -1.6668057665E-1);
    const_f32_as_f32x8!(P4, 1.4249322787E-1);
    const_f32_as_f32x8!(P5, -1.2420140846E-1);
    const_f32_as_f32x8!(P6, 1.1676998740E-1);
    const_f32_as_f32x8!(P7, -1.1514610310E-1);
    const_f32_as_f32x8!(P8, 7.0376836292E-2);
    // ln(2) split in two for an extended-precision multiply by the exponent.
    const_f32_as_f32x8!(LN2F_HI, 0.693359375);
    const_f32_as_f32x8!(LN2F_LO, -2.12194440e-4);
    const_f32_as_f32x8!(VM_SMALLEST_NORMAL, 1.17549435E-38);

    let x1 = self;
    // Mantissa in [0.5, 1), integer exponent; re-center the mantissa onto
    // [sqrt(0.5), sqrt(2)) around 1.0.
    let x = Self::fraction_2(x1);
    let e = Self::exponent(x1);
    let mask = x.simd_gt(Self::SQRT_2 * HALF);
    let x = (!mask).blend(x + x, x);
    let fe = mask.blend(e + Self::ONE, e);
    let x = x - Self::ONE;
    let res = polynomial_8!(x, P0, P1, P2, P3, P4, P5, P6, P7, P8);
    let x2 = x * x;
    let res = x2 * x * res;
    let res = fe.mul_add(LN2F_LO, res);
    let res = res + x2.mul_neg_add(HALF, x);
    let res = fe.mul_add(LN2F_HI, res);
    // Special cases only when some lane actually needs them.
    let overflow = !self.is_finite();
    let underflow = x1.simd_lt(VM_SMALLEST_NORMAL);
    let mask = overflow | underflow;
    if !mask.any() {
      res
    } else {
      let is_zero = self.is_zero_or_subnormal();
      let res = underflow.blend(Self::nan_log(), res);
      let res = is_zero.blend(Self::infinity(), res);
      let res = overflow.blend(self, res);
      res
    }
  }

  /// Lanewise base-2 logarithm, as `ln(x) * log2(e)`.
  #[inline]
  #[must_use]
  pub fn log2(self) -> Self {
    Self::ln(self) * Self::LOG2_E
  }
  /// Lanewise base-10 logarithm, as `ln(x) * log10(e)`.
  #[inline]
  #[must_use]
  pub fn log10(self) -> Self {
    Self::ln(self) * Self::LOG10_E
  }
1333
  /// Lanewise `self` raised to the power `y`, via an extended-precision
  /// log/exp pair, with `powf`'s special cases handled explicitly:
  /// zero/subnormal bases, negative bases (valid for integer `y`, NaN
  /// otherwise), overflow/underflow clamping, and NaN propagation.
  #[inline]
  #[must_use]
  pub fn pow_f32x8(self, y: Self) -> Self {
    // ln(2) split in two plus the same ln-polynomial coefficients as `ln`.
    const_f32_as_f32x8!(ln2f_hi, 0.693359375);
    const_f32_as_f32x8!(ln2f_lo, -2.12194440e-4);
    const_f32_as_f32x8!(P0logf, 3.3333331174E-1);
    const_f32_as_f32x8!(P1logf, -2.4999993993E-1);
    const_f32_as_f32x8!(P2logf, 2.0000714765E-1);
    const_f32_as_f32x8!(P3logf, -1.6668057665E-1);
    const_f32_as_f32x8!(P4logf, 1.4249322787E-1);
    const_f32_as_f32x8!(P5logf, -1.2420140846E-1);
    const_f32_as_f32x8!(P6logf, 1.1676998740E-1);
    const_f32_as_f32x8!(P7logf, -1.1514610310E-1);
    const_f32_as_f32x8!(P8logf, 7.0376836292E-2);

    // exp Taylor coefficients 1/2! .. 1/7!.
    const_f32_as_f32x8!(p2expf, 1.0 / 2.0);
    const_f32_as_f32x8!(p3expf, 1.0 / 6.0);
    const_f32_as_f32x8!(p4expf, 1.0 / 24.0);
    const_f32_as_f32x8!(p5expf, 1.0 / 120.0);
    const_f32_as_f32x8!(p6expf, 1.0 / 720.0);
    const_f32_as_f32x8!(p7expf, 1.0 / 5040.0);

    // --- log of |self|: polynomial part `lg1` on the centered mantissa ---
    let x1 = self.abs();
    let x = x1.fraction_2();
    let mask = x.simd_gt(f32x8::SQRT_2 * f32x8::HALF);
    let x = (!mask).blend(x + x, x);

    let x = x - f32x8::ONE;
    let x2 = x * x;
    let lg1 = polynomial_8!(
      x, P0logf, P1logf, P2logf, P3logf, P4logf, P5logf, P6logf, P7logf, P8logf
    );
    let lg1 = lg1 * x2 * x;

    // Exponent part; e1 carries the integer portion of ef*y, yr the
    // rounding remainder.
    let ef = x1.exponent();
    let ef = mask.blend(ef + f32x8::ONE, ef);
    let e1 = (ef * y).round();
    let yr = ef.mul_sub(y, e1);

    // Mantissa log with its rounding-error terms tracked explicitly.
    let lg = f32x8::HALF.mul_neg_add(x2, x) + lg1;
    let x2_err = (f32x8::HALF * x).mul_sub(x, f32x8::HALF * x2);
    let lg_err = f32x8::HALF.mul_add(x2, lg - x) - lg1;

    // --- v = y * ln(mantissa), reduced modulo ln(2); exponent into e2 ---
    let e2 = (lg * y * f32x8::LOG2_E).round();
    let v = lg.mul_sub(y, e2 * ln2f_hi);
    let v = e2.mul_neg_add(ln2f_lo, v);
    let v = v - (lg_err + x2_err).mul_sub(y, yr * f32x8::LN_2);

    // --- exp(v): one more power-of-two reduction (e3) plus polynomial ---
    let x = v;
    let e3 = (x * f32x8::LOG2_E).round();
    let x = e3.mul_neg_add(f32x8::LN_2, x);
    let x2 = x * x;
    let z = x2.mul_add(
      polynomial_5!(x, p2expf, p3expf, p4expf, p5expf, p6expf, p7expf),
      x + f32x8::ONE,
    );

    // Total exponent; ej also folds in z's own exponent bits so the range
    // test sees the final magnitude.
    let ee = e1 + e2 + e3;
    let ei = cast::<_, i32x8>(ee.round_int());
    let ej = cast::<_, i32x8>(ei + (cast::<_, i32x8>(z) >> 23));

    let overflow = cast::<_, f32x8>(ej.simd_gt(i32x8::splat(0x0FF)))
      | (ee.simd_gt(f32x8::splat(300.0)));
    let underflow = cast::<_, f32x8>(ej.simd_lt(i32x8::splat(0x000)))
      | (ee.simd_lt(f32x8::splat(-300.0)));

    // Scale z by 2^ei via direct exponent arithmetic, then clamp.
    let z = cast::<_, f32x8>(cast::<_, i32x8>(z) + (ei << 23));
    let z = underflow.blend(f32x8::ZERO, z);
    let z = overflow.blend(Self::infinity(), z);

    // 0^y: +inf for y < 0, 1 for y == 0, 0 for y > 0.
    let x_zero = self.is_zero_or_subnormal();
    let z = x_zero.blend(
      y.simd_lt(f32x8::ZERO).blend(
        Self::infinity(),
        y.simd_eq(f32x8::ZERO).blend(f32x8::ONE, f32x8::ZERO),
      ),
      z,
    );

    // Negative bases: integer y is valid (odd y flips the sign),
    // non-integer y is a domain error (NaN).
    let x_sign = self.sign_bit();
    let z = if x_sign.any() {
      let yi = y.simd_eq(y.round());
      // Sign-flip marker for odd integer y (low bit of round(y) shifted to
      // the sign position).
      let y_odd = cast::<_, i32x8>(y.round_int() << 31).round_float();

      let z1 =
        yi.blend(z | y_odd, self.simd_eq(Self::ZERO).blend(z, Self::nan_pow()));

      x_sign.blend(z1, z)
    } else {
      z
    };

    // Fast path: everything finite, nothing more to fix up.
    let x_finite = self.is_finite();
    let y_finite = y.is_finite();
    let e_finite = ee.is_finite();
    if (x_finite & y_finite & (e_finite | x_zero)).all() {
      return z;
    }

    // NaN in either input propagates (`self + y` is NaN on those lanes).
    (self.is_nan() | y.is_nan()).blend(self + y, z)
  }
  /// Lanewise `self^y` for a scalar exponent; see [`f32x8::pow_f32x8`].
  #[inline]
  pub fn powf(self, y: f32) -> Self {
    Self::pow_f32x8(self, f32x8::splat(y))
  }
1445
  /// Transposes `data` viewed as an 8x8 matrix of `f32`: output row `i`
  /// holds lane `i` of every input row.
  #[must_use]
  #[inline]
  pub fn transpose(data: [f32x8; 8]) -> [f32x8; 8] {
    pick! {
      if #[cfg(target_feature="avx")] {
        // Stage 1: interleave adjacent row pairs within each 128-bit half.
        let a0 = unpack_lo_m256(data[0].avx, data[1].avx);
        let a1 = unpack_hi_m256(data[0].avx, data[1].avx);
        let a2 = unpack_lo_m256(data[2].avx, data[3].avx);
        let a3 = unpack_hi_m256(data[2].avx, data[3].avx);
        let a4 = unpack_lo_m256(data[4].avx, data[5].avx);
        let a5 = unpack_hi_m256(data[4].avx, data[5].avx);
        let a6 = unpack_lo_m256(data[6].avx, data[7].avx);
        let a7 = unpack_hi_m256(data[6].avx, data[7].avx);

        // Builds the `_MM_SHUFFLE(z, y, x, w)` style immediate.
        pub const fn mm_shuffle(z: i32, y: i32, x: i32, w: i32) -> i32 {
          (z << 6) | (y << 4) | (x << 2) | w
        }

        const SHUFF_LO : i32 = mm_shuffle(1,0,1,0);
        const SHUFF_HI : i32 = mm_shuffle(3,2,3,2);

        // Stage 2: gather 64-bit pairs into four-element column groups.
        let b0 = shuffle_m256::<SHUFF_LO>(a0,a2);
        let b1 = shuffle_m256::<SHUFF_HI>(a0,a2);
        let b2 = shuffle_m256::<SHUFF_LO>(a1,a3);
        let b3 = shuffle_m256::<SHUFF_HI>(a1,a3);
        let b4 = shuffle_m256::<SHUFF_LO>(a4,a6);
        let b5 = shuffle_m256::<SHUFF_HI>(a4,a6);
        let b6 = shuffle_m256::<SHUFF_LO>(a5,a7);
        let b7 = shuffle_m256::<SHUFF_HI>(a5,a7);

        // Stage 3: recombine the 128-bit halves across register pairs
        // (0x20 = low halves, 0x31 = high halves).
        [
          f32x8 { avx: permute2z_m256::<0x20>(b0, b4) },
          f32x8 { avx: permute2z_m256::<0x20>(b1, b5) },
          f32x8 { avx: permute2z_m256::<0x20>(b2, b6) },
          f32x8 { avx: permute2z_m256::<0x20>(b3, b7) },
          f32x8 { avx: permute2z_m256::<0x31>(b0, b4) },
          f32x8 { avx: permute2z_m256::<0x31>(b1, b5) },
          f32x8 { avx: permute2z_m256::<0x31>(b2, b6) },
          f32x8 { avx: permute2z_m256::<0x31>(b3, b7) }
        ]
      } else {
        // Portable fallback: gather each output row lane by lane.
        #[inline(always)]
        fn transpose_column(data: &[f32x8; 8], index: usize) -> f32x8 {
          f32x8::new([
            data[0].as_array()[index],
            data[1].as_array()[index],
            data[2].as_array()[index],
            data[3].as_array()[index],
            data[4].as_array()[index],
            data[5].as_array()[index],
            data[6].as_array()[index],
            data[7].as_array()[index],
          ])
        }

        [
          transpose_column(&data, 0),
          transpose_column(&data, 1),
          transpose_column(&data, 2),
          transpose_column(&data, 3),
          transpose_column(&data, 4),
          transpose_column(&data, 5),
          transpose_column(&data, 6),
          transpose_column(&data, 7),
        ]
      }
    }
  }
1519
  /// Returns the lanes as a plain array, by value.
  #[inline]
  pub fn to_array(self) -> [f32; 8] {
    cast(self)
  }

  /// Borrows the lanes as an array.
  #[inline]
  pub fn as_array(&self) -> &[f32; 8] {
    cast_ref(self)
  }

  /// Mutably borrows the lanes as an array.
  #[inline]
  pub fn as_mut_array(&mut self) -> &mut [f32; 8] {
    cast_mut(self)
  }

  /// Converts each `i32` lane to `f32`.
  #[inline]
  pub fn from_i32x8(v: i32x8) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx: convert_to_m256_from_i32_m256i(v.avx2) }
      } else {
        Self::new([
          v.as_array()[0] as f32,
          v.as_array()[1] as f32,
          v.as_array()[2] as f32,
          v.as_array()[3] as f32,
          v.as_array()[4] as f32,
          v.as_array()[5] as f32,
          v.as_array()[6] as f32,
          v.as_array()[7] as f32,
        ])
      }
    }
  }
1554}
1555
1556impl Not for f32x8 {
1557 type Output = Self;
1558 #[inline]
1559 fn not(self) -> Self {
1560 pick! {
1561 if #[cfg(target_feature="avx")] {
1562 Self { avx: self.avx.not() }
1563 } else {
1564 Self {
1565 a : self.a.not(),
1566 b : self.b.not(),
1567 }
1568 }
1569 }
1570 }
1571}