1#![allow(
61 non_camel_case_types,
62 unknown_lints,
63 clippy::zero_prefixed_literal,
64 clippy::identity_op,
65 clippy::too_many_arguments,
66 clippy::type_complexity,
67 clippy::missing_transmute_annotations,
68 clippy::tabs_in_doc_comments,
69 clippy::modulo_one,
70 clippy::useless_transmute,
71 clippy::not_unsafe_ptr_arg_deref,
72 clippy::manual_is_multiple_of
73)]
74#![cfg_attr(
75 all(feature = "nightly", any(target_arch = "aarch64")),
76 feature(stdarch_neon_i8mm),
77 feature(stdarch_neon_sm4),
78 feature(stdarch_neon_ftts),
79 feature(stdarch_neon_fcma),
80 feature(stdarch_neon_dotprod)
81)]
82#![cfg_attr(not(feature = "std"), no_std)]
83#![cfg_attr(docsrs, feature(doc_cfg))]
84
85macro_rules! match_cfg {
86 (item, match cfg!() {
87 $(
88 const { $i_meta:meta } => { $( $i_tokens:tt )* },
89 )*
90 $(_ => { $( $e_tokens:tt )* },)?
91 }) => {
92 $crate::match_cfg! {
93 @__items () ;
94 $(
95 (( $i_meta ) ( $( $i_tokens )* )) ,
96 )*
97 $((() ( $( $e_tokens )* )),)?
98 }
99 };
100
101 (match cfg!() {
102 $(
103 const { $i_meta:meta } => $i_expr: expr,
104 )*
105 $(_ => $e_expr: expr,)?
106 }) => {
107 $crate::match_cfg! {
108 @ __result @ __exprs ();
109 $(
110 (( $i_meta ) ( $i_expr )) ,
111 )*
112 $((() ( $e_expr )),)?
113 }
114 };
115
116 (@__items ( $( $_:meta , )* ) ; ) => {};
121 (
122 @__items ( $( $no:meta , )* ) ;
123 (( $( $yes:meta )? ) ( $( $tokens:tt )* )) ,
124 $( $rest:tt , )*
125 ) => {
126 #[cfg(all(
130 $( $yes , )?
131 not(any( $( $no ),* ))
132 ))]
133 $crate::match_cfg! { @__identity $( $tokens )* }
134
135 $crate::match_cfg! {
139 @__items ( $( $no , )* $( $yes , )? ) ;
140 $( $rest , )*
141 }
142 };
143
144 (@ $ret: ident @ __exprs ( $( $_:meta , )* ) ; ) => {
149 $ret
150 };
151
152 (
153 @ $ret: ident @__exprs ( $( $no:meta , )* ) ;
154 (( $( $yes:meta )? ) ( $( $tokens:tt )* )) ,
155 $( $rest:tt , )*
156 ) => {{
157 #[cfg(all(
161 $( $yes , )?
162 not(any( $( $no ),* ))
163 ))]
164 let $ret = $crate::match_cfg! { @__identity $( $tokens )* };
165
166 $crate::match_cfg! {
170 @ $ret @ __exprs ( $( $no , )* $( $yes , )? ) ;
171 $( $rest , )*
172 }
173 }};
174
175 (@__identity $( $tokens:tt )* ) => {
178 $( $tokens )*
179 };
180}
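// Usage sketch: the `item` form of `match_cfg!` emits the items of the first matching
// `cfg` branch (used for the `MemMask` definitions further down), while the expression
// form selects a single expression, as in `fma_f32` below:
//
//     match_cfg!(match cfg!() {
//         const { feature = "std" } => f32::mul_add(a, b, c),
//         _ => libm::fmaf(a, b, c),
//     })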
181
182const MAX_REGISTER_BYTES: usize = 256;
183
184use match_cfg;
185
186#[macro_export]
190macro_rules! cast {
191 ($val: expr $(,)?) => {{
192 let __val = $val;
193 if const { false } {
194 $crate::cast(__val)
196 } else {
197 #[allow(
198 unused_unsafe,
199 unnecessary_transmutes,
200 clippy::missing_transmute_annotations
201 )]
202 unsafe {
203 ::core::mem::transmute(__val)
204 }
205 }
206 }};
207}
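// Illustrative sketch (the helper below is hypothetical, not part of the crate's API):
// `cast!` reinterprets a value as another type of the same size. The never-taken
// `if const { false }` branch still type-checks the conversion against `crate::cast`,
// while the taken branch compiles down to a plain `transmute`.
#[allow(dead_code)]
fn cast_macro_example(x: [f32; 4]) -> [u32; 4] {
    cast!(x)
}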
208
209use bytemuck::{AnyBitPattern, CheckedBitPattern, NoUninit, Pod, Zeroable, checked};
210use core::fmt::Debug;
211use core::marker::PhantomData;
212use core::mem::MaybeUninit;
213use core::ops::*;
214use core::slice::{from_raw_parts, from_raw_parts_mut};
215use num_complex::Complex;
216use paste::paste;
217use seal::Seal;
218
219#[cfg(feature = "macro")]
223#[cfg_attr(docsrs, doc(cfg(feature = "macro")))]
224pub use pulp_macro::with_simd;
225
226pub use {bytemuck, num_complex};
227
228pub type c32 = Complex<f32>;
229pub type c64 = Complex<f64>;
230
231#[derive(Copy, Clone)]
232#[repr(transparent)]
233struct DebugCplx<T>(T);
234
235unsafe impl<T: Zeroable> Zeroable for DebugCplx<T> {}
236unsafe impl<T: Pod> Pod for DebugCplx<T> {}
237
238impl Debug for DebugCplx<c32> {
239 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
240 let c32 { re, im } = self.0;
241 re.fmt(f)?;
242
243 let sign = if im.is_sign_positive() { " + " } else { " - " };
244 f.write_str(sign)?;
245
246 let im = f32::from_bits(im.to_bits() & (u32::MAX >> 1));
247 im.abs().fmt(f)?;
248
249 f.write_str("i")?;
250
251 Ok(())
252 }
253}
254
255impl Debug for DebugCplx<c64> {
256 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
257 let c64 { re, im } = self.0;
258 re.fmt(f)?;
259
260 let sign = if im.is_sign_positive() { " + " } else { " - " };
261 f.write_str(sign)?;
262
263 let im = f64::from_bits(im.to_bits() & (u64::MAX >> 1));
264 im.abs().fmt(f)?;
265
266 f.write_str("i")?;
267
268 Ok(())
269 }
270}
271
272match_cfg!(
273 item,
274 match cfg!() {
275 const { any(target_arch = "x86_64") } => {
276 #[derive(Debug, Copy, Clone)]
277 pub struct MemMask<T> {
278 mask: T,
279 load: Option<unsafe extern "C" fn()>,
280 store: Option<unsafe extern "C" fn()>,
281 }
282
283 impl<T> MemMask<T> {
284 #[inline]
285 pub fn new(mask: T) -> Self {
286 Self {
287 mask,
288 load: None,
289 store: None,
290 }
291 }
292 }
293
294 impl<T> From<T> for MemMask<T> {
295 #[inline]
296 fn from(value: T) -> Self {
297 Self {
298 mask: value,
299 load: None,
300 store: None,
301 }
302 }
303 }
304 },
305
306 _ => {
307 #[derive(Debug, Copy, Clone)]
308 pub struct MemMask<T> {
309 mask: T,
310 }
311
312 impl<T> MemMask<T> {
313 #[inline]
314 pub fn new(mask: T) -> Self {
315 Self { mask }
316 }
317 }
318
319 impl<T> From<T> for MemMask<T> {
320 #[inline]
321 fn from(value: T) -> Self {
322 Self { mask: value }
323 }
324 }
325 },
326 }
327);
328
329impl<T: Copy> MemMask<T> {
330 #[inline]
331 pub fn mask(self) -> T {
332 self.mask
333 }
334}
335
336mod seal {
337 pub trait Seal {}
338}
339
340pub trait NullaryFnOnce {
341 type Output;
342
343 fn call(self) -> Self::Output;
344}
345
346impl<R, F: FnOnce() -> R> NullaryFnOnce for F {
347 type Output = R;
348
349 #[inline(always)]
350 fn call(self) -> Self::Output {
351 self()
352 }
353}
354
355pub trait WithSimd {
356 type Output;
357
358 fn with_simd<S: Simd>(self, simd: S) -> Self::Output;
359}
360
361impl<F: NullaryFnOnce> WithSimd for F {
362 type Output = F::Output;
363
364 #[inline(always)]
365 fn with_simd<S: Simd>(self, simd: S) -> Self::Output {
366 let _simd = &simd;
367 self.call()
368 }
369}
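// Minimal usage sketch (the type below and its name are illustrative, not part of the
// crate): implement `WithSimd` on a small struct, mark `with_simd` `#[inline(always)]`,
// and hand it to `Simd::vectorize` so the body is compiled with the selected backend's
// target features enabled.
#[allow(dead_code)]
struct SumF32s<'a>(&'a [f32]);

impl WithSimd for SumF32s<'_> {
    type Output = f32;

    #[inline(always)]
    fn with_simd<S: Simd>(self, simd: S) -> Self::Output {
        // Split the slice into register-sized chunks plus a scalar tail.
        let (head, tail) = S::as_simd_f32s(self.0);
        let mut acc = simd.splat_f32s(0.0);
        for &x in head {
            acc = simd.add_f32s(acc, x);
        }
        simd.reduce_sum_f32s(acc) + tail.iter().sum::<f32>()
    }
}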
370
371#[inline(always)]
372fn fma_f32(a: f32, b: f32, c: f32) -> f32 {
373 match_cfg!(match cfg!() {
374 const { feature = "std" } => f32::mul_add(a, b, c),
375 _ => libm::fmaf(a, b, c),
376 })
377}
378
379#[inline(always)]
380fn fma_f64(a: f64, b: f64, c: f64) -> f64 {
381 match_cfg!(match cfg!() {
382 const { feature = "std" } => f64::mul_add(a, b, c),
383 _ => libm::fma(a, b, c),
384 })
385}
386
387#[inline(always)]
388fn sqrt_f32(a: f32) -> f32 {
389 match_cfg!(match cfg!() {
390 const { feature = "std" } => f32::sqrt(a),
391 _ => libm::sqrtf(a),
392 })
393}
394
395#[inline(always)]
396fn sqrt_f64(a: f64) -> f64 {
397 match_cfg!(match cfg!() {
398 const { feature = "std" } => f64::sqrt(a),
399 _ => libm::sqrt(a),
400 })
401}
402
403#[inline(always)]
407unsafe fn interleave_fallback<Unit: Pod, Reg: Pod, AosReg>(x: AosReg) -> AosReg {
408 assert!(core::mem::size_of::<AosReg>() % core::mem::size_of::<Reg>() == 0);
409 assert!(core::mem::size_of::<Reg>() % core::mem::size_of::<Unit>() == 0);
410 assert!(!core::mem::needs_drop::<AosReg>());
411
412 if const { core::mem::size_of::<AosReg>() == core::mem::size_of::<Reg>() } {
413 x
414 } else {
415 let mut y = core::ptr::read(&x);
416
417 let n = const { core::mem::size_of::<AosReg>() / core::mem::size_of::<Reg>() };
418 let m = const { core::mem::size_of::<Reg>() / core::mem::size_of::<Unit>() };
419
420 unsafe {
421 let y = (&mut y) as *mut _ as *mut Unit;
422 let x = (&x) as *const _ as *const Unit;
423 for j in 0..m {
424 for i in 0..n {
425 *y.add(i + n * j) = *x.add(j + i * m);
426 }
427 }
428 }
429
430 y
431 }
432}
433
434#[inline(always)]
435unsafe fn deinterleave_fallback<Unit: Pod, Reg: Pod, SoaReg>(y: SoaReg) -> SoaReg {
436 assert!(core::mem::size_of::<SoaReg>() % core::mem::size_of::<Reg>() == 0);
437 assert!(core::mem::size_of::<Reg>() % core::mem::size_of::<Unit>() == 0);
438 assert!(!core::mem::needs_drop::<SoaReg>());
439
440 if const { core::mem::size_of::<SoaReg>() == core::mem::size_of::<Reg>() } {
441 y
442 } else {
443 let mut x = core::ptr::read(&y);
444
445 let n = const { core::mem::size_of::<SoaReg>() / core::mem::size_of::<Reg>() };
446 let m = const { core::mem::size_of::<Reg>() / core::mem::size_of::<Unit>() };
447
448 unsafe {
449 let y = (&y) as *const _ as *const Unit;
450 let x = (&mut x) as *mut _ as *mut Unit;
451 for j in 0..m {
452 for i in 0..n {
453 *x.add(j + i * m) = *y.add(i + n * j);
454 }
455 }
456 }
457
458 x
459 }
460}
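// Worked example for the two fallbacks above, with `Unit = f32`, `Reg = [f32; 2]`
// (m = 2) and a four-unit AoS/SoA register (n = 2): `interleave_fallback` maps
// [a0, b0, a1, b1] to [a0, a1, b0, b1], and `deinterleave_fallback` applies the inverse
// permutation, mapping [a0, a1, b0, b1] back to [a0, b0, a1, b1].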
461
462macro_rules! define_binop {
463 ($func: ident, $ty: ident, $out: ident) => {
464 paste! {
465 fn [<$func _ $ty s>](self, a: Self::[<$ty s>], b: Self::[<$ty s>]) -> Self::[<$out s>];
466 }
467 };
468}
469
470macro_rules! define_binop_all {
471 ($func: ident, $($ty: ident),*) => {
472 $(define_binop!($func, $ty, $ty);)*
473 };
474 ($func: ident, $($ty: ident => $out: ident),*) => {
475 $(define_binop!($func, $ty, $out);)*
476 };
477}
478
479macro_rules! transmute_binop {
480 ($func: ident, $ty: ident, $to: ident) => {
481 paste! {
482 fn [<$func _ $ty s>](self, a: Self::[<$ty s>], b: Self::[<$ty s>]) -> Self::[<$ty s>] {
483 self.[<transmute_ $ty s_ $to s>](
484 self.[<$func _ $to s>](self.[<transmute_ $to s_ $ty s>](a), self.[<transmute_ $to s_ $ty s>](b)),
485 )
486 }
487 }
488 };
489 ($func: ident, $($ty: ident => $to: ident),*) => {
490 $(transmute_binop!($func, $ty, $to);)*
491 };
492}
493
494macro_rules! define_unop {
495 ($func: ident, $ty: ident, $out: ident) => {
496 paste! {
497 fn [<$func _ $ty s>](self, a: Self::[<$ty s>]) -> Self::[<$out s>];
498 }
499 };
500}
501
502macro_rules! define_unop_all {
503 ($func: ident, $($ty: ident),*) => {
504 $(define_unop!($func, $ty, $ty);)*
505 };
506 ($func: ident, $($ty: ident => $out: ident),*) => {
507 $(define_unop!($func, $ty, $out);)*
508 };
509}
510
511macro_rules! transmute_unop {
512 ($func: ident, $ty: ident, $to: ident) => {
513 paste! {
514 fn [<$func _ $ty s>](self, a: Self::[<$ty s>]) -> Self::[<$ty s>] {
515 self.[<transmute_ $ty s_ $to s>](
516 self.[<$func _ $to s>](self.[<transmute_ $to s_ $ty s>](a)),
517 )
518 }
519 }
520 };
521 ($func: ident, $($ty: ident => $to: ident),*) => {
522 $(transmute_unop!($func, $ty, $to);)*
523 };
524}
525
526macro_rules! transmute_cmp {
527 ($func: ident, $ty: ident, $to: ident, $out: ident) => {
528 paste! {
529 fn [<$func _ $ty s>](self, a: Self::[<$ty s>], b: Self::[<$ty s>]) -> Self::[<$out s>] {
530 self.[<$func _ $to s>](self.[<transmute_ $to s_ $ty s>](a), self.[<transmute_ $to s_ $ty s>](b))
531 }
532 }
533 };
534 ($func: ident, $($ty: ident => $to: ident => $out: ident),*) => {
535 $(transmute_cmp!($func, $ty, $to, $out);)*
536 };
537}
538
539macro_rules! define_splat {
540 ($ty: ty) => {
541 paste! {
542 fn [<splat_ $ty s>](self, value: $ty) -> Self::[<$ty s>];
543 }
544 };
545 ($($ty: ident),*) => {
546 $(define_splat!($ty);)*
547 };
548}
549
550macro_rules! split_slice {
551 ($ty: ident) => {
552 paste! {
553 #[inline(always)]
554 fn [<as_mut_rsimd_ $ty s>](slice: &mut [$ty]) -> (&mut [$ty], &mut [Self::[<$ty s>]]) {
555 unsafe { rsplit_mut_slice(slice) }
556 }
557 #[inline(always)]
558 fn [<as_rsimd_ $ty s>](slice: &[$ty]) -> (&[$ty], &[Self::[<$ty s>]]) {
559 unsafe { rsplit_slice(slice) }
560 }
561 #[inline(always)]
562 fn [<as_mut_simd_ $ty s>](slice: &mut [$ty]) -> (&mut [Self::[<$ty s>]], &mut [$ty]) {
563 unsafe { split_mut_slice(slice) }
564 }
565 #[inline(always)]
566 fn [<as_simd_ $ty s>](slice: &[$ty]) -> (&[Self::[<$ty s>]], &[$ty]) {
567 unsafe { split_slice(slice) }
568 }
569 #[inline(always)]
570 fn [<as_uninit_mut_rsimd_ $ty s>](
571 slice: &mut [MaybeUninit<$ty>],
572 ) -> (&mut [MaybeUninit<$ty>], &mut [MaybeUninit<Self::[<$ty s>]>]) {
573 unsafe { rsplit_mut_slice(slice) }
574 }
575 #[inline(always)]
576 fn [<as_uninit_mut_simd_ $ty s>](
577 slice: &mut [MaybeUninit<$ty>],
578 ) -> (&mut [MaybeUninit<Self::[<$ty s>]>], &mut [MaybeUninit<$ty>]) {
579 unsafe { split_mut_slice(slice) }
580 }
581 }
582 };
583 ($($ty: ident),*) => {
584 $(split_slice!($ty);)*
585 };
586}
587
588pub unsafe trait Interleave {}
593unsafe impl<T: Pod> Interleave for T {}
594
595pub trait Simd: Seal + Debug + Copy + Send + Sync + 'static {
596 const IS_SCALAR: bool = false;
597
598 const M64_LANES: usize = core::mem::size_of::<Self::m64s>() / core::mem::size_of::<m64>();
599 const U64_LANES: usize = core::mem::size_of::<Self::u64s>() / core::mem::size_of::<u64>();
600 const I64_LANES: usize = core::mem::size_of::<Self::i64s>() / core::mem::size_of::<i64>();
601 const F64_LANES: usize = core::mem::size_of::<Self::f64s>() / core::mem::size_of::<f64>();
602 const C64_LANES: usize = core::mem::size_of::<Self::c64s>() / core::mem::size_of::<c64>();
603
604 const M32_LANES: usize = core::mem::size_of::<Self::m32s>() / core::mem::size_of::<m32>();
605 const U32_LANES: usize = core::mem::size_of::<Self::u32s>() / core::mem::size_of::<u32>();
606 const I32_LANES: usize = core::mem::size_of::<Self::i32s>() / core::mem::size_of::<i32>();
607 const F32_LANES: usize = core::mem::size_of::<Self::f32s>() / core::mem::size_of::<f32>();
608 const C32_LANES: usize = core::mem::size_of::<Self::c32s>() / core::mem::size_of::<c32>();
609
610 const M16_LANES: usize = core::mem::size_of::<Self::m16s>() / core::mem::size_of::<m16>();
611 const U16_LANES: usize = core::mem::size_of::<Self::u16s>() / core::mem::size_of::<u16>();
612 const I16_LANES: usize = core::mem::size_of::<Self::i16s>() / core::mem::size_of::<i16>();
613
614 const M8_LANES: usize = core::mem::size_of::<Self::m8s>() / core::mem::size_of::<m8>();
615 const U8_LANES: usize = core::mem::size_of::<Self::u8s>() / core::mem::size_of::<u8>();
616 const I8_LANES: usize = core::mem::size_of::<Self::i8s>() / core::mem::size_of::<i8>();
617
618 const REGISTER_COUNT: usize;
619
620 type m8s: Debug + Copy + Send + Sync + Zeroable + NoUninit + CheckedBitPattern + 'static;
621 type i8s: Debug + Copy + Send + Sync + Pod + 'static;
622 type u8s: Debug + Copy + Send + Sync + Pod + 'static;
623
624 type m16s: Debug + Copy + Send + Sync + Zeroable + NoUninit + CheckedBitPattern + 'static;
625 type i16s: Debug + Copy + Send + Sync + Pod + 'static;
626 type u16s: Debug + Copy + Send + Sync + Pod + 'static;
627
628 type m32s: Debug + Copy + Send + Sync + Zeroable + NoUninit + CheckedBitPattern + 'static;
629 type f32s: Debug + Copy + Send + Sync + Pod + 'static;
630 type c32s: Debug + Copy + Send + Sync + Pod + 'static;
631 type i32s: Debug + Copy + Send + Sync + Pod + 'static;
632 type u32s: Debug + Copy + Send + Sync + Pod + 'static;
633
634 type m64s: Debug + Copy + Send + Sync + Zeroable + NoUninit + CheckedBitPattern + 'static;
635 type f64s: Debug + Copy + Send + Sync + Pod + 'static;
636 type c64s: Debug + Copy + Send + Sync + Pod + 'static;
637 type i64s: Debug + Copy + Send + Sync + Pod + 'static;
638 type u64s: Debug + Copy + Send + Sync + Pod + 'static;
639
640 fn abs2_c32s(self, a: Self::c32s) -> Self::c32s;
642
643 fn abs2_c64s(self, a: Self::c64s) -> Self::c64s;
645 #[inline]
646 fn abs_f32s(self, a: Self::f32s) -> Self::f32s {
647 self.and_f32s(self.not_f32s(self.splat_f32s(-0.0)), a)
648 }
649 #[inline]
650 fn abs_f64s(self, a: Self::f64s) -> Self::f64s {
651 self.and_f64s(self.not_f64s(self.splat_f64s(-0.0)), a)
652 }
653 fn abs_max_c32s(self, a: Self::c32s) -> Self::c32s;
655 fn abs_max_c64s(self, a: Self::c64s) -> Self::c64s;
657
658 define_binop_all!(add, c32, c64, f32, f64, u8, u16, u32, u64);
659 define_binop_all!(
660 sub, c32, c64, f32, f64, u8, i8, u16, i16, u32, i32, u64, i64
661 );
662 define_binop_all!(mul, c32, c64, f32, f64, u16, i16, u32, i32, u64, i64);
663 define_binop_all!(div, f32, f64);
664 define_binop_all!(equal, u8 => m8, u16 => m16, u32 => m32, u64 => m64, c32 => m32, f32 => m32, c64 => m64, f64 => m64);
665 define_binop_all!(greater_than, u8 => m8, i8 => m8, u16 => m16, i16 => m16, u32 => m32, i32 => m32, u64 => m64, i64 => m64, f32 => m32, f64 => m64);
666 define_binop_all!(greater_than_or_equal, u8 => m8, i8 => m8, u16 => m16, i16 => m16, u32 => m32, i32 => m32, u64 => m64, i64 => m64, f32 => m32, f64 => m64);
667 define_binop_all!(less_than_or_equal, u8 => m8, i8 => m8, u16 => m16, i16 => m16, u32 => m32, i32 => m32, u64 => m64, i64 => m64, f32 => m32, f64 => m64);
668 define_binop_all!(less_than, u8 => m8, i8 => m8, u16 => m16, i16 => m16, u32 => m32, i32 => m32, u64 => m64, i64 => m64, f32 => m32, f64 => m64);
669
670 define_binop_all!(and, u8, u16, u32, u64);
671 define_binop_all!(or, u8, u16, u32, u64);
672 define_binop_all!(xor, u8, u16, u32, u64);
673
674 transmute_binop!(and, m8 => u8, i8 => u8, m16 => u16, i16 => u16, m32 => u32, i32 => u32, m64 => u64, i64 => u64, f32 => u32, f64 => u64);
675 transmute_binop!(or, m8 => u8, i8 => u8, m16 => u16, i16 => u16, m32 => u32, i32 => u32, m64 => u64, i64 => u64, f32 => u32, f64 => u64);
676 transmute_binop!(xor, m8 => u8, i8 => u8, m16 => u16, i16 => u16, m32 => u32, i32 => u32, m64 => u64, i64 => u64, f32 => u32, f64 => u64);
677
678 transmute_binop!(add, i8 => u8, i16 => u16, i32 => u32, i64 => u64);
679 transmute_cmp!(equal, m8 => u8 => m8, i8 => u8 => m8, m16 => u16 => m16, i16 => u16 => m16, m32 => u32 => m32, i32 => u32 => m32, m64 => u64 => m64, i64 => u64 => m64);
680
681 define_binop_all!(min, f32, f64, u8, i8, u16, i16, u32, i32, u64, i64);
682 define_binop_all!(max, f32, f64, u8, i8, u16, i16, u32, i32, u64, i64);
683
684 define_unop_all!(neg, c32, c64);
685 define_unop_all!(not, m8, u8, m16, u16, m32, u32, m64, u64);
686
687 transmute_unop!(not, i8 => u8, i16 => u16, i32 => u32, i64 => u64, f32 => u32, f64 => u64);
688
689 split_slice!(u8, i8, u16, i16, u32, i32, u64, i64, c32, f32, c64, f64);
690 define_splat!(u8, i8, u16, i16, u32, i32, u64, i64, c32, f32, c64, f64);
691
692 fn sqrt_f32s(self, a: Self::f32s) -> Self::f32s;
693 fn sqrt_f64s(self, a: Self::f64s) -> Self::f64s;
694
695 fn conj_c32s(self, a: Self::c32s) -> Self::c32s;
696 fn conj_c64s(self, a: Self::c64s) -> Self::c64s;
697 fn conj_mul_add_c32s(self, a: Self::c32s, b: Self::c32s, c: Self::c32s) -> Self::c32s;
698 fn conj_mul_add_c64s(self, a: Self::c64s, b: Self::c64s, c: Self::c64s) -> Self::c64s;
699
700 #[inline]
702 fn conj_mul_add_e_c32s(self, a: Self::c32s, b: Self::c32s, c: Self::c32s) -> Self::c32s {
703 self.conj_mul_add_c32s(a, b, c)
704 }
705 #[inline]
707 fn conj_mul_add_e_c64s(self, a: Self::c64s, b: Self::c64s, c: Self::c64s) -> Self::c64s {
708 self.conj_mul_add_c64s(a, b, c)
709 }
710 fn conj_mul_c32s(self, a: Self::c32s, b: Self::c32s) -> Self::c32s;
711
712 fn conj_mul_c64s(self, a: Self::c64s, b: Self::c64s) -> Self::c64s;
713 #[inline]
715 fn conj_mul_e_c32s(self, a: Self::c32s, b: Self::c32s) -> Self::c32s {
716 self.conj_mul_c32s(a, b)
717 }
718 #[inline]
720 fn conj_mul_e_c64s(self, a: Self::c64s, b: Self::c64s) -> Self::c64s {
721 self.conj_mul_c64s(a, b)
722 }
723 #[inline(always)]
724 fn deinterleave_shfl_f32s<T: Interleave>(self, values: T) -> T {
725 unsafe { deinterleave_fallback::<f32, Self::f32s, T>(values) }
726 }
727
728 #[inline(always)]
729 fn deinterleave_shfl_f64s<T: Interleave>(self, values: T) -> T {
730 unsafe { deinterleave_fallback::<f64, Self::f64s, T>(values) }
731 }
732
733 #[inline(always)]
734 fn first_true_m8s(self, mask: Self::m8s) -> usize {
735 if const { core::mem::size_of::<Self::m8s>() == core::mem::size_of::<Self::u8s>() } {
736 let mask: Self::u8s = bytemuck::cast(mask);
737 let slice = bytemuck::cast_slice::<Self::u8s, u8>(core::slice::from_ref(&mask));
738 let mut i = 0;
739 for &x in slice.iter() {
740 if x != 0 {
741 break;
742 }
743 i += 1;
744 }
745 i
746 } else if const { core::mem::size_of::<Self::m8s>() == core::mem::size_of::<u8>() } {
747 let mask: u8 = bytemuck::cast(mask);
748 mask.trailing_zeros() as usize
749 } else if const { core::mem::size_of::<Self::m8s>() == core::mem::size_of::<u16>() } {
750 let mask: u16 = bytemuck::cast(mask);
751 mask.trailing_zeros() as usize
752 } else {
753 panic!()
754 }
755 }
756
757 #[inline(always)]
758 fn first_true_m16s(self, mask: Self::m16s) -> usize {
759 if const { core::mem::size_of::<Self::m16s>() == core::mem::size_of::<Self::u16s>() } {
760 let mask: Self::u16s = bytemuck::cast(mask);
761 let slice = bytemuck::cast_slice::<Self::u16s, u16>(core::slice::from_ref(&mask));
762 let mut i = 0;
763 for &x in slice.iter() {
764 if x != 0 {
765 break;
766 }
767 i += 1;
768 }
769 i
770 } else if const { core::mem::size_of::<Self::m16s>() == core::mem::size_of::<u8>() } {
771 let mask: u8 = bytemuck::cast(mask);
772 mask.trailing_zeros() as usize
773 } else if const { core::mem::size_of::<Self::m16s>() == core::mem::size_of::<u16>() } {
774 let mask: u16 = bytemuck::cast(mask);
775 mask.trailing_zeros() as usize
776 } else {
777 panic!()
778 }
779 }
780
781 #[inline(always)]
782 fn first_true_m32s(self, mask: Self::m32s) -> usize {
783 if const { core::mem::size_of::<Self::m32s>() == core::mem::size_of::<Self::u32s>() } {
784 let mask: Self::u32s = bytemuck::cast(mask);
785 let slice = bytemuck::cast_slice::<Self::u32s, u32>(core::slice::from_ref(&mask));
786 let mut i = 0;
787 for &x in slice.iter() {
788 if x != 0 {
789 break;
790 }
791 i += 1;
792 }
793 i
794 } else if const { core::mem::size_of::<Self::m32s>() == core::mem::size_of::<u8>() } {
795 let mask: u8 = bytemuck::cast(mask);
796 mask.trailing_zeros() as usize
797 } else if const { core::mem::size_of::<Self::m32s>() == core::mem::size_of::<u16>() } {
798 let mask: u16 = bytemuck::cast(mask);
799 mask.trailing_zeros() as usize
800 } else {
801 panic!()
802 }
803 }
804
805 #[inline(always)]
806 fn first_true_m64s(self, mask: Self::m64s) -> usize {
807 if const { core::mem::size_of::<Self::m64s>() == core::mem::size_of::<Self::u64s>() } {
808 let mask: Self::u64s = bytemuck::cast(mask);
809 let slice = bytemuck::cast_slice::<Self::u64s, u64>(core::slice::from_ref(&mask));
810 let mut i = 0;
811 for &x in slice.iter() {
812 if x != 0 {
813 break;
814 }
815 i += 1;
816 }
817 i
818 } else if const { core::mem::size_of::<Self::m64s>() == core::mem::size_of::<u8>() } {
819 let mask: u8 = bytemuck::cast(mask);
820 mask.trailing_zeros() as usize
821 } else if const { core::mem::size_of::<Self::m64s>() == core::mem::size_of::<u16>() } {
822 let mask: u16 = bytemuck::cast(mask);
823 mask.trailing_zeros() as usize
824 } else {
825 panic!()
826 }
827 }
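// Sketch of the `first_true_*` contract: with a bitmask-style mask where lane `i`
// corresponds to bit `i`, a mask whose lowest set bit is bit 2 yields 2, and an
// all-false mask yields the lane count.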
828
829 #[inline(always)]
830 fn interleave_shfl_f32s<T: Interleave>(self, values: T) -> T {
831 unsafe { interleave_fallback::<f32, Self::f32s, T>(values) }
832 }
833
834 #[inline(always)]
835 fn interleave_shfl_f64s<T: Interleave>(self, values: T) -> T {
836 unsafe { interleave_fallback::<f64, Self::f64s, T>(values) }
837 }
838
839 #[inline(always)]
840 fn mask_between_m8s(self, start: u8, end: u8) -> MemMask<Self::m8s> {
841 let iota: Self::u8s = const {
842 unsafe { core::mem::transmute_copy(&iota_8::<u8, { MAX_REGISTER_BYTES / 1 }>()) }
843 };
844 self.and_m8s(
845 self.greater_than_or_equal_u8s(iota, self.splat_u8s(start)),
846 self.less_than_u8s(iota, self.splat_u8s(end)),
847 )
848 .into()
849 }
850
851 #[inline(always)]
852 fn mask_between_m16s(self, start: u16, end: u16) -> MemMask<Self::m16s> {
853 let iota: Self::u16s = const {
854 unsafe { core::mem::transmute_copy(&iota_16::<u16, { MAX_REGISTER_BYTES / 2 }>()) }
855 };
856 self.and_m16s(
857 self.greater_than_or_equal_u16s(iota, self.splat_u16s(start)),
858 self.less_than_u16s(iota, self.splat_u16s(end)),
859 )
860 .into()
861 }
862
863 #[inline(always)]
864 fn mask_between_m32s(self, start: u32, end: u32) -> MemMask<Self::m32s> {
865 let iota: Self::u32s = const {
866 unsafe { core::mem::transmute_copy(&iota_32::<u32, { MAX_REGISTER_BYTES / 4 }>()) }
867 };
868 self.and_m32s(
869 self.greater_than_or_equal_u32s(iota, self.splat_u32s(start)),
870 self.less_than_u32s(iota, self.splat_u32s(end)),
871 )
872 .into()
873 }
874
875 #[inline(always)]
876 fn mask_between_m64s(self, start: u64, end: u64) -> MemMask<Self::m64s> {
877 let iota: Self::u64s = const {
878 unsafe { core::mem::transmute_copy(&iota_64::<u64, { MAX_REGISTER_BYTES / 8 }>()) }
879 };
880 self.and_m64s(
881 self.greater_than_or_equal_u64s(iota, self.splat_u64s(start)),
882 self.less_than_u64s(iota, self.splat_u64s(end)),
883 )
884 .into()
885 }
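// Example (sketch): `mask_between_m32s(1, 3)` is set exactly for lanes 1 and 2
// (`start <= lane < end`); combined with the masked loads and stores below, this gives
// bounds-checked access to a partial register.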
886 unsafe fn mask_load_ptr_c32s(self, mask: MemMask<Self::m32s>, ptr: *const c32) -> Self::c32s;
891 unsafe fn mask_load_ptr_c64s(self, mask: MemMask<Self::m64s>, ptr: *const c64) -> Self::c64s;
896 #[inline(always)]
901 unsafe fn mask_load_ptr_f32s(self, mask: MemMask<Self::m32s>, ptr: *const f32) -> Self::f32s {
902 self.transmute_f32s_u32s(self.mask_load_ptr_u32s(mask, ptr as *const u32))
903 }
904
905 #[inline(always)]
910 unsafe fn mask_load_ptr_f64s(self, mask: MemMask<Self::m64s>, ptr: *const f64) -> Self::f64s {
911 self.transmute_f64s_u64s(self.mask_load_ptr_u64s(mask, ptr as *const u64))
912 }
913 #[inline(always)]
918 unsafe fn mask_load_ptr_i8s(self, mask: MemMask<Self::m8s>, ptr: *const i8) -> Self::i8s {
919 self.transmute_i8s_u8s(self.mask_load_ptr_u8s(mask, ptr as *const u8))
920 }
921 #[inline(always)]
926 unsafe fn mask_load_ptr_i16s(self, mask: MemMask<Self::m16s>, ptr: *const i16) -> Self::i16s {
927 self.transmute_i16s_u16s(self.mask_load_ptr_u16s(mask, ptr as *const u16))
928 }
929 #[inline(always)]
934 unsafe fn mask_load_ptr_i32s(self, mask: MemMask<Self::m32s>, ptr: *const i32) -> Self::i32s {
935 self.transmute_i32s_u32s(self.mask_load_ptr_u32s(mask, ptr as *const u32))
936 }
937 #[inline(always)]
942 unsafe fn mask_load_ptr_i64s(self, mask: MemMask<Self::m64s>, ptr: *const i64) -> Self::i64s {
943 self.transmute_i64s_u64s(self.mask_load_ptr_u64s(mask, ptr as *const u64))
944 }
945
946 unsafe fn mask_load_ptr_u8s(self, mask: MemMask<Self::m8s>, ptr: *const u8) -> Self::u8s;
951
952 unsafe fn mask_load_ptr_u16s(self, mask: MemMask<Self::m16s>, ptr: *const u16) -> Self::u16s;
957
958 unsafe fn mask_load_ptr_u32s(self, mask: MemMask<Self::m32s>, ptr: *const u32) -> Self::u32s;
963
964 unsafe fn mask_load_ptr_u64s(self, mask: MemMask<Self::m64s>, ptr: *const u64) -> Self::u64s;
969 unsafe fn mask_store_ptr_c32s(
974 self,
975 mask: MemMask<Self::m32s>,
976 ptr: *mut c32,
977 values: Self::c32s,
978 );
979 unsafe fn mask_store_ptr_c64s(
984 self,
985 mask: MemMask<Self::m64s>,
986 ptr: *mut c64,
987 values: Self::c64s,
988 );
989 #[inline(always)]
994 unsafe fn mask_store_ptr_f32s(
995 self,
996 mask: MemMask<Self::m32s>,
997 ptr: *mut f32,
998 values: Self::f32s,
999 ) {
1000 self.mask_store_ptr_u32s(mask, ptr as *mut u32, self.transmute_u32s_f32s(values));
1001 }
1002
1003 #[inline(always)]
1008 unsafe fn mask_store_ptr_f64s(
1009 self,
1010 mask: MemMask<Self::m64s>,
1011 ptr: *mut f64,
1012 values: Self::f64s,
1013 ) {
1014 self.mask_store_ptr_u64s(mask, ptr as *mut u64, self.transmute_u64s_f64s(values));
1015 }
1016 #[inline(always)]
1021 unsafe fn mask_store_ptr_i8s(self, mask: MemMask<Self::m8s>, ptr: *mut i8, values: Self::i8s) {
1022 self.mask_store_ptr_u8s(mask, ptr as *mut u8, self.transmute_u8s_i8s(values));
1023 }
1024 #[inline(always)]
1029 unsafe fn mask_store_ptr_i16s(
1030 self,
1031 mask: MemMask<Self::m16s>,
1032 ptr: *mut i16,
1033 values: Self::i16s,
1034 ) {
1035 self.mask_store_ptr_u16s(mask, ptr as *mut u16, self.transmute_u16s_i16s(values));
1036 }
1037 #[inline(always)]
1042 unsafe fn mask_store_ptr_i32s(
1043 self,
1044 mask: MemMask<Self::m32s>,
1045 ptr: *mut i32,
1046 values: Self::i32s,
1047 ) {
1048 self.mask_store_ptr_u32s(mask, ptr as *mut u32, self.transmute_u32s_i32s(values));
1049 }
1050 #[inline(always)]
1055 unsafe fn mask_store_ptr_i64s(
1056 self,
1057 mask: MemMask<Self::m64s>,
1058 ptr: *mut i64,
1059 values: Self::i64s,
1060 ) {
1061 self.mask_store_ptr_u64s(mask, ptr as *mut u64, self.transmute_u64s_i64s(values));
1062 }
1063
1064 unsafe fn mask_store_ptr_u8s(self, mask: MemMask<Self::m8s>, ptr: *mut u8, values: Self::u8s);
1069
1070 unsafe fn mask_store_ptr_u16s(
1075 self,
1076 mask: MemMask<Self::m16s>,
1077 ptr: *mut u16,
1078 values: Self::u16s,
1079 );
1080
1081 unsafe fn mask_store_ptr_u32s(
1086 self,
1087 mask: MemMask<Self::m32s>,
1088 ptr: *mut u32,
1089 values: Self::u32s,
1090 );
1091
1092 unsafe fn mask_store_ptr_u64s(
1097 self,
1098 mask: MemMask<Self::m64s>,
1099 ptr: *mut u64,
1100 values: Self::u64s,
1101 );
1102
1103 fn mul_add_c32s(self, a: Self::c32s, b: Self::c32s, c: Self::c32s) -> Self::c32s;
1104 fn mul_add_c64s(self, a: Self::c64s, b: Self::c64s, c: Self::c64s) -> Self::c64s;
1105 #[inline]
1107 fn mul_add_e_c32s(self, a: Self::c32s, b: Self::c32s, c: Self::c32s) -> Self::c32s {
1108 self.mul_add_c32s(a, b, c)
1109 }
1110 #[inline]
1112 fn mul_add_e_c64s(self, a: Self::c64s, b: Self::c64s, c: Self::c64s) -> Self::c64s {
1113 self.mul_add_c64s(a, b, c)
1114 }
1115 fn mul_add_e_f32s(self, a: Self::f32s, b: Self::f32s, c: Self::f32s) -> Self::f32s;
1116 fn mul_add_e_f64s(self, a: Self::f64s, b: Self::f64s, c: Self::f64s) -> Self::f64s;
1117 fn mul_add_f32s(self, a: Self::f32s, b: Self::f32s, c: Self::f32s) -> Self::f32s;
1118 fn mul_add_f64s(self, a: Self::f64s, b: Self::f64s, c: Self::f64s) -> Self::f64s;
1119 #[inline]
1121 fn mul_e_c32s(self, a: Self::c32s, b: Self::c32s) -> Self::c32s {
1122 self.mul_c32s(a, b)
1123 }
1124 fn mul_e_c64s(self, a: Self::c64s, b: Self::c64s) -> Self::c64s {
1126 self.mul_c64s(a, b)
1127 }
1128
1129 #[inline]
1130 fn neg_f32s(self, a: Self::f32s) -> Self::f32s {
1131 self.xor_f32s(self.splat_f32s(-0.0), a)
1132 }
1133 #[inline]
1134 fn neg_f64s(self, a: Self::f64s) -> Self::f64s {
1135 self.xor_f64s(a, self.splat_f64s(-0.0))
1136 }
1137
1138 #[inline(always)]
1139 fn partial_load_c32s(self, slice: &[c32]) -> Self::c32s {
1140 cast(self.partial_load_f64s(bytemuck::cast_slice(slice)))
1141 }
1142 #[inline(always)]
1143 fn partial_load_c64s(self, slice: &[c64]) -> Self::c64s {
1144 cast(self.partial_load_f64s(bytemuck::cast_slice(slice)))
1145 }
1146 #[inline(always)]
1147 fn partial_load_f32s(self, slice: &[f32]) -> Self::f32s {
1148 cast(self.partial_load_u32s(bytemuck::cast_slice(slice)))
1149 }
1150 #[inline(always)]
1151 fn partial_load_f64s(self, slice: &[f64]) -> Self::f64s {
1152 cast(self.partial_load_u64s(bytemuck::cast_slice(slice)))
1153 }
1154 #[inline(always)]
1155 fn partial_load_i8s(self, slice: &[i8]) -> Self::i8s {
1156 cast(self.partial_load_u8s(bytemuck::cast_slice(slice)))
1157 }
1158 #[inline(always)]
1159 fn partial_load_i16s(self, slice: &[i16]) -> Self::i16s {
1160 cast(self.partial_load_u16s(bytemuck::cast_slice(slice)))
1161 }
1162 #[inline(always)]
1163 fn partial_load_i32s(self, slice: &[i32]) -> Self::i32s {
1164 cast(self.partial_load_u32s(bytemuck::cast_slice(slice)))
1165 }
1166 #[inline(always)]
1167 fn partial_load_i64s(self, slice: &[i64]) -> Self::i64s {
1168 cast(self.partial_load_u64s(bytemuck::cast_slice(slice)))
1169 }
1170 #[inline(always)]
1171 fn partial_load_u8s(self, slice: &[u8]) -> Self::u8s {
1172 unsafe {
1173 self.mask_load_ptr_u8s(self.mask_between_m8s(0, slice.len() as u8), slice.as_ptr())
1174 }
1175 }
1176 #[inline(always)]
1177 fn partial_load_u16s(self, slice: &[u16]) -> Self::u16s {
1178 unsafe {
1179 self.mask_load_ptr_u16s(
1180 self.mask_between_m16s(0, slice.len() as u16),
1181 slice.as_ptr(),
1182 )
1183 }
1184 }
1185 #[inline(always)]
1186 fn partial_load_u32s(self, slice: &[u32]) -> Self::u32s {
1187 unsafe {
1188 self.mask_load_ptr_u32s(
1189 self.mask_between_m32s(0, slice.len() as u32),
1190 slice.as_ptr(),
1191 )
1192 }
1193 }
1194 #[inline(always)]
1195 fn partial_load_u64s(self, slice: &[u64]) -> Self::u64s {
1196 unsafe {
1197 self.mask_load_ptr_u64s(
1198 self.mask_between_m64s(0, slice.len() as u64),
1199 slice.as_ptr(),
1200 )
1201 }
1202 }
1203
1204 #[inline(always)]
1205 fn partial_store_c32s(self, slice: &mut [c32], values: Self::c32s) {
1206 self.partial_store_f64s(bytemuck::cast_slice_mut(slice), cast(values))
1207 }
1208 #[inline(always)]
1209 fn partial_store_c64s(self, slice: &mut [c64], values: Self::c64s) {
1210 self.partial_store_f64s(bytemuck::cast_slice_mut(slice), cast(values))
1211 }
1212
1213 #[inline(always)]
1214 fn partial_store_f32s(self, slice: &mut [f32], values: Self::f32s) {
1215 self.partial_store_u32s(bytemuck::cast_slice_mut(slice), cast(values))
1216 }
1217 #[inline(always)]
1218 fn partial_store_f64s(self, slice: &mut [f64], values: Self::f64s) {
1219 self.partial_store_u64s(bytemuck::cast_slice_mut(slice), cast(values))
1220 }
1221 #[inline(always)]
1222 fn partial_store_i8s(self, slice: &mut [i8], values: Self::i8s) {
1223 self.partial_store_u8s(bytemuck::cast_slice_mut(slice), cast(values))
1224 }
1225 #[inline(always)]
1226 fn partial_store_i16s(self, slice: &mut [i16], values: Self::i16s) {
1227 self.partial_store_u16s(bytemuck::cast_slice_mut(slice), cast(values))
1228 }
1229 #[inline(always)]
1230 fn partial_store_i32s(self, slice: &mut [i32], values: Self::i32s) {
1231 self.partial_store_u32s(bytemuck::cast_slice_mut(slice), cast(values))
1232 }
1233 #[inline(always)]
1234 fn partial_store_i64s(self, slice: &mut [i64], values: Self::i64s) {
1235 self.partial_store_u64s(bytemuck::cast_slice_mut(slice), cast(values))
1236 }
1237 #[inline(always)]
1238 fn partial_store_u8s(self, slice: &mut [u8], values: Self::u8s) {
1239 unsafe {
1240 self.mask_store_ptr_u8s(
1241 self.mask_between_m8s(0, slice.len() as u8),
1242 slice.as_mut_ptr(),
1243 values,
1244 )
1245 }
1246 }
1247 #[inline(always)]
1248 fn partial_store_u16s(self, slice: &mut [u16], values: Self::u16s) {
1249 unsafe {
1250 self.mask_store_ptr_u16s(
1251 self.mask_between_m16s(0, slice.len() as u16),
1252 slice.as_mut_ptr(),
1253 values,
1254 )
1255 }
1256 }
1257 #[inline(always)]
1258 fn partial_store_u32s(self, slice: &mut [u32], values: Self::u32s) {
1259 unsafe {
1260 self.mask_store_ptr_u32s(
1261 self.mask_between_m32s(0, slice.len() as u32),
1262 slice.as_mut_ptr(),
1263 values,
1264 )
1265 }
1266 }
1267 #[inline(always)]
1268 fn partial_store_u64s(self, slice: &mut [u64], values: Self::u64s) {
1269 unsafe {
1270 self.mask_store_ptr_u64s(
1271 self.mask_between_m64s(0, slice.len() as u64),
1272 slice.as_mut_ptr(),
1273 values,
1274 )
1275 }
1276 }
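// Sketch of the partial load/store contract for a slice shorter than one register
// (variable names are illustrative): only the first `slice.len()` lanes are read or
// written, so memory outside the slice is never touched.
//
//     let v = simd.partial_load_f32s(&data[..3]); // reads exactly 3 elements
//     simd.partial_store_f32s(&mut out[..3], v); // writes exactly 3 elements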
1277 fn reduce_max_c32s(self, a: Self::c32s) -> c32;
1278 fn reduce_max_c64s(self, a: Self::c64s) -> c64;
1279 fn reduce_max_f32s(self, a: Self::f32s) -> f32;
1280 fn reduce_max_f64s(self, a: Self::f64s) -> f64;
1281 fn reduce_min_c32s(self, a: Self::c32s) -> c32;
1282 fn reduce_min_c64s(self, a: Self::c64s) -> c64;
1283 fn reduce_min_f32s(self, a: Self::f32s) -> f32;
1284 fn reduce_min_f64s(self, a: Self::f64s) -> f64;
1285
1286 fn reduce_product_f32s(self, a: Self::f32s) -> f32;
1287 fn reduce_product_f64s(self, a: Self::f64s) -> f64;
1288 fn reduce_sum_c32s(self, a: Self::c32s) -> c32;
1289 fn reduce_sum_c64s(self, a: Self::c64s) -> c64;
1290
1291 fn reduce_sum_f32s(self, a: Self::f32s) -> f32;
1292 fn reduce_sum_f64s(self, a: Self::f64s) -> f64;
1293 #[inline(always)]
1294 fn rotate_left_c32s(self, a: Self::c32s, amount: usize) -> Self::c32s {
1295 self.rotate_right_c32s(a, amount.wrapping_neg())
1296 }
1297 #[inline(always)]
1298 fn rotate_left_c64s(self, a: Self::c64s, amount: usize) -> Self::c64s {
1299 self.rotate_right_c64s(a, amount.wrapping_neg())
1300 }
1301
1302 #[inline(always)]
1303 fn rotate_left_f32s(self, a: Self::f32s, amount: usize) -> Self::f32s {
1304 cast(self.rotate_left_u32s(cast(a), amount))
1305 }
1306 #[inline(always)]
1307 fn rotate_left_f64s(self, a: Self::f64s, amount: usize) -> Self::f64s {
1308 cast(self.rotate_left_u64s(cast(a), amount))
1309 }
1310 #[inline(always)]
1311 fn rotate_left_i32s(self, a: Self::i32s, amount: usize) -> Self::i32s {
1312 cast(self.rotate_left_u32s(cast(a), amount))
1313 }
1314
1315 #[inline(always)]
1316 fn rotate_left_i64s(self, a: Self::i64s, amount: usize) -> Self::i64s {
1317 cast(self.rotate_left_u64s(cast(a), amount))
1318 }
1319
1320 #[inline(always)]
1321 fn rotate_left_u32s(self, a: Self::u32s, amount: usize) -> Self::u32s {
1322 self.rotate_right_u32s(a, amount.wrapping_neg())
1323 }
1324 #[inline(always)]
1325 fn rotate_left_u64s(self, a: Self::u64s, amount: usize) -> Self::u64s {
1326 self.rotate_right_u64s(a, amount.wrapping_neg())
1327 }
1328 fn rotate_right_c32s(self, a: Self::c32s, amount: usize) -> Self::c32s;
1329 fn rotate_right_c64s(self, a: Self::c64s, amount: usize) -> Self::c64s;
1330 #[inline(always)]
1331 fn rotate_right_f32s(self, a: Self::f32s, amount: usize) -> Self::f32s {
1332 cast(self.rotate_right_u32s(cast(a), amount))
1333 }
1334 #[inline(always)]
1335 fn rotate_right_f64s(self, a: Self::f64s, amount: usize) -> Self::f64s {
1336 cast(self.rotate_right_u64s(cast(a), amount))
1337 }
1338 #[inline(always)]
1339 fn rotate_right_i32s(self, a: Self::i32s, amount: usize) -> Self::i32s {
1340 cast(self.rotate_right_u32s(cast(a), amount))
1341 }
1342 #[inline(always)]
1343 fn rotate_right_i64s(self, a: Self::i64s, amount: usize) -> Self::i64s {
1344 cast(self.rotate_right_u64s(cast(a), amount))
1345 }
1346 fn rotate_right_u32s(self, a: Self::u32s, amount: usize) -> Self::u32s;
1347 fn rotate_right_u64s(self, a: Self::u64s, amount: usize) -> Self::u64s;
1348
1349 #[inline]
1350 fn select_f32s(
1351 self,
1352 mask: Self::m32s,
1353 if_true: Self::f32s,
1354 if_false: Self::f32s,
1355 ) -> Self::f32s {
1356 self.transmute_f32s_u32s(self.select_u32s(
1357 mask,
1358 self.transmute_u32s_f32s(if_true),
1359 self.transmute_u32s_f32s(if_false),
1360 ))
1361 }
1362 #[inline]
1363 fn select_f64s(
1364 self,
1365 mask: Self::m64s,
1366 if_true: Self::f64s,
1367 if_false: Self::f64s,
1368 ) -> Self::f64s {
1369 self.transmute_f64s_u64s(self.select_u64s(
1370 mask,
1371 self.transmute_u64s_f64s(if_true),
1372 self.transmute_u64s_f64s(if_false),
1373 ))
1374 }
1375 #[inline]
1376 fn select_i32s(
1377 self,
1378 mask: Self::m32s,
1379 if_true: Self::i32s,
1380 if_false: Self::i32s,
1381 ) -> Self::i32s {
1382 self.transmute_i32s_u32s(self.select_u32s(
1383 mask,
1384 self.transmute_u32s_i32s(if_true),
1385 self.transmute_u32s_i32s(if_false),
1386 ))
1387 }
1388 #[inline]
1389 fn select_i64s(
1390 self,
1391 mask: Self::m64s,
1392 if_true: Self::i64s,
1393 if_false: Self::i64s,
1394 ) -> Self::i64s {
1395 self.transmute_i64s_u64s(self.select_u64s(
1396 mask,
1397 self.transmute_u64s_i64s(if_true),
1398 self.transmute_u64s_i64s(if_false),
1399 ))
1400 }
1401 fn select_u32s(self, mask: Self::m32s, if_true: Self::u32s, if_false: Self::u32s)
1402 -> Self::u32s;
1403 fn select_u64s(self, mask: Self::m64s, if_true: Self::u64s, if_false: Self::u64s)
1404 -> Self::u64s;
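// The `select_*` family is a lanewise blend: lane `i` of the result comes from
// `if_true` where the mask is set and from `if_false` otherwise (see the scalar
// implementation further down), e.g.
//
//     let blended = simd.select_f32s(mask, a, b);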
1405
1406 fn swap_re_im_c32s(self, a: Self::c32s) -> Self::c32s;
1407 fn swap_re_im_c64s(self, a: Self::c64s) -> Self::c64s;
1408
1409 #[inline]
1410 fn transmute_f32s_i32s(self, a: Self::i32s) -> Self::f32s {
1411 cast(a)
1412 }
1413 #[inline]
1414 fn transmute_f32s_u32s(self, a: Self::u32s) -> Self::f32s {
1415 cast(a)
1416 }
1417
1418 #[inline]
1419 fn transmute_f64s_i64s(self, a: Self::i64s) -> Self::f64s {
1420 cast(a)
1421 }
1422 #[inline]
1423 fn transmute_f64s_u64s(self, a: Self::u64s) -> Self::f64s {
1424 cast(a)
1425 }
1426 #[inline]
1427 fn transmute_i32s_f32s(self, a: Self::f32s) -> Self::i32s {
1428 cast(a)
1429 }
1430 #[inline]
1431 fn transmute_m8s_u8s(self, a: Self::u8s) -> Self::m8s {
1432 checked::cast(a)
1433 }
1434 #[inline]
1435 fn transmute_u8s_m8s(self, a: Self::m8s) -> Self::u8s {
1436 cast(a)
1437 }
1438 #[inline]
1439 fn transmute_m16s_u16s(self, a: Self::u16s) -> Self::m16s {
1440 checked::cast(a)
1441 }
1442 #[inline]
1443 fn transmute_u16s_m16s(self, a: Self::m16s) -> Self::u16s {
1444 cast(a)
1445 }
1446 #[inline]
1447 fn transmute_m32s_u32s(self, a: Self::u32s) -> Self::m32s {
1448 checked::cast(a)
1449 }
1450 #[inline]
1451 fn transmute_u32s_m32s(self, a: Self::m32s) -> Self::u32s {
1452 cast(a)
1453 }
1454 #[inline]
1455 fn transmute_m64s_u64s(self, a: Self::u64s) -> Self::m64s {
1456 checked::cast(a)
1457 }
1458 #[inline]
1459 fn transmute_u64s_m64s(self, a: Self::m64s) -> Self::u64s {
1460 cast(a)
1461 }
1462 #[inline]
1463 fn transmute_i8s_u8s(self, a: Self::u8s) -> Self::i8s {
1464 cast(a)
1465 }
1466 #[inline]
1467 fn transmute_u8s_i8s(self, a: Self::i8s) -> Self::u8s {
1468 cast(a)
1469 }
1470 #[inline]
1471 fn transmute_u16s_i16s(self, a: Self::i16s) -> Self::u16s {
1472 cast(a)
1473 }
1474 #[inline]
1475 fn transmute_i16s_u16s(self, a: Self::u16s) -> Self::i16s {
1476 cast(a)
1477 }
1478 #[inline]
1479 fn transmute_i32s_u32s(self, a: Self::u32s) -> Self::i32s {
1480 cast(a)
1481 }
1482 #[inline]
1483 fn transmute_i64s_f64s(self, a: Self::f64s) -> Self::i64s {
1484 cast(a)
1485 }
1486 #[inline]
1487 fn transmute_i64s_u64s(self, a: Self::u64s) -> Self::i64s {
1488 cast(a)
1489 }
1490
1491 #[inline]
1492 fn transmute_u32s_f32s(self, a: Self::f32s) -> Self::u32s {
1493 cast(a)
1494 }
1495 #[inline]
1496 fn transmute_u32s_i32s(self, a: Self::i32s) -> Self::u32s {
1497 cast(a)
1498 }
1499 #[inline]
1500 fn transmute_u64s_f64s(self, a: Self::f64s) -> Self::u64s {
1501 cast(a)
1502 }
1503 #[inline]
1504 fn transmute_u64s_i64s(self, a: Self::i64s) -> Self::u64s {
1505 cast(a)
1506 }
1507
1508 fn vectorize<Op: WithSimd>(self, op: Op) -> Op::Output;
1509 fn widening_mul_u32s(self, a: Self::u32s, b: Self::u32s) -> (Self::u32s, Self::u32s);
1510 fn wrapping_dyn_shl_u32s(self, a: Self::u32s, amount: Self::u32s) -> Self::u32s;
1511 fn wrapping_dyn_shr_u32s(self, a: Self::u32s, amount: Self::u32s) -> Self::u32s;
1512}
1513
1514pub trait PortableSimd: Simd {}
1515
1516impl PortableSimd for Scalar {}
1517impl PortableSimd for Scalar128b {}
1518impl PortableSimd for Scalar256b {}
1519impl PortableSimd for Scalar512b {}
1520
1521#[derive(Debug, Copy, Clone)]
1522pub struct Scalar;
1523
1524#[derive(Debug, Copy, Clone)]
1525pub struct Scalar128b;
1526#[derive(Debug, Copy, Clone)]
1527pub struct Scalar256b;
1528#[derive(Debug, Copy, Clone)]
1529pub struct Scalar512b;
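// Follow-up sketch to the `WithSimd` example above (the helper below is hypothetical):
// the scalar fallback backends declared here receive their `Simd` implementations from
// the macros below, so the same kernel can run without any runtime feature detection.
#[allow(dead_code)]
fn sum_f32s_scalar(data: &[f32]) -> f32 {
    Scalar.vectorize(SumF32s(data))
}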
1530
1531macro_rules! scalar_simd_binop_impl {
1532 ($func: ident, $op: ident, $ty: ty) => {
1533 paste! {
1534 #[inline]
1535 fn [<$func _ $ty s>](self, a: Self::[<$ty s>], b: Self::[<$ty s>],) -> Self::[<$ty s>] {
1536 let mut out = [<$ty as Default>::default(); Self::[<$ty:upper _LANES>]];
1537 let a: [$ty; Self::[<$ty:upper _LANES>]] = cast(a);
1538 let b: [$ty; Self::[<$ty:upper _LANES>]] = cast(b);
1539
1540 for i in 0..Self::[<$ty:upper _LANES>] {
1541 out[i] = a[i].$op(b[i]);
1542 }
1543
1544 cast(out)
1545 }
1546 }
1547 };
1548}
1549
1550macro_rules! scalar_simd_binop {
1551 ($func: ident, op $op: ident, $($ty: ty),*) => {
1552 $(scalar_simd_binop_impl!($func, $op, $ty);)*
1553 };
1554 ($func: ident, $($ty: ty),*) => {
1555 $(scalar_simd_binop_impl!($func, $func, $ty);)*
1556 };
1557}
1558
1559macro_rules! scalar_simd_unop_impl {
1560 ($func: ident, $op: ident, $ty: ty) => {
1561 paste! {
1562 #[inline]
1563 fn [<$func _ $ty s>](self, a: Self::[<$ty s>]) -> Self::[<$ty s>] {
1564 let mut out = [<$ty as Default>::default(); Self::[<$ty:upper _LANES>]];
1565 let a: [$ty; Self::[<$ty:upper _LANES>]] = cast(a);
1566
1567 for i in 0..Self::[<$ty:upper _LANES>] {
1568 out[i] = a[i].$op();
1569 }
1570
1571 cast(out)
1572 }
1573 }
1574 };
1575}
1576
1577macro_rules! scalar_simd_unop {
1578 ($func: ident, $($ty: ty),*) => {
1579 $(scalar_simd_unop_impl!($func, $func, $ty);)*
1580 };
1581}
1582
1583macro_rules! scalar_simd_cmp {
1584 ($func: ident, $op: ident, $ty: ty, $mask: ty) => {
1585 paste! {
1586 #[inline]
1587 fn [<$func _ $ty s>](self, a: Self::[<$ty s>], b: Self::[<$ty s>]) -> Self::[<$mask s>] {
1588 let mut out = [$mask::new(false); Self::[<$ty:upper _LANES>]];
1589 let a: [$ty; Self::[<$ty:upper _LANES>]] = cast(a);
1590 let b: [$ty; Self::[<$ty:upper _LANES>]] = cast(b);
1591 for i in 0..Self::[<$ty:upper _LANES>] {
1592 out[i] = $mask::new(a[i].$op(&b[i]));
1593 }
1594 cast(out)
1595 }
1596 }
1597 };
1598 ($func: ident, op $op: ident, $($ty: ty => $mask: ty),*) => {
1599 $(scalar_simd_cmp!($func, $op, $ty, $mask);)*
1600 };
1601 ($func: ident, $($ty: ty => $mask: ty),*) => {
1602 $(scalar_simd_cmp!($func, $func, $ty, $mask);)*
1603 };
1604}
1605
1606macro_rules! scalar_splat {
1607 ($ty: ident) => {
1608 paste! {
1609 #[inline]
1610 fn [<splat_ $ty s>](self, value: $ty) -> Self::[<$ty s>] {
1611 cast([value; Self::[<$ty:upper _LANES>]])
1612 }
1613 }
1614 };
1615 ($($ty: ident),*) => {
1616 $(scalar_splat!($ty);)*
1617 };
1618}
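// For reference, with `$ty = f32` the `paste!` invocation above expands to roughly:
//
//     fn splat_f32s(self, value: f32) -> Self::f32s {
//         cast([value; Self::F32_LANES])
//     }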
1619
1620macro_rules! scalar_partial_load {
1621 ($ty: ident) => {
1622 paste! {
1623 #[inline]
1624 fn [<partial_load_ $ty s>](self, slice: &[$ty]) -> Self::[<$ty s>] {
1625 let mut values = [<$ty as Default>::default(); Self::[<$ty:upper _LANES>]];
1626 for i in 0..Ord::min(values.len(), slice.len()) {
1627 values[i] = slice[i];
1628 }
1629 cast(values)
1630 }
1631 }
1632 };
1633 ($($ty: ident),*) => {
1634 $(scalar_partial_load!($ty);)*
1635 };
1636}
1637
1638macro_rules! scalar_partial_store {
1639 ($ty: ident) => {
1640 paste! {
1641 #[inline]
1642 fn [<partial_store_ $ty s>](self, slice: &mut [$ty], values: Self::[<$ty s>]) {
1643 let values: [$ty; Self::[<$ty:upper _LANES>]] = cast(values);
1644 for i in 0..Ord::min(values.len(), slice.len()) {
1645 slice[i] = values[i];
1646 }
1647 }
1648 }
1649 };
1650 ($($ty: ident),*) => {
1651 $(scalar_partial_store!($ty);)*
1652 };
1653}
1654
1655macro_rules! mask_load_ptr {
1656 ($ty: ident, $mask: ident) => {
1657 paste! {
1658 #[inline]
1659 unsafe fn [<mask_load_ptr_ $ty s>](
1660 self,
1661 mask: MemMask<Self::[<$mask s>]>,
1662 ptr: *const $ty,
1663 ) -> Self::[<$ty s>] {
1664 let mut values = [<$ty as Default>::default(); Self::[<$ty:upper _LANES>]];
1665 let mask: [$mask; Self::[<$ty:upper _LANES>]] = cast(mask.mask());
1666 for i in 0..Self::[<$ty:upper _LANES>] {
1667 if mask[i].is_set() {
1668 values[i] = *ptr.add(i);
1669 }
1670 }
1671 cast(values)
1672 }
1673 }
1674 };
1675 (cast $ty: ident, $to: ident, $mask: ident) => {
1676 paste! {
1677 #[inline]
1678 unsafe fn [<mask_load_ptr_ $ty s>](
1679 self,
1680 mask: MemMask<Self::[<$mask s>]>,
1681 ptr: *const $ty,
1682 ) -> Self::[<$ty s>] {
1683 cast(self.[<mask_load_ptr_ $to s>](mask, ptr as *const $to))
1684 }
1685 }
1686 };
1687 ($($ty: ident: $mask: ident),*) => {
1688 $(mask_load_ptr!($ty, $mask);)*
1689 };
1690 (cast $($ty: ident: $mask: ident => $to: ident),*) => {
1691 $(mask_load_ptr!(cast $ty, $to, $mask);)*
1692 };
1693}
1694
1695macro_rules! mask_store_ptr {
1696 ($ty: ident, $mask: ident) => {
1697 paste! {
1698 #[inline]
1699 unsafe fn [<mask_store_ptr_ $ty s>](
1700 self,
1701 mask: MemMask<Self::[<$mask s>]>,
1702 ptr: *mut $ty,
1703 values: Self::[<$ty s>],
1704 ) {
1705 let mask: [$mask; Self::[<$ty:upper _LANES>]] = cast(mask.mask());
1706 let values: [$ty; Self::[<$ty:upper _LANES>]] = cast(values);
1707 for i in 0..Self::[<$ty:upper _LANES>] {
1708 if mask[i].is_set() {
1709 *ptr.add(i) = values[i];
1710 }
1711 }
1712 }
1713 }
1714 };
1715 (cast $ty: ident, $to: ident, $mask: ident) => {
1716 paste! {
1717 #[inline]
1718 unsafe fn [<mask_store_ptr_ $ty s>](
1719 self,
1720 mask: MemMask<Self::[<$mask s>]>,
1721 ptr: *mut $ty,
1722 values: Self::[<$ty s>],
1723 ) {
1724 self.[<mask_store_ptr_ $to s>](mask, ptr as *mut $to, cast(values));
1725 }
1726 }
1727 };
1728 ($($ty: ident: $mask: ident),*) => {
1729 $(mask_store_ptr!($ty, $mask);)*
1730 };
1731 (cast $($ty: ident: $mask: ident => $to: ident),*) => {
1732 $(mask_store_ptr!(cast $ty, $to, $mask);)*
1733 };
1734}
1735
1736macro_rules! scalar_simd {
1737 ($ty: ty, $register_count: expr, $m8s: ty, $i8s: ty, $u8s: ty, $m16s: ty, $i16s: ty, $u16s: ty, $m32s: ty, $f32s: ty, $i32s: ty, $u32s: ty, $m64s: ty, $f64s: ty, $i64s: ty, $u64s: ty $(,)?) => {
1738 impl Seal for $ty {}
1739 impl Simd for $ty {
1740 type m8s = $m8s;
1741 type m16s = $m16s;
1742 type c32s = $f32s;
1743 type c64s = $f64s;
1744 type f32s = $f32s;
1745 type f64s = $f64s;
1746 type i16s = $i16s;
1747 type i32s = $i32s;
1748 type i64s = $i64s;
1749 type i8s = $i8s;
1750 type m32s = $m32s;
1751 type m64s = $m64s;
1752 type u16s = $u16s;
1753 type u32s = $u32s;
1754 type u64s = $u64s;
1755 type u8s = $u8s;
1756
1757 const REGISTER_COUNT: usize = $register_count;
1758
1759 scalar_simd_binop!(min, u8, i8, u16, i16, u32, i32, u64, i64, f32, f64);
1760
1761 scalar_simd_binop!(max, u8, i8, u16, i16, u32, i32, u64, i64, f32, f64);
1762
1763 scalar_simd_binop!(add, c32, f32, c64, f64);
1764 scalar_simd_binop!(add, op wrapping_add, u8, i8, u16, i16, u32, i32, u64, i64);
1765 scalar_simd_binop!(sub, c32, f32, c64, f64);
1766 scalar_simd_binop!(sub, op wrapping_sub, u8, i8, u16, i16, u32, i32, u64, i64);
1767 scalar_simd_binop!(mul, c32, f32, c64, f64);
1768 scalar_simd_binop!(mul, op wrapping_mul, u16, i16, u32, i32, u64, i64);
1769 scalar_simd_binop!(div, f32, f64);
1770
1771 scalar_simd_binop!(and, op bitand, u8, u16, u32, u64);
1772 scalar_simd_binop!(or, op bitor, u8, u16, u32, u64);
1773 scalar_simd_binop!(xor, op bitxor, u8, u16, u32, u64);
1774
1775 scalar_simd_cmp!(equal, op eq, u8 => m8, u16 => m16, u32 => m32, u64 => m64, c32 => m32, f32 => m32, c64 => m64, f64 => m64);
1776 scalar_simd_cmp!(greater_than, op gt, u8 => m8, i8 => m8, u16 => m16, i16 => m16, u32 => m32, i32 => m32, u64 => m64, i64 => m64, f32 => m32, f64 => m64);
1777 scalar_simd_cmp!(greater_than_or_equal, op ge, u8 => m8, i8 => m8, u16 => m16, i16 => m16, u32 => m32, i32 => m32, u64 => m64, i64 => m64, f32 => m32, f64 => m64);
1778 scalar_simd_cmp!(less_than_or_equal, op le, u8 => m8, i8 => m8, u16 => m16, i16 => m16, u32 => m32, i32 => m32, u64 => m64, i64 => m64, f32 => m32, f64 => m64);
1779 scalar_simd_cmp!(less_than, op lt, u8 => m8, i8 => m8, u16 => m16, i16 => m16, u32 => m32, i32 => m32, u64 => m64, i64 => m64, f32 => m32, f64 => m64);
1780
1781 scalar_simd_unop!(not, m8, u8, m16, u16, m32, u32, m64, u64);
1782
1783 scalar_splat!(u8, i8, u16, i16, u32, i32, u64, i64, f32, f64);
1784
1785 scalar_partial_load!(u8, i8, u16, i16, u32, i32, u64, i64, f32, f64);
1786 scalar_partial_store!(u8, i8, u16, i16, u32, i32, u64, i64, f32, f64);
1787
1788 mask_load_ptr!(u8: m8, u16: m16, u32: m32, u64: m64);
1789 mask_load_ptr!(cast i8: m8 => u8, i16: m16 => u16, i32: m32 => u32, i64: m64 => u64, c32: m32 => u32, f32: m32 => u32, c64: m64 => u64, f64: m64 => u64);
1790 mask_store_ptr!(u8: m8, u16: m16, u32: m32, u64: m64);
1791 mask_store_ptr!(cast i8: m8 => u8, i16: m16 => u16, i32: m32 => u32, i64: m64 => u64, c32: m32 => u32, f32: m32 => u32, c64: m64 => u64, f64: m64 => u64);
1792
1793 #[inline]
1794 fn vectorize<Op: WithSimd>(self, op: Op) -> Op::Output {
1795 op.with_simd(self)
1796 }
1797
1798 #[inline]
1799 fn and_m32s(self, a: Self::m32s, b: Self::m32s) -> Self::m32s {
1800 let mut out = [m32::new(false); Self::F32_LANES];
1801 let a: [m32; Self::F32_LANES] = cast(a);
1802 let b: [m32; Self::F32_LANES] = cast(b);
1803 for i in 0..Self::F32_LANES {
1804 out[i] = a[i] & b[i];
1805 }
1806 cast(out)
1807 }
1808
1809 #[inline]
1810 fn or_m32s(self, a: Self::m32s, b: Self::m32s) -> Self::m32s {
1811 let mut out = [m32::new(false); Self::F32_LANES];
1812 let a: [m32; Self::F32_LANES] = cast(a);
1813 let b: [m32; Self::F32_LANES] = cast(b);
1814 for i in 0..Self::F32_LANES {
1815 out[i] = a[i] | b[i];
1816 }
1817 cast(out)
1818 }
1819
1820 #[inline]
1821 fn xor_m32s(self, a: Self::m32s, b: Self::m32s) -> Self::m32s {
1822 let mut out = [m32::new(false); Self::F32_LANES];
1823 let a: [m32; Self::F32_LANES] = cast(a);
1824 let b: [m32; Self::F32_LANES] = cast(b);
1825 for i in 0..Self::F32_LANES {
1826 out[i] = a[i] ^ b[i];
1827 }
1828 cast(out)
1829 }
1830
1831 #[inline]
1832 fn and_m64s(self, a: Self::m64s, b: Self::m64s) -> Self::m64s {
1833 let mut out = [m64::new(false); Self::F64_LANES];
1834 let a: [m64; Self::F64_LANES] = cast(a);
1835 let b: [m64; Self::F64_LANES] = cast(b);
1836 for i in 0..Self::F64_LANES {
1837 out[i] = a[i] & b[i];
1838 }
1839 cast(out)
1840 }
1841
1842 #[inline]
1843 fn or_m64s(self, a: Self::m64s, b: Self::m64s) -> Self::m64s {
1844 let mut out = [m64::new(false); Self::F64_LANES];
1845 let a: [m64; Self::F64_LANES] = cast(a);
1846 let b: [m64; Self::F64_LANES] = cast(b);
1847 for i in 0..Self::F64_LANES {
1848 out[i] = a[i] | b[i];
1849 }
1850 cast(out)
1851 }
1852
1853 #[inline]
1854 fn xor_m64s(self, a: Self::m64s, b: Self::m64s) -> Self::m64s {
1855 let mut out = [m64::new(false); Self::F64_LANES];
1856 let a: [m64; Self::F64_LANES] = cast(a);
1857 let b: [m64; Self::F64_LANES] = cast(b);
1858 for i in 0..Self::F64_LANES {
1859 out[i] = a[i] ^ b[i];
1860 }
1861 cast(out)
1862 }
1863
1864 #[inline]
1865 fn select_u32s(
1866 self,
1867 mask: Self::m32s,
1868 if_true: Self::u32s,
1869 if_false: Self::u32s,
1870 ) -> Self::u32s {
1871 let mut out = [0u32; Self::F32_LANES];
1872 let mask: [m32; Self::F32_LANES] = cast(mask);
1873 let if_true: [u32; Self::F32_LANES] = cast(if_true);
1874 let if_false: [u32; Self::F32_LANES] = cast(if_false);
1875
1876 for i in 0..Self::F32_LANES {
1877 out[i] = if mask[i].is_set() {
1878 if_true[i]
1879 } else {
1880 if_false[i]
1881 };
1882 }
1883
1884 cast(out)
1885 }
1886
1887 #[inline]
1888 fn select_u64s(
1889 self,
1890 mask: Self::m64s,
1891 if_true: Self::u64s,
1892 if_false: Self::u64s,
1893 ) -> Self::u64s {
1894 let mut out = [0u64; Self::F64_LANES];
1895 let mask: [m64; Self::F64_LANES] = cast(mask);
1896 let if_true: [u64; Self::F64_LANES] = cast(if_true);
1897 let if_false: [u64; Self::F64_LANES] = cast(if_false);
1898
1899 for i in 0..Self::F64_LANES {
1900 out[i] = if mask[i].is_set() {
1901 if_true[i]
1902 } else {
1903 if_false[i]
1904 };
1905 }
1906
1907 cast(out)
1908 }
1909
1910 #[inline]
1911 fn wrapping_dyn_shl_u32s(self, a: Self::u32s, amount: Self::u32s) -> Self::u32s {
1912 let mut out = [0u32; Self::F32_LANES];
1913 let a: [u32; Self::F32_LANES] = cast(a);
1914 let b: [u32; Self::F32_LANES] = cast(amount);
1915 for i in 0..Self::F32_LANES {
1916 out[i] = a[i].wrapping_shl(b[i]);
1917 }
1918 cast(out)
1919 }
1920
1921 #[inline]
1922 fn wrapping_dyn_shr_u32s(self, a: Self::u32s, amount: Self::u32s) -> Self::u32s {
1923 let mut out = [0u32; Self::F32_LANES];
1924 let a: [u32; Self::F32_LANES] = cast(a);
1925 let b: [u32; Self::F32_LANES] = cast(amount);
1926 for i in 0..Self::F32_LANES {
1927 out[i] = a[i].wrapping_shr(b[i]);
1928 }
1929 cast(out)
1930 }
1931
1932 #[inline]
1933 fn widening_mul_u32s(self, a: Self::u32s, b: Self::u32s) -> (Self::u32s, Self::u32s) {
1934 let mut lo = [0u32; Self::F32_LANES];
1935 let mut hi = [0u32; Self::F32_LANES];
1936 let a: [u32; Self::F32_LANES] = cast(a);
1937 let b: [u32; Self::F32_LANES] = cast(b);
1938 for i in 0..Self::F32_LANES {
1939 let m = a[i] as u64 * b[i] as u64;
1940
1941 (lo[i], hi[i]) = (m as u32, (m >> 32) as u32);
1942 }
1943 (cast(lo), cast(hi))
1944 }
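// Example: for a lane with a = 0xFFFF_FFFF and b = 2, the full product is
// 0x1_FFFF_FFFE, so `lo` holds 0xFFFF_FFFE and `hi` holds 1.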
1945
1946 #[inline]
1947 fn mul_add_f32s(self, a: Self::f32s, b: Self::f32s, c: Self::f32s) -> Self::f32s {
1948 let mut out = [0.0f32; Self::F32_LANES];
1949 let a: [f32; Self::F32_LANES] = cast(a);
1950 let b: [f32; Self::F32_LANES] = cast(b);
1951 let c: [f32; Self::F32_LANES] = cast(c);
1952
1953 for i in 0..Self::F32_LANES {
1954 out[i] = fma_f32(a[i], b[i], c[i]);
1955 }
1956
1957 cast(out)
1958 }
1959
1960 #[inline]
1961 fn reduce_sum_f32s(self, a: Self::f32s) -> f32 {
1962 let mut a: [f32; Self::F32_LANES] = cast(a);
1963
1964 let mut n = Self::F32_LANES;
1965 while n > 1 {
1966 n /= 2;
1967 for i in 0..n {
1968 a[i] += a[i + n];
1969 }
1970 }
1971
1972 a[0]
1973 }
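// The halving loop above is a tree reduction and relies on the lane count being a
// power of two (true for every register-sized backend): with 4 lanes, [a, b, c, d]
// reduces to [a + c, b + d] and then to (a + c) + (b + d).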
1974
1975 #[inline]
1976 fn reduce_product_f32s(self, a: Self::f32s) -> f32 {
1977 let mut a: [f32; Self::F32_LANES] = cast(a);
1978
1979 let mut n = Self::F32_LANES;
1980 while n > 1 {
1981 n /= 2;
1982 for i in 0..n {
1983 a[i] *= a[i + n];
1984 }
1985 }
1986
1987 a[0]
1988 }
1989
1990 #[inline]
1991 fn reduce_min_f32s(self, a: Self::f32s) -> f32 {
1992 let mut a: [f32; Self::F32_LANES] = cast(a);
1993
1994 let mut n = Self::F32_LANES;
1995 while n > 1 {
1996 n /= 2;
1997 for i in 0..n {
1998 a[i] = f32::min(a[i], a[i + n]);
1999 }
2000 }
2001
2002 a[0]
2003 }
2004
2005 #[inline]
2006 fn reduce_max_f32s(self, a: Self::f32s) -> f32 {
2007 let mut a: [f32; Self::F32_LANES] = cast(a);
2008
2009 let mut n = Self::F32_LANES;
2010 while n > 1 {
2011 n /= 2;
2012 for i in 0..n {
2013 a[i] = f32::max(a[i], a[i + n]);
2014 }
2015 }
2016
2017 a[0]
2018 }
2019
2020 #[inline]
2021 fn splat_c32s(self, value: c32) -> Self::c32s {
2022 cast([value; Self::C32_LANES])
2023 }
2024
2025 #[inline]
2026 fn conj_c32s(self, a: Self::c32s) -> Self::c32s {
2027 let mut out = [c32::ZERO; Self::C32_LANES];
2028 let a: [c32; Self::C32_LANES] = cast(a);
2029
2030 for i in 0..Self::C32_LANES {
2031 out[i] = c32::new(a[i].re, -a[i].im);
2032 }
2033
2034 cast(out)
2035 }
2036
2037 #[inline]
2038 fn neg_c32s(self, a: Self::c32s) -> Self::c32s {
2039 let mut out = [c32::ZERO; Self::C32_LANES];
2040 let a: [c32; Self::C32_LANES] = cast(a);
2041
2042 for i in 0..Self::C32_LANES {
2043 out[i] = c32::new(-a[i].re, -a[i].im);
2044 }
2045
2046 cast(out)
2047 }
2048
2049 #[inline]
2050 fn swap_re_im_c32s(self, a: Self::c32s) -> Self::c32s {
2051 let mut out = [c32::ZERO; Self::C32_LANES];
2052 let a: [c32; Self::C32_LANES] = cast(a);
2053
2054 for i in 0..Self::C32_LANES {
2055 out[i] = c32::new(a[i].im, a[i].re);
2056 }
2057
2058 cast(out)
2059 }
2060
2061 #[inline]
2062 fn conj_mul_c32s(self, a: Self::c32s, b: Self::c32s) -> Self::c32s {
2063 let mut out = [c32::ZERO; Self::C32_LANES];
2064 let a: [c32; Self::C32_LANES] = cast(a);
2065 let b: [c32; Self::C32_LANES] = cast(b);
2066
2067 for i in 0..Self::C32_LANES {
2068 out[i].re = fma_f32(a[i].re, b[i].re, a[i].im * b[i].im);
2069 out[i].im = fma_f32(a[i].re, b[i].im, -(a[i].im * b[i].re));
2070 }
2071
2072 cast(out)
2073 }
2074
2075 #[inline]
2076 fn mul_add_c32s(self, a: Self::c32s, b: Self::c32s, c: Self::c32s) -> Self::c32s {
2077 let mut out = [c32::ZERO; Self::C32_LANES];
2078 let a: [c32; Self::C32_LANES] = cast(a);
2079 let b: [c32; Self::C32_LANES] = cast(b);
2080 let c: [c32; Self::C32_LANES] = cast(c);
2081
2082 for i in 0..Self::C32_LANES {
2083 out[i].re = fma_f32(a[i].re, b[i].re, -fma_f32(a[i].im, b[i].im, -c[i].re));
2084 out[i].im = fma_f32(a[i].re, b[i].im, fma_f32(a[i].im, b[i].re, c[i].im));
2085 }
2086
2087 cast(out)
2088 }
2089
2090 #[inline]
2091 fn conj_mul_add_c32s(self, a: Self::c32s, b: Self::c32s, c: Self::c32s) -> Self::c32s {
2092 let mut out = [c32::ZERO; Self::C32_LANES];
2093 let a: [c32; Self::C32_LANES] = cast(a);
2094 let b: [c32; Self::C32_LANES] = cast(b);
2095 let c: [c32; Self::C32_LANES] = cast(c);
2096
2097 for i in 0..Self::C32_LANES {
2098 out[i].re = fma_f32(a[i].re, b[i].re, fma_f32(a[i].im, b[i].im, c[i].re));
2099 out[i].im = fma_f32(a[i].re, b[i].im, -fma_f32(a[i].im, b[i].re, -c[i].im));
2100 }
2101
2102 cast(out)
2103 }
2104
2105 #[inline]
2106 fn abs2_c32s(self, a: Self::c32s) -> Self::c32s {
2107 let mut out = [c32::ZERO; Self::C32_LANES];
2108 let a: [c32; Self::C32_LANES] = cast(a);
2109
2110 for i in 0..Self::C32_LANES {
2111 let x = a[i].re * a[i].re + a[i].im * a[i].im;
2112 out[i].re = x;
2113 out[i].im = x;
2114 }
2115
2116 cast(out)
2117 }
2118
2119 #[inline]
2120 fn abs_max_c32s(self, a: Self::c32s) -> Self::c32s {
2121 let mut out = [c32::ZERO; Self::C32_LANES];
2122 let a: [c32; Self::C32_LANES] = cast(self.abs_f32s(a));
2123
2124 for i in 0..Self::C32_LANES {
2125 let x = f32::max(a[i].re, a[i].im);
2126 out[i].re = x;
2127 out[i].im = x;
2128 }
2129
2130 cast(out)
2131 }
2132
2133 #[inline]
2134 fn reduce_sum_c32s(self, a: Self::c32s) -> c32 {
2135 let mut a: [c32; Self::C32_LANES] = cast(a);
2136
2137 let mut n = Self::C32_LANES;
2138 while n > 1 {
2139 n /= 2;
2140 for i in 0..n {
2141 a[i].re += a[i + n].re;
2142 a[i].im += a[i + n].im;
2143 }
2144 }
2145
2146 a[0]
2147 }
2148
2149 #[inline]
2150 fn reduce_min_c32s(self, a: Self::c32s) -> c32 {
2151 let mut a: [c32; Self::C32_LANES] = cast(a);
2152
2153 let mut n = Self::C32_LANES;
2154 while n > 1 {
2155 n /= 2;
2156 for i in 0..n {
2157 a[i].re = f32::min(a[i].re, a[i + n].re);
2158 a[i].im = f32::min(a[i].im, a[i + n].im);
2159 }
2160 }
2161
2162 a[0]
2163 }
2164
2165 #[inline]
2166 fn reduce_max_c32s(self, a: Self::c32s) -> c32 {
2167 let mut a: [c32; Self::C32_LANES] = cast(a);
2168
2169 let mut n = Self::C32_LANES;
2170 while n > 1 {
2171 n /= 2;
2172 for i in 0..n {
2173 a[i].re = f32::max(a[i].re, a[i + n].re);
2174 a[i].im = f32::max(a[i].im, a[i + n].im);
2175 }
2176 }
2177
2178 a[0]
2179 }
2180
2181 #[inline]
2182 fn rotate_right_u32s(self, a: Self::u32s, amount: usize) -> Self::u32s {
2183 let mut a: [u32; Self::F32_LANES] = cast(a);
2184 let amount = amount % Self::F32_LANES;
2185 a.rotate_right(amount);
2186 cast(a)
2187 }
2188
2189 #[inline]
2190 fn rotate_right_c32s(self, a: Self::c32s, amount: usize) -> Self::c32s {
2191 let mut a: [c32; Self::C32_LANES] = cast(a);
2192 let amount = amount % Self::C32_LANES;
2193 a.rotate_right(amount);
2194 cast(a)
2195 }
2196
2197 #[inline]
2198 fn mul_add_f64s(self, a: Self::f64s, b: Self::f64s, c: Self::f64s) -> Self::f64s {
2199 let mut out = [0.0f64; Self::F64_LANES];
2200 let a: [f64; Self::F64_LANES] = cast(a);
2201 let b: [f64; Self::F64_LANES] = cast(b);
2202 let c: [f64; Self::F64_LANES] = cast(c);
2203
2204 for i in 0..Self::F64_LANES {
2205 out[i] = fma_f64(a[i], b[i], c[i]);
2206 }
2207
2208 cast(out)
2209 }
2210
2211 #[inline]
2212 fn reduce_sum_f64s(self, a: Self::f64s) -> f64 {
2213 let mut a: [f64; Self::F64_LANES] = cast(a);
2214
2215 let mut n = Self::F64_LANES;
2216 while n > 1 {
2217 n /= 2;
2218 for i in 0..n {
2219 a[i] += a[i + n];
2220 }
2221 }
2222
2223 a[0]
2224 }
2225
2226 #[inline]
2227 fn reduce_product_f64s(self, a: Self::f64s) -> f64 {
2228 let mut a: [f64; Self::F64_LANES] = cast(a);
2229
2230 let mut n = Self::F64_LANES;
2231 while n > 1 {
2232 n /= 2;
2233 for i in 0..n {
2234 a[i] *= a[i + n];
2235 }
2236 }
2237
2238 a[0]
2239 }
2240
2241 #[inline]
2242 fn reduce_min_f64s(self, a: Self::f64s) -> f64 {
2243 let mut a: [f64; Self::F64_LANES] = cast(a);
2244
2245 let mut n = Self::F64_LANES;
2246 while n > 1 {
2247 n /= 2;
2248 for i in 0..n {
2249 a[i] = f64::min(a[i], a[i + n]);
2250 }
2251 }
2252
2253 a[0]
2254 }
2255
2256 #[inline]
2257 fn reduce_max_f64s(self, a: Self::f64s) -> f64 {
2258 let mut a: [f64; Self::F64_LANES] = cast(a);
2259
2260 let mut n = Self::F64_LANES;
2261 while n > 1 {
2262 n /= 2;
2263 for i in 0..n {
2264 a[i] = f64::max(a[i], a[i + n]);
2265 }
2266 }
2267
2268 a[0]
2269 }
2270
2271 #[inline]
2272 fn splat_c64s(self, value: c64) -> Self::c64s {
2273 cast([value; Self::C64_LANES])
2274 }
2275
2276 #[inline]
2277 fn conj_c64s(self, a: Self::c64s) -> Self::c64s {
2278 let mut out = [c64::ZERO; Self::C64_LANES];
2279 let a: [c64; Self::C64_LANES] = cast(a);
2280
2281 for i in 0..Self::C64_LANES {
2282 out[i] = c64::new(a[i].re, -a[i].im);
2283 }
2284
2285 cast(out)
2286 }
2287
2288 #[inline]
2289 fn neg_c64s(self, a: Self::c64s) -> Self::c64s {
2290 let mut out = [c64::ZERO; Self::C64_LANES];
2291 let a: [c64; Self::C64_LANES] = cast(a);
2292
2293 for i in 0..Self::C64_LANES {
2294 out[i] = c64::new(-a[i].re, -a[i].im);
2295 }
2296
2297 cast(out)
2298 }
2299
2300 #[inline]
2301 fn swap_re_im_c64s(self, a: Self::c64s) -> Self::c64s {
2302 let mut out = [c64::ZERO; Self::C64_LANES];
2303 let a: [c64; Self::C64_LANES] = cast(a);
2304
2305 for i in 0..Self::C64_LANES {
2306 out[i] = c64::new(a[i].im, a[i].re);
2307 }
2308
2309 cast(out)
2310 }
2311
2312 #[inline]
2313 fn conj_mul_c64s(self, a: Self::c64s, b: Self::c64s) -> Self::c64s {
2314 let mut out = [c64::ZERO; Self::C64_LANES];
2315 let a: [c64; Self::C64_LANES] = cast(a);
2316 let b: [c64; Self::C64_LANES] = cast(b);
2317
2318 for i in 0..Self::C64_LANES {
2319 out[i].re = fma_f64(a[i].re, b[i].re, a[i].im * b[i].im);
2320 out[i].im = fma_f64(a[i].re, b[i].im, -(a[i].im * b[i].re));
2321 }
2322
2323 cast(out)
2324 }
2325
2326 #[inline]
2327 fn mul_add_c64s(self, a: Self::c64s, b: Self::c64s, c: Self::c64s) -> Self::c64s {
2328 let mut out = [c64::ZERO; Self::C64_LANES];
2329 let a: [c64; Self::C64_LANES] = cast(a);
2330 let b: [c64; Self::C64_LANES] = cast(b);
2331 let c: [c64; Self::C64_LANES] = cast(c);
2332
2333 for i in 0..Self::C64_LANES {
2334 out[i].re = fma_f64(a[i].re, b[i].re, -fma_f64(a[i].im, b[i].im, -c[i].re));
2335 out[i].im = fma_f64(a[i].re, b[i].im, fma_f64(a[i].im, b[i].re, c[i].im));
2336 }
2337
2338 cast(out)
2339 }
2340
2341 #[inline]
2342 fn conj_mul_add_c64s(self, a: Self::c64s, b: Self::c64s, c: Self::c64s) -> Self::c64s {
2343 let mut out = [c64::ZERO; Self::C64_LANES];
2344 let a: [c64; Self::C64_LANES] = cast(a);
2345 let b: [c64; Self::C64_LANES] = cast(b);
2346 let c: [c64; Self::C64_LANES] = cast(c);
2347
2348 for i in 0..Self::C64_LANES {
2349 out[i].re = fma_f64(a[i].re, b[i].re, fma_f64(a[i].im, b[i].im, c[i].re));
2350 out[i].im = fma_f64(a[i].re, b[i].im, -fma_f64(a[i].im, b[i].re, -c[i].im));
2351 }
2352
2353 cast(out)
2354 }
2355
2356 #[inline]
2357 fn abs2_c64s(self, a: Self::c64s) -> Self::c64s {
2358 let mut out = [c64::ZERO; Self::C64_LANES];
2359 let a: [c64; Self::C64_LANES] = cast(a);
2360
2361 for i in 0..Self::C64_LANES {
2362 let x = a[i].re * a[i].re + a[i].im * a[i].im;
2363 out[i].re = x;
2364 out[i].im = x;
2365 }
2366
2367 cast(out)
2368 }
2369
2370 #[inline]
2371 fn abs_max_c64s(self, a: Self::c64s) -> Self::c64s {
2372 let mut out = [c64::ZERO; Self::C64_LANES];
2373 let a: [c64; Self::C64_LANES] = cast(self.abs_f64s(a));
2374
2375 for i in 0..Self::C64_LANES {
2376 let x = f64::max(a[i].re, a[i].im);
2377 out[i].re = x;
2378 out[i].im = x;
2379 }
2380
2381 cast(out)
2382 }
2383
2384 #[inline]
2385 fn reduce_sum_c64s(self, a: Self::c64s) -> c64 {
2386 let mut a: [c64; Self::C64_LANES] = cast(a);
2387
2388 let mut n = Self::C64_LANES;
2389 while n > 1 {
2390 n /= 2;
2391 for i in 0..n {
2392 a[i].re += a[i + n].re;
2393 a[i].im += a[i + n].im;
2394 }
2395 }
2396
2397 a[0]
2398 }
2399
2400 #[inline]
2401 fn reduce_min_c64s(self, a: Self::c64s) -> c64 {
2402 let mut a: [c64; Self::C64_LANES] = cast(a);
2403
2404 let mut n = Self::C64_LANES;
2405 while n > 1 {
2406 n /= 2;
2407 for i in 0..n {
2408 a[i].re = f64::min(a[i].re, a[i + n].re);
2409 a[i].im = f64::min(a[i].im, a[i + n].im);
2410 }
2411 }
2412
2413 a[0]
2414 }
2415
2416 #[inline]
2417 fn reduce_max_c64s(self, a: Self::c64s) -> c64 {
2418 let mut a: [c64; Self::C64_LANES] = cast(a);
2419
2420 let mut n = Self::C64_LANES;
2421 while n > 1 {
2422 n /= 2;
2423 for i in 0..n {
2424 a[i].re = f64::max(a[i].re, a[i + n].re);
2425 a[i].im = f64::max(a[i].im, a[i + n].im);
2426 }
2427 }
2428
2429 a[0]
2430 }
2431
2432 #[inline]
2433 fn rotate_right_u64s(self, a: Self::u64s, amount: usize) -> Self::u64s {
2434 let mut a: [u64; Self::F64_LANES] = cast(a);
2435 let amount = amount % Self::F64_LANES;
2436 a.rotate_right(amount);
2437 cast(a)
2438 }
2439
2440 #[inline]
2441 fn rotate_right_c64s(self, a: Self::c64s, amount: usize) -> Self::c64s {
2442 let mut a: [c64; Self::C64_LANES] = cast(a);
2443 let amount = amount % Self::C64_LANES;
2444 a.rotate_right(amount);
2445 cast(a)
2446 }
2447
2448 #[inline]
2449 fn mul_add_e_f32s(self, a: Self::f32s, b: Self::f32s, c: Self::f32s) -> Self::f32s {
2450 self.mul_add_f32s(a, b, c)
2451 }
2452
2453 #[inline]
2454 fn mul_add_e_f64s(self, a: Self::f64s, b: Self::f64s, c: Self::f64s) -> Self::f64s {
2455 self.mul_add_f64s(a, b, c)
2456 }
2457
2458 #[inline(always)]
2459 fn sqrt_f32s(self, a: Self::f32s) -> Self::f32s {
2460 let mut out = [0.0_f32; Self::F32_LANES];
2461 let a: [f32; Self::F32_LANES] = cast(a);
2462
2463 for i in 0..Self::F32_LANES {
2464 out[i] = sqrt_f32(a[i]);
2465 }
2466
2467 cast(out)
2468 }
2469 #[inline(always)]
2470 fn sqrt_f64s(self, a: Self::f64s) -> Self::f64s {
2471 let mut out = [0.0_f64; Self::F64_LANES];
2472 let a: [f64; Self::F64_LANES] = cast(a);
2473
2474 for i in 0..Self::F64_LANES {
2475 out[i] = sqrt_f64(a[i]);
2476 }
2477
2478 cast(out)
2479 }
2480 }
2481 };
2482}
2483
2484scalar_simd!(
2485 Scalar128b, 16, m8x16, i8x16, u8x16, m16x8, i16x8, u16x8, m32x4, f32x4, i32x4, u32x4, m64x2,
2486 f64x2, i64x2, u64x2
2487);
2488scalar_simd!(
2489 Scalar256b, 16, m8x32, i8x32, u8x32, m16x16, i16x16, u16x16, m32x8, f32x8, i32x8, u32x8, m64x4,
2490 f64x4, i64x4, u64x4
2491);
2492scalar_simd!(
2493 Scalar512b, 8, m8x64, i8x64, u8x64, m16x32, i16x32, u16x32, m32x16, f32x16, i32x16, u32x16,
2494 m64x8, f64x8, i64x8, u64x8
2495);
2496
2497impl Default for Scalar {
2498 #[inline]
2499 fn default() -> Self {
2500 Self::new()
2501 }
2502}
2503
2504impl Scalar {
2505 #[inline]
2506 pub fn new() -> Self {
2507 Self
2508 }
2509}
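// Illustrative sketch (not part of the original source): `Scalar` implements the
// `Simd` trait with a single lane per "register", so every vector operation
// degenerates to plain scalar arithmetic. Assuming only the method names that
// appear in the `Simd` impl below:
//
//     let simd = Scalar::new();
//     let x = simd.mul_add_f32s(2.0, 3.0, 1.0); // fused multiply-add: 2 * 3 + 1 = 7
//     assert_eq!(simd.reduce_sum_f32s(x), 7.0); // one lane, so the reduction is the identity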
2510
2511macro_rules! impl_primitive_binop {
2512 ($func: ident, $op: ident, $ty: ident, $out: ty) => {
2513 paste! {
2514 #[inline(always)]
2515 fn [<$func _ $ty s>](self, a: Self::[<$ty s>], b: Self::[<$ty s>]) -> Self::[<$out s>] {
2516 a.$op(b)
2517 }
2518 }
2519 };
2520 (ref $func: ident, $op: ident, $ty: ident, $out: ty) => {
2521 paste! {
2522 #[inline(always)]
2523 fn [<$func _ $ty s>](self, a: Self::[<$ty s>], b: Self::[<$ty s>]) -> Self::[<$out s>] {
2524 a.$op(&b)
2525 }
2526 }
2527 };
2528}
2529
2530macro_rules! primitive_binop {
2531 (ref $func: ident, op $op: ident, $($ty: ident => $out: ty),*) => {
2532 $(impl_primitive_binop!(ref $func, $op, $ty, $out);)*
2533 };
2534 ($func: ident, $($ty: ident => $out: ty),*) => {
2535 $(impl_primitive_binop!($func, $func, $ty, $out);)*
2536 };
2537 ($func: ident, op $op: ident, $($ty: ident),*) => {
2538 $(impl_primitive_binop!($func, $op, $ty, $ty);)*
2539 };
2540 ($func: ident, $($ty: ident),*) => {
2541 $(impl_primitive_binop!($func, $func, $ty, $ty);)*
2542 };
2543}
2544
2545macro_rules! impl_primitive_unop {
2546 ($func: ident, $op: ident, $ty: ident, $out: ty) => {
2547 paste! {
2548 #[inline(always)]
2549 fn [<$func _ $ty s>](self, a: Self::[<$ty s>]) -> Self::[<$out s>] {
2550 a.$op()
2551 }
2552 }
2553 };
2554}
2555
2556macro_rules! primitive_unop {
2557 ($func: ident, $($ty: ident),*) => {
2558 $(impl_primitive_unop!($func, $func, $ty, $ty);)*
2559 };
2560}
2561
2562macro_rules! splat_primitive {
2563 ($ty: ty) => {
2564 paste! {
2565 #[inline]
2566 fn [<splat_ $ty s>](self, value: $ty) -> Self::[<$ty s>] {
2567 value
2568 }
2569 }
2570 };
2571 ($($ty: ty),*) => {
2572 $(splat_primitive!($ty);)*
2573 }
2574}
2575
2576impl Seal for Scalar {}
2577impl Simd for Scalar {
2578 type c32s = c32;
2579 type c64s = c64;
2580 type f32s = f32;
2581 type f64s = f64;
2582 type i16s = i16;
2583 type i32s = i32;
2584 type i64s = i64;
2585 type i8s = i8;
2586 type m16s = bool;
2587 type m32s = bool;
2588 type m64s = bool;
2589 type m8s = bool;
2590 type u16s = u16;
2591 type u32s = u32;
2592 type u64s = u64;
2593 type u8s = u8;
2594
2595 const IS_SCALAR: bool = true;
2596 const REGISTER_COUNT: usize = 16;
2597
2598 primitive_binop!(add, c32, f32, c64, f64);
2599
2600 primitive_binop!(add, op wrapping_add, u8, i8, u16, i16, u32, i32, u64, i64);
2601
2602 primitive_binop!(sub, c32, f32, c64, f64);
2603
2604 primitive_binop!(sub, op wrapping_sub, u8, i8, u16, i16, u32, i32, u64, i64);
2605
2606 primitive_binop!(mul, f32, f64);
2607
2608 primitive_binop!(mul, op wrapping_mul, u16, i16, u32, i32, u64, i64);
2609
2610 primitive_binop!(div, f32, f64);
2611
2612 primitive_binop!(and, op bitand, m8, u8, m16, u16, m32, u32, m64, u64);
2613
2614 primitive_binop!(or, op bitor, m8, u8, m16, u16, m32, u32, m64, u64);
2615
2616 primitive_binop!(xor, op bitxor, m8, u8, m16, u16, m32, u32, m64, u64);
2617
2618 primitive_binop!(ref equal, op eq, m8 => m8, u8 => m8, m16 => m16, u16 => m16, m32 => m32, u32 => m32, m64 => m64, u64 => m64, c32 => m32, f32 => m32, c64 => m64, f64 => m64);
2619
2620 primitive_binop!(ref greater_than, op gt, u8 => m8, i8 => m8, u16 => m16, i16 => m16, u32 => m32, i32 => m32, u64 => m64, i64 => m64, f32 => m32, f64 => m64);
2621
2622 primitive_binop!(ref greater_than_or_equal, op ge, u8 => m8, i8 => m8, u16 => m16, i16 => m16, u32 => m32, i32 => m32, u64 => m64, i64 => m64, f32 => m32, f64 => m64);
2623
2624 primitive_binop!(ref less_than, op lt, u8 => m8, i8 => m8, u16 => m16, i16 => m16, u32 => m32, i32 => m32, u64 => m64, i64 => m64, f32 => m32, f64 => m64);
2625
2626 primitive_binop!(ref less_than_or_equal, op le, u8 => m8, i8 => m8, u16 => m16, i16 => m16, u32 => m32, i32 => m32, u64 => m64, i64 => m64, f32 => m32, f64 => m64);
2627
2628 primitive_binop!(min, u8, i8, u16, i16, u32, i32, u64, i64, f32, f64);
2629
2630 primitive_binop!(max, u8, i8, u16, i16, u32, i32, u64, i64, f32, f64);
2631
2632 primitive_unop!(neg, c32, c64, f32, f64);
2633
2634 primitive_unop!(not, m8, u8, m16, u16, m32, u32, m64, u64);
2635
2636 splat_primitive!(u8, i8, u16, i16, u32, i32, u64, i64, c32, f32, c64, f64);
2637
2638 #[inline]
2639 fn abs2_c32s(self, a: Self::c32s) -> Self::c32s {
2640 let norm2 = a.re * a.re + a.im * a.im;
2641 c32::new(norm2, norm2)
2642 }
2643
2644 #[inline]
2645 fn abs2_c64s(self, a: Self::c64s) -> Self::c64s {
2646 let norm2 = a.re * a.re + a.im * a.im;
2647 c64::new(norm2, norm2)
2648 }
2649
2650 #[inline(always)]
2651 fn abs_max_c32s(self, a: Self::c32s) -> Self::c32s {
        // maximum of the absolute values of the real and imaginary parts,
        // matching the generic implementation above
2652        let re = f32::max(self.abs_f32s(a.re), self.abs_f32s(a.im));
2653 let im = re;
2654 Complex { re, im }
2655 }
2656
2657 #[inline(always)]
2658 fn abs_max_c64s(self, a: Self::c64s) -> Self::c64s {
        // maximum of the absolute values of the real and imaginary parts,
        // matching the generic implementation above
2659        let re = f64::max(self.abs_f64s(a.re), self.abs_f64s(a.im));
2660 let im = re;
2661 Complex { re, im }
2662 }
2663
2664 #[inline]
2665 fn conj_c32s(self, a: Self::c32s) -> Self::c32s {
2666 a.conj()
2667 }
2668
2669 #[inline]
2670 fn conj_c64s(self, a: Self::c64s) -> Self::c64s {
2671 a.conj()
2672 }
2673
2674 #[inline]
2675 fn conj_mul_add_c32s(self, a: Self::c32s, b: Self::c32s, c: Self::c32s) -> Self::c32s {
2676 let re = fma_f32(a.re, b.re, fma_f32(a.im, b.im, c.re));
2677 let im = fma_f32(a.re, b.im, -fma_f32(a.im, b.re, -c.im));
2678 Complex { re, im }
2679 }
2680
2681 #[inline]
2682 fn conj_mul_add_c64s(self, a: Self::c64s, b: Self::c64s, c: Self::c64s) -> Self::c64s {
2683 let re = fma_f64(a.re, b.re, fma_f64(a.im, b.im, c.re));
2684 let im = fma_f64(a.re, b.im, -fma_f64(a.im, b.re, -c.im));
2685 Complex { re, im }
2686 }
2687
2688 #[inline]
2689 fn conj_mul_add_e_c32s(self, a: Self::c32s, b: Self::c32s, c: Self::c32s) -> Self::c32s {
2690 a.conj() * b + c
2691 }
2692
2693 #[inline]
2694 fn conj_mul_add_e_c64s(self, a: Self::c64s, b: Self::c64s, c: Self::c64s) -> Self::c64s {
2695 a.conj() * b + c
2696 }
2697
2698 #[inline]
2699 fn conj_mul_c32s(self, a: Self::c32s, b: Self::c32s) -> Self::c32s {
2700 let re = fma_f32(a.re, b.re, a.im * b.im);
2701 let im = fma_f32(a.re, b.im, -(a.im * b.re));
2702 Complex { re, im }
2703 }
2704
2705 #[inline]
2706 fn conj_mul_c64s(self, a: Self::c64s, b: Self::c64s) -> Self::c64s {
2707 let re = fma_f64(a.re, b.re, a.im * b.im);
2708 let im = fma_f64(a.re, b.im, -(a.im * b.re));
2709 Complex { re, im }
2710 }
2711
2712 #[inline]
2713 fn conj_mul_e_c32s(self, a: Self::c32s, b: Self::c32s) -> Self::c32s {
2714 a.conj() * b
2715 }
2716
2717 #[inline]
2718 fn conj_mul_e_c64s(self, a: Self::c64s, b: Self::c64s) -> Self::c64s {
2719 a.conj() * b
2720 }
2721
2722 #[inline(always)]
2723 fn first_true_m32s(self, mask: Self::m32s) -> usize {
2724 if mask { 0 } else { 1 }
2725 }
2726
2727 #[inline(always)]
2728 fn first_true_m64s(self, mask: Self::m64s) -> usize {
2729 if mask { 0 } else { 1 }
2730 }
2731
2732 #[inline(always)]
2733 unsafe fn mask_load_ptr_c32s(self, mask: MemMask<Self::m32s>, ptr: *const c32) -> Self::c32s {
2734 if mask.mask { *ptr } else { core::mem::zeroed() }
2735 }
2736
2737 #[inline(always)]
2738 unsafe fn mask_load_ptr_c64s(self, mask: MemMask<Self::m64s>, ptr: *const c64) -> Self::c64s {
2739 if mask.mask { *ptr } else { core::mem::zeroed() }
2740 }
2741
2742 #[inline(always)]
2743 unsafe fn mask_load_ptr_u32s(self, mask: MemMask<Self::m32s>, ptr: *const u32) -> Self::u32s {
2744 if mask.mask { *ptr } else { 0 }
2745 }
2746
2747 #[inline(always)]
2748 unsafe fn mask_load_ptr_u64s(self, mask: MemMask<Self::m64s>, ptr: *const u64) -> Self::u64s {
2749 if mask.mask { *ptr } else { 0 }
2750 }
2751
2752 #[inline(always)]
2753 unsafe fn mask_store_ptr_c32s(
2754 self,
2755 mask: MemMask<Self::m32s>,
2756 ptr: *mut c32,
2757 values: Self::c32s,
2758 ) {
2759 if mask.mask {
2760 *ptr = values
2761 }
2762 }
2763
2764 #[inline(always)]
2765 unsafe fn mask_store_ptr_c64s(
2766 self,
2767 mask: MemMask<Self::m64s>,
2768 ptr: *mut c64,
2769 values: Self::c64s,
2770 ) {
2771 if mask.mask {
2772 *ptr = values
2773 }
2774 }
2775
2776 #[inline(always)]
2777 unsafe fn mask_store_ptr_u8s(self, mask: MemMask<Self::m8s>, ptr: *mut u8, values: Self::u8s) {
2778 if mask.mask {
2779 *ptr = values
2780 }
2781 }
2782
2783 #[inline(always)]
2784 unsafe fn mask_store_ptr_u16s(
2785 self,
2786 mask: MemMask<Self::m16s>,
2787 ptr: *mut u16,
2788 values: Self::u16s,
2789 ) {
2790 if mask.mask {
2791 *ptr = values
2792 }
2793 }
2794
2795 #[inline(always)]
2796 unsafe fn mask_store_ptr_u32s(
2797 self,
2798 mask: MemMask<Self::m32s>,
2799 ptr: *mut u32,
2800 values: Self::u32s,
2801 ) {
2802 if mask.mask {
2803 *ptr = values
2804 }
2805 }
2806
2807 #[inline(always)]
2808 unsafe fn mask_store_ptr_u64s(
2809 self,
2810 mask: MemMask<Self::m64s>,
2811 ptr: *mut u64,
2812 values: Self::u64s,
2813 ) {
2814 if mask.mask {
2815 *ptr = values
2816 }
2817 }
2818
2819 #[inline]
2820 fn mul_add_c32s(self, a: Self::c32s, b: Self::c32s, c: Self::c32s) -> Self::c32s {
2821 let re = fma_f32(a.re, b.re, -fma_f32(a.im, b.im, -c.re));
2822 let im = fma_f32(a.re, b.im, fma_f32(a.im, b.re, c.im));
2823 Complex { re, im }
2824 }
2825
2826 #[inline]
2827 fn mul_add_c64s(self, a: Self::c64s, b: Self::c64s, c: Self::c64s) -> Self::c64s {
2828 let re = fma_f64(a.re, b.re, -fma_f64(a.im, b.im, -c.re));
2829 let im = fma_f64(a.re, b.im, fma_f64(a.im, b.re, c.im));
2830 Complex { re, im }
2831 }
2832
2833 #[inline]
2834 fn mul_add_e_c32s(self, a: Self::c32s, b: Self::c32s, c: Self::c32s) -> Self::c32s {
2835 a * b + c
2836 }
2837
2838 #[inline]
2839 fn mul_add_e_c64s(self, a: Self::c64s, b: Self::c64s, c: Self::c64s) -> Self::c64s {
2840 a * b + c
2841 }
2842
2843 #[inline(always)]
2844 fn mul_add_e_f32s(self, a: Self::f32s, b: Self::f32s, c: Self::f32s) -> Self::f32s {
2845 a * b + c
2846 }
2847
2848 #[inline(always)]
2849 fn mul_add_e_f64s(self, a: Self::f64s, b: Self::f64s, c: Self::f64s) -> Self::f64s {
2850 a * b + c
2851 }
2852
2853 #[inline]
2854 fn mul_add_f32s(self, a: Self::f32s, b: Self::f32s, c: Self::f32s) -> Self::f32s {
2855 fma_f32(a, b, c)
2856 }
2857
2858 #[inline]
2859 fn mul_add_f64s(self, a: Self::f64s, b: Self::f64s, c: Self::f64s) -> Self::f64s {
2860 fma_f64(a, b, c)
2861 }
2862
2863 #[inline]
2864 fn mul_c32s(self, a: Self::c32s, b: Self::c32s) -> Self::c32s {
2865 let re = fma_f32(a.re, b.re, -(a.im * b.im));
2866 let im = fma_f32(a.re, b.im, a.im * b.re);
2867 Complex { re, im }
2868 }
2869
2870 #[inline]
2871 fn mul_c64s(self, a: Self::c64s, b: Self::c64s) -> Self::c64s {
2872 let re = fma_f64(a.re, b.re, -(a.im * b.im));
2873 let im = fma_f64(a.re, b.im, a.im * b.re);
2874 Complex { re, im }
2875 }
2876
2877 #[inline]
2878 fn mul_e_c32s(self, a: Self::c32s, b: Self::c32s) -> Self::c32s {
2879 a * b
2880 }
2881
2882 #[inline]
2883 fn mul_e_c64s(self, a: Self::c64s, b: Self::c64s) -> Self::c64s {
2884 a * b
2885 }
2886
2887 #[inline]
2888 fn partial_load_c64s(self, slice: &[c64]) -> Self::c64s {
2889 if let Some((head, _)) = slice.split_first() {
2890 *head
2891 } else {
2892 c64 { re: 0.0, im: 0.0 }
2893 }
2894 }
2895
2896 #[inline]
2897 fn partial_load_u32s(self, slice: &[u32]) -> Self::u32s {
2898 if let Some((head, _)) = slice.split_first() {
2899 *head
2900 } else {
2901 0
2902 }
2903 }
2904
2905 #[inline]
2906 fn partial_load_u64s(self, slice: &[u64]) -> Self::u64s {
2907 if let Some((head, _)) = slice.split_first() {
2908 *head
2909 } else {
2910 0
2911 }
2912 }
2913
2914 #[inline]
2915 fn partial_store_c64s(self, slice: &mut [c64], values: Self::c64s) {
2916 if let Some((head, _)) = slice.split_first_mut() {
2917 *head = values;
2918 }
2919 }
2920
2921 #[inline]
2922 fn partial_store_u32s(self, slice: &mut [u32], values: Self::u32s) {
2923 if let Some((head, _)) = slice.split_first_mut() {
2924 *head = values;
2925 }
2926 }
2927
2928 #[inline]
2929 fn partial_store_u64s(self, slice: &mut [u64], values: Self::u64s) {
2930 if let Some((head, _)) = slice.split_first_mut() {
2931 *head = values;
2932 }
2933 }
2934
2935 #[inline(always)]
2936 fn reduce_max_c32s(self, a: Self::c32s) -> c32 {
2937 a
2938 }
2939
2940 #[inline(always)]
2941 fn reduce_max_c64s(self, a: Self::c64s) -> c64 {
2942 a
2943 }
2944
2945 #[inline]
2946 fn reduce_max_f32s(self, a: Self::f32s) -> f32 {
2947 a
2948 }
2949
2950 #[inline]
2951 fn reduce_max_f64s(self, a: Self::f64s) -> f64 {
2952 a
2953 }
2954
2955 #[inline(always)]
2956 fn reduce_min_c32s(self, a: Self::c32s) -> c32 {
2957 a
2958 }
2959
2960 #[inline(always)]
2961 fn reduce_min_c64s(self, a: Self::c64s) -> c64 {
2962 a
2963 }
2964
2965 #[inline]
2966 fn reduce_min_f32s(self, a: Self::f32s) -> f32 {
2967 a
2968 }
2969
2970 #[inline]
2971 fn reduce_min_f64s(self, a: Self::f64s) -> f64 {
2972 a
2973 }
2974
2975 #[inline]
2976 fn reduce_product_f32s(self, a: Self::f32s) -> f32 {
2977 a
2978 }
2979
2980 #[inline]
2981 fn reduce_product_f64s(self, a: Self::f64s) -> f64 {
2982 a
2983 }
2984
2985 #[inline]
2986 fn reduce_sum_c32s(self, a: Self::c32s) -> c32 {
2987 a
2988 }
2989
2990 #[inline]
2991 fn reduce_sum_c64s(self, a: Self::c64s) -> c64 {
2992 a
2993 }
2994
2995 #[inline]
2996 fn reduce_sum_f32s(self, a: Self::f32s) -> f32 {
2997 a
2998 }
2999
3000 #[inline]
3001 fn reduce_sum_f64s(self, a: Self::f64s) -> f64 {
3002 a
3003 }
3004
3005 #[inline(always)]
3006 fn rotate_right_c32s(self, a: Self::c32s, _amount: usize) -> Self::c32s {
3007 a
3008 }
3009
3010 #[inline(always)]
3011 fn rotate_right_c64s(self, a: Self::c64s, _amount: usize) -> Self::c64s {
3012 a
3013 }
3014
3015 #[inline(always)]
3016 fn rotate_right_u32s(self, a: Self::u32s, _amount: usize) -> Self::u32s {
3017 a
3018 }
3019
3020 #[inline(always)]
3021 fn rotate_right_u64s(self, a: Self::u64s, _amount: usize) -> Self::u64s {
3022 a
3023 }
3024
3025 #[inline]
3026 fn select_u32s(
3027 self,
3028 mask: Self::m32s,
3029 if_true: Self::u32s,
3030 if_false: Self::u32s,
3031 ) -> Self::u32s {
3032 if mask { if_true } else { if_false }
3033 }
3034
3035 #[inline]
3036 fn select_u64s(
3037 self,
3038 mask: Self::m64s,
3039 if_true: Self::u64s,
3040 if_false: Self::u64s,
3041 ) -> Self::u64s {
3042 if mask { if_true } else { if_false }
3043 }
3044
3045 #[inline]
3046 fn swap_re_im_c32s(self, a: Self::c32s) -> Self::c32s {
3047 c32 { re: a.im, im: a.re }
3048 }
3049
3050 fn swap_re_im_c64s(self, a: Self::c64s) -> Self::c64s {
3051 c64 { re: a.im, im: a.re }
3052 }
3053
3054 #[inline]
3055 fn vectorize<Op: WithSimd>(self, op: Op) -> Op::Output {
3056 op.with_simd(self)
3057 }
3058
3059 #[inline]
3060 fn widening_mul_u32s(self, a: Self::u32s, b: Self::u32s) -> (Self::u32s, Self::u32s) {
3061 let c = a as u64 * b as u64;
3062 let lo = c as u32;
3063 let hi = (c >> 32) as u32;
3064 (lo, hi)
3065 }
3066
3067 #[inline]
3068 fn wrapping_dyn_shl_u32s(self, a: Self::u32s, amount: Self::u32s) -> Self::u32s {
3069 a.wrapping_shl(amount)
3070 }
3071
3072 #[inline]
3073 fn wrapping_dyn_shr_u32s(self, a: Self::u32s, amount: Self::u32s) -> Self::u32s {
3074 a.wrapping_shr(amount)
3075 }
3076
3077 unsafe fn mask_load_ptr_u8s(self, mask: MemMask<Self::m8s>, ptr: *const u8) -> Self::u8s {
3078 if mask.mask { *ptr } else { 0 }
3079 }
3080
3081 unsafe fn mask_load_ptr_u16s(self, mask: MemMask<Self::m16s>, ptr: *const u16) -> Self::u16s {
3082 if mask.mask { *ptr } else { 0 }
3083 }
3084
3085 #[inline(always)]
3086 fn sqrt_f32s(self, a: Self::f32s) -> Self::f32s {
3087 sqrt_f32(a)
3088 }
3089
3090 #[inline(always)]
3091 fn sqrt_f64s(self, a: Self::f64s) -> Self::f64s {
3092 sqrt_f64(a)
3093 }
3094}
3095
3096#[inline(always)]
3097unsafe fn split_slice<T, U>(slice: &[T]) -> (&[U], &[T]) {
3098 assert_eq!(core::mem::size_of::<U>() % core::mem::size_of::<T>(), 0);
3099 assert_eq!(core::mem::align_of::<U>(), core::mem::align_of::<T>());
3100
3101 let chunk_size = core::mem::size_of::<U>() / core::mem::size_of::<T>();
3102
3103 let len = slice.len();
3104 let data = slice.as_ptr();
3105
3106 let div = len / chunk_size;
3107 let rem = len % chunk_size;
3108 (
3109 from_raw_parts(data as *const U, div),
3110 from_raw_parts(data.add(len - rem), rem),
3111 )
3112}
3113
3114#[inline(always)]
3115unsafe fn split_mut_slice<T, U>(slice: &mut [T]) -> (&mut [U], &mut [T]) {
3116 assert_eq!(core::mem::size_of::<U>() % core::mem::size_of::<T>(), 0);
3117 assert_eq!(core::mem::align_of::<U>(), core::mem::align_of::<T>());
3118
3119 let chunk_size = core::mem::size_of::<U>() / core::mem::size_of::<T>();
3120
3121 let len = slice.len();
3122 let data = slice.as_mut_ptr();
3123
3124 let div = len / chunk_size;
3125 let rem = len % chunk_size;
3126 (
3127 from_raw_parts_mut(data as *mut U, div),
3128 from_raw_parts_mut(data.add(len - rem), rem),
3129 )
3130}
3131
3132#[inline(always)]
3133unsafe fn rsplit_slice<T, U>(slice: &[T]) -> (&[T], &[U]) {
3134 assert_eq!(core::mem::size_of::<U>() % core::mem::size_of::<T>(), 0);
3135 assert_eq!(core::mem::align_of::<U>(), core::mem::align_of::<T>());
3136
3137 let chunk_size = core::mem::size_of::<U>() / core::mem::size_of::<T>();
3138
3139 let len = slice.len();
3140 let data = slice.as_ptr();
3141
3142 let div = len / chunk_size;
3143 let rem = len % chunk_size;
3144 (
3145 from_raw_parts(data, rem),
3146 from_raw_parts(data.add(rem) as *const U, div),
3147 )
3148}
3149
3150#[inline(always)]
3151unsafe fn rsplit_mut_slice<T, U>(slice: &mut [T]) -> (&mut [T], &mut [U]) {
3152 assert_eq!(core::mem::size_of::<U>() % core::mem::size_of::<T>(), 0);
3153 assert_eq!(core::mem::align_of::<U>(), core::mem::align_of::<T>());
3154
3155 let chunk_size = core::mem::size_of::<U>() / core::mem::size_of::<T>();
3156
3157 let len = slice.len();
3158 let data = slice.as_mut_ptr();
3159
3160 let div = len / chunk_size;
3161 let rem = len % chunk_size;
3162 (
3163 from_raw_parts_mut(data, rem),
3164 from_raw_parts_mut(data.add(rem) as *mut U, div),
3165 )
3166}
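// Illustrative note (not part of the original source): with `T = f32` and
// `U = [f32; 4]`, a slice of length 10 splits into 2 chunks plus a remainder of 2
// elements. `split_slice`/`split_mut_slice` keep the remainder at the end of the
// slice, while `rsplit_slice`/`rsplit_mut_slice` keep it at the front.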
3167
3168match_cfg!(
3169 item,
3170 match cfg!() {
3171 const { any(target_arch = "x86", target_arch = "x86_64") } => {
3172 pub use x86::Arch;
3173 },
3174 const { target_arch = "aarch64" } => {
3175 pub use aarch64::Arch;
3176 },
3177 const { target_arch = "wasm32" } => {
3178 pub use wasm::Arch;
3179 },
3180 _ => {
3181 #[derive(Debug, Clone, Copy)]
3182 #[non_exhaustive]
3183 pub enum Arch {
3184 Scalar,
3185 }
3186
3187 impl Arch {
3188 #[inline(always)]
3189 pub fn new() -> Self {
3190 Self::Scalar
3191 }
3192
3193 #[inline(always)]
3194 pub fn dispatch<Op: WithSimd>(self, op: Op) -> Op::Output {
3195 op.with_simd(Scalar)
3196 }
3197 }
3198 impl Default for Arch {
3199 #[inline]
3200 fn default() -> Self {
3201 Self::new()
3202 }
3203 }
3204 },
3205 }
3206);
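// Illustrative sketch (not part of the original source): dispatching a kernel
// through `Arch`. The `ScaledFma` type is hypothetical, and the example assumes
// `WithSimd` is declared as `fn with_simd<S: Simd>(self, simd: S) -> Self::Output`,
// as suggested by `Simd::vectorize` above; it only uses `Simd` methods that appear
// in this file (`splat_f32s`, `mul_add_f32s`, `reduce_max_f32s`).
//
//     struct ScaledFma {
//         a: f32,
//         x: f32,
//         y: f32,
//     }
//
//     impl WithSimd for ScaledFma {
//         type Output = f32;
//
//         #[inline(always)]
//         fn with_simd<S: Simd>(self, simd: S) -> f32 {
//             let a = simd.splat_f32s(self.a);
//             let x = simd.splat_f32s(self.x);
//             let y = simd.splat_f32s(self.y);
//             // Every lane holds a * x + y, so the reduction is lane-count independent.
//             simd.reduce_max_f32s(simd.mul_add_f32s(a, x, y))
//         }
//     }
//
//     let result = Arch::new().dispatch(ScaledFma { a: 2.0, x: 3.0, y: 1.0 });
//     assert_eq!(result, 7.0);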
3207
3208#[doc(hidden)]
3209pub struct CheckSameSize<T, U>(PhantomData<(T, U)>);
3210impl<T, U> CheckSameSize<T, U> {
3211 pub const VALID: () = {
3212 assert!(core::mem::size_of::<T>() == core::mem::size_of::<U>());
3213 };
3214}
3215
3216#[doc(hidden)]
3217pub struct CheckSizeLessThanOrEqual<T, U>(PhantomData<(T, U)>);
3218impl<T, U> CheckSizeLessThanOrEqual<T, U> {
3219 pub const VALID: () = {
3220 assert!(core::mem::size_of::<T>() <= core::mem::size_of::<U>());
3221 };
3222}
3223
3224#[macro_export]
3225macro_rules! static_assert_same_size {
3226 ($t: ty, $u: ty) => {
3227 let _ = $crate::CheckSameSize::<$t, $u>::VALID;
3228 };
3229}
3230#[macro_export]
3231macro_rules! static_assert_size_less_than_or_equal {
3232 ($t: ty, $u: ty) => {
3233 let _ = $crate::CheckSizeLessThanOrEqual::<$t, $u>::VALID;
3234 };
3235}
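// Illustrative usage (not part of the original source): both macros expand to a
// `let` binding of a compile-time constant, so they must appear inside a function
// body, and a size mismatch becomes a build error rather than a runtime panic.
//
//     static_assert_same_size!(u32, f32);               // ok: both are 4 bytes
//     static_assert_size_less_than_or_equal!(u16, u32); // ok: 2 <= 4
//     // static_assert_same_size!(u8, u32);             // would fail to compile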
3236
3237#[inline(always)]
/// Reinterprets the bits of `value` as a value of type `U`.
///
/// The two types are checked at compile time to have the same size.
3241pub const fn cast<T: NoUninit, U: AnyBitPattern>(value: T) -> U {
3242 static_assert_same_size!(T, U);
3243 let ptr = &raw const value as *const U;
3244 unsafe { ptr.read_unaligned() }
3245}
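// Illustrative usage (not part of the original source):
//
//     let bits: u32 = cast(1.0_f32);
//     assert_eq!(bits, 0x3f80_0000); // IEEE-754 bit pattern of 1.0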
3246
3247#[inline(always)]
/// Reinterprets the leading bytes of `value` as a value of type `U`.
///
/// `U` is checked at compile time to be no larger than `T`; any trailing bytes of
/// `value` are ignored.
3251pub const fn cast_lossy<T: NoUninit, U: AnyBitPattern>(value: T) -> U {
3252 static_assert_size_less_than_or_equal!(U, T);
3253 let value = core::mem::ManuallyDrop::new(value);
3254 let ptr = &raw const value as *const U;
3255 unsafe { ptr.read_unaligned() }
3256}
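// Illustrative usage (not part of the original source); the result is taken from
// the leading bytes of the source value, so it depends on the target's endianness:
//
//     let lo: u32 = cast_lossy(0x0123_4567_89ab_cdef_u64);
//     assert_eq!(lo, 0x89ab_cdef); // on a little-endian target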
3257
3258#[inline(always)]
/// Splits `slice` into a prefix of `[T; N]` chunks and a remainder slice of length
/// strictly less than `N`.
3262pub fn as_arrays<const N: usize, T>(slice: &[T]) -> (&[[T; N]], &[T]) {
3263 let n = slice.len();
3264 let mid_div_n = n / N;
3265 let mid = mid_div_n * N;
3266 let ptr = slice.as_ptr();
3267 unsafe {
3268 (
3269 from_raw_parts(ptr as *const [T; N], mid_div_n),
3270 from_raw_parts(ptr.add(mid), n - mid),
3271 )
3272 }
3273}
3274
3275#[inline(always)]
/// Splits `slice` into a mutable prefix of `[T; N]` chunks and a mutable remainder
/// slice of length strictly less than `N`.
3279pub fn as_arrays_mut<const N: usize, T>(slice: &mut [T]) -> (&mut [[T; N]], &mut [T]) {
3280 let n = slice.len();
3281 let mid_div_n = n / N;
3282 let mid = mid_div_n * N;
3283 let ptr = slice.as_mut_ptr();
3284 unsafe {
3285 (
3286 from_raw_parts_mut(ptr as *mut [T; N], mid_div_n),
3287 from_raw_parts_mut(ptr.add(mid), n - mid),
3288 )
3289 }
3290}
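// Illustrative usage (not part of the original source):
//
//     let v = [1.0_f32, 2.0, 3.0, 4.0, 5.0];
//     let (chunks, tail) = as_arrays::<4, f32>(&v);
//     assert_eq!(chunks, &[[1.0, 2.0, 3.0, 4.0]]);
//     assert_eq!(tail, &[5.0]);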
3291
3292pub mod core_arch;
3294
3295#[allow(unused_macros)]
3296macro_rules! inherit {
3297 ({$(
3298 $(#[$attr: meta])*
3299 $(unsafe $($placeholder: lifetime)?)?
3300 fn $func: ident(self
3301 $(,$arg: ident: $ty: ty)* $(,)?
3302 ) $(-> $ret: ty)?;
3303 )*}) => {
3304 $(
3305 $(#[$attr])*
3306 #[inline(always)]
3307 $(unsafe $($placeholder)?)? fn $func (self, $($arg: $ty,)*) $(-> $ret)? {
3308 (*self).$func ($($arg,)*)
3309 }
3310 )*
3311 };
3312}
3313
3314#[allow(unused_macros)]
3315macro_rules! inherit_x2 {
3316 ($base: expr, {$(
3317 $(#[$attr: meta])*
3318 $(unsafe $($placeholder: lifetime)?)?
3319 fn $func: ident ($self: ident
3320 $(,$arg: ident: $ty: ty)* $(,)?
3321 ) $(-> $ret: ty)?;
3322 )*}) => {
3323 $(
3324 $(#[$attr])*
3325 #[inline(always)]
3326 $(unsafe $($placeholder)?)? fn $func ($self, $($arg: $ty,)*) $(-> $ret)? {
3327 $(let $arg: [_; 2] = cast!($arg);)*
3328 cast!([($base).$func ($($arg[0],)*), ($base).$func ($($arg[1],)*)])
3329 }
3330 )*
3331 };
3332
3333 ($base: expr, splat, {$(
3334 $(#[$attr: meta])*
3335 $(unsafe $($placeholder: lifetime)?)?
3336 fn $func: ident ($self: ident
3337 $(,$arg: ident: $ty: ty)* $(,)?
3338 ) $(-> $ret: ty)?;
3339 )*}) => {
3340 $(
3341 $(#[$attr])*
3342 #[inline(always)]
3343 $(unsafe $($placeholder)?)? fn $func ($self, $($arg: $ty,)*) $(-> $ret)? {
3344 cast!([($base).$func ($($arg,)*), ($base).$func ($($arg,)*)])
3345 }
3346 )*
3347 };
3348
3349 ($base: expr, wide, {$(
3350 $(#[$attr: meta])*
3351 $(unsafe $($placeholder: lifetime)?)?
3352 fn $func: ident ($self: ident
3353 $(,$arg: ident: $ty: ty)* $(,)?
3354 ) $(-> $ret: ty)?;
3355 )*}) => {
3356 $(
3357 $(#[$attr])*
3358 #[inline(always)]
3359 $(unsafe $($placeholder)?)? fn $func ($self, $($arg: $ty,)*) $(-> $ret)? {
3360 $(let $arg: [_; 2] = cast!($arg);)*
3361 let (r0, r1) = ($base).$func ($($arg[0],)*); let (s0, s1) = ($base).$func ($($arg[1],)*);
3362 (cast!([r0, s0]), cast!([r1, s1]))
3363 }
3364 )*
3365 };
3366}
3367
3368#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
3369#[cfg_attr(docsrs, doc(cfg(any(target_arch = "x86", target_arch = "x86_64"))))]
3370pub mod x86;
3372
3373#[cfg(target_arch = "wasm32")]
3374#[cfg_attr(docsrs, doc(cfg(target_arch = "wasm32")))]
3375pub mod wasm;
3377
3378#[cfg(target_arch = "aarch64")]
3379#[cfg_attr(docsrs, doc(cfg(target_arch = "aarch64")))]
3380pub mod aarch64;
3382
3383#[derive(Copy, Clone, PartialEq, Eq, Default)]
/// 8-bit mask type: all bits set when `true`, all bits clear when `false`.
3386#[repr(transparent)]
3387pub struct m8(u8);
3388#[derive(Copy, Clone, PartialEq, Eq, Default)]
/// 16-bit mask type: all bits set when `true`, all bits clear when `false`.
3391#[repr(transparent)]
3392pub struct m16(u16);
3393#[derive(Copy, Clone, PartialEq, Eq, Default)]
/// 32-bit mask type: all bits set when `true`, all bits clear when `false`.
3396#[repr(transparent)]
3397pub struct m32(u32);
3398#[derive(Copy, Clone, PartialEq, Eq, Default)]
/// 64-bit mask type: all bits set when `true`, all bits clear when `false`.
3401#[repr(transparent)]
3402pub struct m64(u64);
3403
3404#[derive(Copy, Clone, PartialEq, Eq)]
/// Bitmask with one bit per lane (8 lanes).
3406#[repr(transparent)]
3407pub struct b8(pub u8);
3408#[derive(Copy, Clone, PartialEq, Eq)]
/// Bitmask with one bit per lane (16 lanes).
3410#[repr(transparent)]
3411pub struct b16(pub u16);
3412#[derive(Copy, Clone, PartialEq, Eq)]
/// Bitmask with one bit per lane (32 lanes).
3414#[repr(transparent)]
3415pub struct b32(pub u32);
3416#[derive(Copy, Clone, PartialEq, Eq)]
/// Bitmask with one bit per lane (64 lanes).
3418#[repr(transparent)]
3419pub struct b64(pub u64);
3420
3421impl core::ops::Not for b8 {
3422 type Output = b8;
3423
3424 #[inline(always)]
3425 fn not(self) -> Self::Output {
3426 b8(!self.0)
3427 }
3428}
3429impl core::ops::BitAnd for b8 {
3430 type Output = b8;
3431
3432 #[inline(always)]
3433 fn bitand(self, rhs: Self) -> Self::Output {
3434 b8(self.0 & rhs.0)
3435 }
3436}
3437impl core::ops::BitOr for b8 {
3438 type Output = b8;
3439
3440 #[inline(always)]
3441 fn bitor(self, rhs: Self) -> Self::Output {
3442 b8(self.0 | rhs.0)
3443 }
3444}
3445impl core::ops::BitXor for b8 {
3446 type Output = b8;
3447
3448 #[inline(always)]
3449 fn bitxor(self, rhs: Self) -> Self::Output {
3450 b8(self.0 ^ rhs.0)
3451 }
3452}
3453
3454impl core::ops::Not for m8 {
3455 type Output = m8;
3456
3457 #[inline(always)]
3458 fn not(self) -> Self::Output {
3459 m8(!self.0)
3460 }
3461}
3462impl core::ops::BitAnd for m8 {
3463 type Output = m8;
3464
3465 #[inline(always)]
3466 fn bitand(self, rhs: Self) -> Self::Output {
3467 m8(self.0 & rhs.0)
3468 }
3469}
3470impl core::ops::BitOr for m8 {
3471 type Output = m8;
3472
3473 #[inline(always)]
3474 fn bitor(self, rhs: Self) -> Self::Output {
3475 m8(self.0 | rhs.0)
3476 }
3477}
3478impl core::ops::BitXor for m8 {
3479 type Output = m8;
3480
3481 #[inline(always)]
3482 fn bitxor(self, rhs: Self) -> Self::Output {
3483 m8(self.0 ^ rhs.0)
3484 }
3485}
3486
3487impl core::ops::Not for m16 {
3488 type Output = m16;
3489
3490 #[inline(always)]
3491 fn not(self) -> Self::Output {
3492 m16(!self.0)
3493 }
3494}
3495impl core::ops::BitAnd for m16 {
3496 type Output = m16;
3497
3498 #[inline(always)]
3499 fn bitand(self, rhs: Self) -> Self::Output {
3500 m16(self.0 & rhs.0)
3501 }
3502}
3503impl core::ops::BitOr for m16 {
3504 type Output = m16;
3505
3506 #[inline(always)]
3507 fn bitor(self, rhs: Self) -> Self::Output {
3508 m16(self.0 | rhs.0)
3509 }
3510}
3511impl core::ops::BitXor for m16 {
3512 type Output = m16;
3513
3514 #[inline(always)]
3515 fn bitxor(self, rhs: Self) -> Self::Output {
3516 m16(self.0 ^ rhs.0)
3517 }
3518}
3519
3520impl core::ops::Not for m32 {
3521 type Output = m32;
3522
3523 #[inline(always)]
3524 fn not(self) -> Self::Output {
3525 m32(!self.0)
3526 }
3527}
3528impl core::ops::BitAnd for m32 {
3529 type Output = m32;
3530
3531 #[inline(always)]
3532 fn bitand(self, rhs: Self) -> Self::Output {
3533 m32(self.0 & rhs.0)
3534 }
3535}
3536impl core::ops::BitOr for m32 {
3537 type Output = m32;
3538
3539 #[inline(always)]
3540 fn bitor(self, rhs: Self) -> Self::Output {
3541 m32(self.0 | rhs.0)
3542 }
3543}
3544impl core::ops::BitXor for m32 {
3545 type Output = m32;
3546
3547 #[inline(always)]
3548 fn bitxor(self, rhs: Self) -> Self::Output {
3549 m32(self.0 ^ rhs.0)
3550 }
3551}
3552
3553impl core::ops::Not for m64 {
3554 type Output = m64;
3555
3556 #[inline(always)]
3557 fn not(self) -> Self::Output {
3558 m64(!self.0)
3559 }
3560}
3561impl core::ops::BitAnd for m64 {
3562 type Output = m64;
3563
3564 #[inline(always)]
3565 fn bitand(self, rhs: Self) -> Self::Output {
3566 m64(self.0 & rhs.0)
3567 }
3568}
3569impl core::ops::BitOr for m64 {
3570 type Output = m64;
3571
3572 #[inline(always)]
3573 fn bitor(self, rhs: Self) -> Self::Output {
3574 m64(self.0 | rhs.0)
3575 }
3576}
3577impl core::ops::BitXor for m64 {
3578 type Output = m64;
3579
3580 #[inline(always)]
3581 fn bitxor(self, rhs: Self) -> Self::Output {
3582 m64(self.0 ^ rhs.0)
3583 }
3584}
3585
3586impl core::ops::Not for b16 {
3587 type Output = b16;
3588
3589 #[inline(always)]
3590 fn not(self) -> Self::Output {
3591 b16(!self.0)
3592 }
3593}
3594impl core::ops::BitAnd for b16 {
3595 type Output = b16;
3596
3597 #[inline(always)]
3598 fn bitand(self, rhs: Self) -> Self::Output {
3599 b16(self.0 & rhs.0)
3600 }
3601}
3602impl core::ops::BitOr for b16 {
3603 type Output = b16;
3604
3605 #[inline(always)]
3606 fn bitor(self, rhs: Self) -> Self::Output {
3607 b16(self.0 | rhs.0)
3608 }
3609}
3610impl core::ops::BitXor for b16 {
3611 type Output = b16;
3612
3613 #[inline(always)]
3614 fn bitxor(self, rhs: Self) -> Self::Output {
3615 b16(self.0 ^ rhs.0)
3616 }
3617}
3618
3619impl core::ops::Not for b32 {
3620 type Output = b32;
3621
3622 #[inline(always)]
3623 fn not(self) -> Self::Output {
3624 b32(!self.0)
3625 }
3626}
3627impl core::ops::BitAnd for b32 {
3628 type Output = b32;
3629
3630 #[inline(always)]
3631 fn bitand(self, rhs: Self) -> Self::Output {
3632 b32(self.0 & rhs.0)
3633 }
3634}
3635impl core::ops::BitOr for b32 {
3636 type Output = b32;
3637
3638 #[inline(always)]
3639 fn bitor(self, rhs: Self) -> Self::Output {
3640 b32(self.0 | rhs.0)
3641 }
3642}
3643impl core::ops::BitXor for b32 {
3644 type Output = b32;
3645
3646 #[inline(always)]
3647 fn bitxor(self, rhs: Self) -> Self::Output {
3648 b32(self.0 ^ rhs.0)
3649 }
3650}
3651
3652impl core::ops::Not for b64 {
3653 type Output = b64;
3654
3655 #[inline(always)]
3656 fn not(self) -> Self::Output {
3657 b64(!self.0)
3658 }
3659}
3660impl core::ops::BitAnd for b64 {
3661 type Output = b64;
3662
3663 #[inline(always)]
3664 fn bitand(self, rhs: Self) -> Self::Output {
3665 b64(self.0 & rhs.0)
3666 }
3667}
3668impl core::ops::BitOr for b64 {
3669 type Output = b64;
3670
3671 #[inline(always)]
3672 fn bitor(self, rhs: Self) -> Self::Output {
3673 b64(self.0 | rhs.0)
3674 }
3675}
3676impl core::ops::BitXor for b64 {
3677 type Output = b64;
3678
3679 #[inline(always)]
3680 fn bitxor(self, rhs: Self) -> Self::Output {
3681 b64(self.0 ^ rhs.0)
3682 }
3683}
3684
3685impl Debug for b8 {
3686 fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result {
3687 #[allow(dead_code)]
3688 #[derive(Copy, Clone, Debug)]
3689 struct b8(bool, bool, bool, bool, bool, bool, bool, bool);
3690 b8(
3691 ((self.0 >> 0) & 1) == 1,
3692 ((self.0 >> 1) & 1) == 1,
3693 ((self.0 >> 2) & 1) == 1,
3694 ((self.0 >> 3) & 1) == 1,
3695 ((self.0 >> 4) & 1) == 1,
3696 ((self.0 >> 5) & 1) == 1,
3697 ((self.0 >> 6) & 1) == 1,
3698 ((self.0 >> 7) & 1) == 1,
3699 )
3700 .fmt(f)
3701 }
3702}
3703impl Debug for b16 {
3704 fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result {
3705 #[allow(dead_code)]
3706 #[derive(Copy, Clone, Debug)]
3707 struct b16(
3708 bool,
3709 bool,
3710 bool,
3711 bool,
3712 bool,
3713 bool,
3714 bool,
3715 bool,
3716 bool,
3717 bool,
3718 bool,
3719 bool,
3720 bool,
3721 bool,
3722 bool,
3723 bool,
3724 );
3725 b16(
3726 ((self.0 >> 00) & 1) == 1,
3727 ((self.0 >> 01) & 1) == 1,
3728 ((self.0 >> 02) & 1) == 1,
3729 ((self.0 >> 03) & 1) == 1,
3730 ((self.0 >> 04) & 1) == 1,
3731 ((self.0 >> 05) & 1) == 1,
3732 ((self.0 >> 06) & 1) == 1,
3733 ((self.0 >> 07) & 1) == 1,
3734 ((self.0 >> 08) & 1) == 1,
3735 ((self.0 >> 09) & 1) == 1,
3736 ((self.0 >> 10) & 1) == 1,
3737 ((self.0 >> 11) & 1) == 1,
3738 ((self.0 >> 12) & 1) == 1,
3739 ((self.0 >> 13) & 1) == 1,
3740 ((self.0 >> 14) & 1) == 1,
3741 ((self.0 >> 15) & 1) == 1,
3742 )
3743 .fmt(f)
3744 }
3745}
3746impl Debug for b32 {
3747 fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result {
3748 #[allow(dead_code)]
3749 #[derive(Copy, Clone, Debug)]
3750 struct b32(
3751 bool,
3752 bool,
3753 bool,
3754 bool,
3755 bool,
3756 bool,
3757 bool,
3758 bool,
3759 bool,
3760 bool,
3761 bool,
3762 bool,
3763 bool,
3764 bool,
3765 bool,
3766 bool,
3767 bool,
3768 bool,
3769 bool,
3770 bool,
3771 bool,
3772 bool,
3773 bool,
3774 bool,
3775 bool,
3776 bool,
3777 bool,
3778 bool,
3779 bool,
3780 bool,
3781 bool,
3782 bool,
3783 );
3784 b32(
3785 ((self.0 >> 00) & 1) == 1,
3786 ((self.0 >> 01) & 1) == 1,
3787 ((self.0 >> 02) & 1) == 1,
3788 ((self.0 >> 03) & 1) == 1,
3789 ((self.0 >> 04) & 1) == 1,
3790 ((self.0 >> 05) & 1) == 1,
3791 ((self.0 >> 06) & 1) == 1,
3792 ((self.0 >> 07) & 1) == 1,
3793 ((self.0 >> 08) & 1) == 1,
3794 ((self.0 >> 09) & 1) == 1,
3795 ((self.0 >> 10) & 1) == 1,
3796 ((self.0 >> 11) & 1) == 1,
3797 ((self.0 >> 12) & 1) == 1,
3798 ((self.0 >> 13) & 1) == 1,
3799 ((self.0 >> 14) & 1) == 1,
3800 ((self.0 >> 15) & 1) == 1,
3801 ((self.0 >> 16) & 1) == 1,
3802 ((self.0 >> 17) & 1) == 1,
3803 ((self.0 >> 18) & 1) == 1,
3804 ((self.0 >> 19) & 1) == 1,
3805 ((self.0 >> 20) & 1) == 1,
3806 ((self.0 >> 21) & 1) == 1,
3807 ((self.0 >> 22) & 1) == 1,
3808 ((self.0 >> 23) & 1) == 1,
3809 ((self.0 >> 24) & 1) == 1,
3810 ((self.0 >> 25) & 1) == 1,
3811 ((self.0 >> 26) & 1) == 1,
3812 ((self.0 >> 27) & 1) == 1,
3813 ((self.0 >> 28) & 1) == 1,
3814 ((self.0 >> 29) & 1) == 1,
3815 ((self.0 >> 30) & 1) == 1,
3816 ((self.0 >> 31) & 1) == 1,
3817 )
3818 .fmt(f)
3819 }
3820}
3821impl Debug for b64 {
3822 fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result {
3823 #[allow(dead_code)]
3824 #[derive(Copy, Clone, Debug)]
3825 struct b64(
3826 bool,
3827 bool,
3828 bool,
3829 bool,
3830 bool,
3831 bool,
3832 bool,
3833 bool,
3834 bool,
3835 bool,
3836 bool,
3837 bool,
3838 bool,
3839 bool,
3840 bool,
3841 bool,
3842 bool,
3843 bool,
3844 bool,
3845 bool,
3846 bool,
3847 bool,
3848 bool,
3849 bool,
3850 bool,
3851 bool,
3852 bool,
3853 bool,
3854 bool,
3855 bool,
3856 bool,
3857 bool,
3858 bool,
3859 bool,
3860 bool,
3861 bool,
3862 bool,
3863 bool,
3864 bool,
3865 bool,
3866 bool,
3867 bool,
3868 bool,
3869 bool,
3870 bool,
3871 bool,
3872 bool,
3873 bool,
3874 bool,
3875 bool,
3876 bool,
3877 bool,
3878 bool,
3879 bool,
3880 bool,
3881 bool,
3882 bool,
3883 bool,
3884 bool,
3885 bool,
3886 bool,
3887 bool,
3888 bool,
3889 bool,
3890 );
3891 b64(
3892 ((self.0 >> 00) & 1) == 1,
3893 ((self.0 >> 01) & 1) == 1,
3894 ((self.0 >> 02) & 1) == 1,
3895 ((self.0 >> 03) & 1) == 1,
3896 ((self.0 >> 04) & 1) == 1,
3897 ((self.0 >> 05) & 1) == 1,
3898 ((self.0 >> 06) & 1) == 1,
3899 ((self.0 >> 07) & 1) == 1,
3900 ((self.0 >> 08) & 1) == 1,
3901 ((self.0 >> 09) & 1) == 1,
3902 ((self.0 >> 10) & 1) == 1,
3903 ((self.0 >> 11) & 1) == 1,
3904 ((self.0 >> 12) & 1) == 1,
3905 ((self.0 >> 13) & 1) == 1,
3906 ((self.0 >> 14) & 1) == 1,
3907 ((self.0 >> 15) & 1) == 1,
3908 ((self.0 >> 16) & 1) == 1,
3909 ((self.0 >> 17) & 1) == 1,
3910 ((self.0 >> 18) & 1) == 1,
3911 ((self.0 >> 19) & 1) == 1,
3912 ((self.0 >> 20) & 1) == 1,
3913 ((self.0 >> 21) & 1) == 1,
3914 ((self.0 >> 22) & 1) == 1,
3915 ((self.0 >> 23) & 1) == 1,
3916 ((self.0 >> 24) & 1) == 1,
3917 ((self.0 >> 25) & 1) == 1,
3918 ((self.0 >> 26) & 1) == 1,
3919 ((self.0 >> 27) & 1) == 1,
3920 ((self.0 >> 28) & 1) == 1,
3921 ((self.0 >> 29) & 1) == 1,
3922 ((self.0 >> 30) & 1) == 1,
3923 ((self.0 >> 31) & 1) == 1,
3924 ((self.0 >> 32) & 1) == 1,
3925 ((self.0 >> 33) & 1) == 1,
3926 ((self.0 >> 34) & 1) == 1,
3927 ((self.0 >> 35) & 1) == 1,
3928 ((self.0 >> 36) & 1) == 1,
3929 ((self.0 >> 37) & 1) == 1,
3930 ((self.0 >> 38) & 1) == 1,
3931 ((self.0 >> 39) & 1) == 1,
3932 ((self.0 >> 40) & 1) == 1,
3933 ((self.0 >> 41) & 1) == 1,
3934 ((self.0 >> 42) & 1) == 1,
3935 ((self.0 >> 43) & 1) == 1,
3936 ((self.0 >> 44) & 1) == 1,
3937 ((self.0 >> 45) & 1) == 1,
3938 ((self.0 >> 46) & 1) == 1,
3939 ((self.0 >> 47) & 1) == 1,
3940 ((self.0 >> 48) & 1) == 1,
3941 ((self.0 >> 49) & 1) == 1,
3942 ((self.0 >> 50) & 1) == 1,
3943 ((self.0 >> 51) & 1) == 1,
3944 ((self.0 >> 52) & 1) == 1,
3945 ((self.0 >> 53) & 1) == 1,
3946 ((self.0 >> 54) & 1) == 1,
3947 ((self.0 >> 55) & 1) == 1,
3948 ((self.0 >> 56) & 1) == 1,
3949 ((self.0 >> 57) & 1) == 1,
3950 ((self.0 >> 58) & 1) == 1,
3951 ((self.0 >> 59) & 1) == 1,
3952 ((self.0 >> 60) & 1) == 1,
3953 ((self.0 >> 61) & 1) == 1,
3954 ((self.0 >> 62) & 1) == 1,
3955 ((self.0 >> 63) & 1) == 1,
3956 )
3957 .fmt(f)
3958 }
3959}
3960
3961impl Debug for m8 {
3962 #[inline]
3963 fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result {
3964 self.is_set().fmt(f)
3965 }
3966}
3967impl Debug for m16 {
3968 #[inline]
3969 fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result {
3970 self.is_set().fmt(f)
3971 }
3972}
3973impl Debug for m32 {
3974 #[inline]
3975 fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result {
3976 self.is_set().fmt(f)
3977 }
3978}
3979impl Debug for m64 {
3980 #[inline]
3981 fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result {
3982 self.is_set().fmt(f)
3983 }
3984}
3985
3986impl m8 {
3987 #[inline(always)]
3990 pub const fn new(flag: bool) -> Self {
3991 Self(if flag { u8::MAX } else { 0 })
3992 }
3993
3994 #[inline(always)]
3996 pub const fn is_set(self) -> bool {
3997 self.0 != 0
3998 }
3999}
4000impl m16 {
4001 #[inline(always)]
4004 pub const fn new(flag: bool) -> Self {
4005 Self(if flag { u16::MAX } else { 0 })
4006 }
4007
4008 #[inline(always)]
4010 pub const fn is_set(self) -> bool {
4011 self.0 != 0
4012 }
4013}
4014impl m32 {
4015 #[inline(always)]
4018 pub const fn new(flag: bool) -> Self {
4019 Self(if flag { u32::MAX } else { 0 })
4020 }
4021
4022 #[inline(always)]
4024 pub const fn is_set(self) -> bool {
4025 self.0 != 0
4026 }
4027}
4028impl m64 {
4029 #[inline(always)]
4032 pub const fn new(flag: bool) -> Self {
4033 Self(if flag { u64::MAX } else { 0 })
4034 }
4035
4036 #[inline(always)]
4038 pub const fn is_set(self) -> bool {
4039 self.0 != 0
4040 }
4041}
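// Illustrative usage (not part of the original source): the `mN` masks behave like
// booleans under the bitwise operators defined above.
//
//     let m = m32::new(true);
//     assert!(m.is_set());
//     assert!(!(m & !m).is_set()); // x AND NOT x is always false
//     assert!((m | !m).is_set());  // x OR NOT x is always true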
4042
4043#[derive(Debug, Copy, Clone, PartialEq, Eq)]
4045#[repr(C)]
4046pub struct i8x16(
4047 pub i8,
4048 pub i8,
4049 pub i8,
4050 pub i8,
4051 pub i8,
4052 pub i8,
4053 pub i8,
4054 pub i8,
4055 pub i8,
4056 pub i8,
4057 pub i8,
4058 pub i8,
4059 pub i8,
4060 pub i8,
4061 pub i8,
4062 pub i8,
4063);
4064#[derive(Debug, Copy, Clone, PartialEq, Eq)]
4066#[repr(C)]
4067pub struct i8x32(
4068 pub i8,
4069 pub i8,
4070 pub i8,
4071 pub i8,
4072 pub i8,
4073 pub i8,
4074 pub i8,
4075 pub i8,
4076 pub i8,
4077 pub i8,
4078 pub i8,
4079 pub i8,
4080 pub i8,
4081 pub i8,
4082 pub i8,
4083 pub i8,
4084 pub i8,
4085 pub i8,
4086 pub i8,
4087 pub i8,
4088 pub i8,
4089 pub i8,
4090 pub i8,
4091 pub i8,
4092 pub i8,
4093 pub i8,
4094 pub i8,
4095 pub i8,
4096 pub i8,
4097 pub i8,
4098 pub i8,
4099 pub i8,
4100);
4101#[derive(Debug, Copy, Clone, PartialEq, Eq)]
4103#[repr(C)]
4104pub struct i8x64(
4105 pub i8,
4106 pub i8,
4107 pub i8,
4108 pub i8,
4109 pub i8,
4110 pub i8,
4111 pub i8,
4112 pub i8,
4113 pub i8,
4114 pub i8,
4115 pub i8,
4116 pub i8,
4117 pub i8,
4118 pub i8,
4119 pub i8,
4120 pub i8,
4121 pub i8,
4122 pub i8,
4123 pub i8,
4124 pub i8,
4125 pub i8,
4126 pub i8,
4127 pub i8,
4128 pub i8,
4129 pub i8,
4130 pub i8,
4131 pub i8,
4132 pub i8,
4133 pub i8,
4134 pub i8,
4135 pub i8,
4136 pub i8,
4137 pub i8,
4138 pub i8,
4139 pub i8,
4140 pub i8,
4141 pub i8,
4142 pub i8,
4143 pub i8,
4144 pub i8,
4145 pub i8,
4146 pub i8,
4147 pub i8,
4148 pub i8,
4149 pub i8,
4150 pub i8,
4151 pub i8,
4152 pub i8,
4153 pub i8,
4154 pub i8,
4155 pub i8,
4156 pub i8,
4157 pub i8,
4158 pub i8,
4159 pub i8,
4160 pub i8,
4161 pub i8,
4162 pub i8,
4163 pub i8,
4164 pub i8,
4165 pub i8,
4166 pub i8,
4167 pub i8,
4168 pub i8,
4169);
4170
4171#[derive(Debug, Copy, Clone, PartialEq, Eq)]
4173#[repr(C)]
4174pub struct u8x16(
4175 pub u8,
4176 pub u8,
4177 pub u8,
4178 pub u8,
4179 pub u8,
4180 pub u8,
4181 pub u8,
4182 pub u8,
4183 pub u8,
4184 pub u8,
4185 pub u8,
4186 pub u8,
4187 pub u8,
4188 pub u8,
4189 pub u8,
4190 pub u8,
4191);
4192#[derive(Debug, Copy, Clone, PartialEq, Eq)]
4194#[repr(C)]
4195pub struct u8x32(
4196 pub u8,
4197 pub u8,
4198 pub u8,
4199 pub u8,
4200 pub u8,
4201 pub u8,
4202 pub u8,
4203 pub u8,
4204 pub u8,
4205 pub u8,
4206 pub u8,
4207 pub u8,
4208 pub u8,
4209 pub u8,
4210 pub u8,
4211 pub u8,
4212 pub u8,
4213 pub u8,
4214 pub u8,
4215 pub u8,
4216 pub u8,
4217 pub u8,
4218 pub u8,
4219 pub u8,
4220 pub u8,
4221 pub u8,
4222 pub u8,
4223 pub u8,
4224 pub u8,
4225 pub u8,
4226 pub u8,
4227 pub u8,
4228);
4229#[derive(Debug, Copy, Clone, PartialEq, Eq)]
4231#[repr(C)]
4232pub struct u8x64(
4233 pub u8,
4234 pub u8,
4235 pub u8,
4236 pub u8,
4237 pub u8,
4238 pub u8,
4239 pub u8,
4240 pub u8,
4241 pub u8,
4242 pub u8,
4243 pub u8,
4244 pub u8,
4245 pub u8,
4246 pub u8,
4247 pub u8,
4248 pub u8,
4249 pub u8,
4250 pub u8,
4251 pub u8,
4252 pub u8,
4253 pub u8,
4254 pub u8,
4255 pub u8,
4256 pub u8,
4257 pub u8,
4258 pub u8,
4259 pub u8,
4260 pub u8,
4261 pub u8,
4262 pub u8,
4263 pub u8,
4264 pub u8,
4265 pub u8,
4266 pub u8,
4267 pub u8,
4268 pub u8,
4269 pub u8,
4270 pub u8,
4271 pub u8,
4272 pub u8,
4273 pub u8,
4274 pub u8,
4275 pub u8,
4276 pub u8,
4277 pub u8,
4278 pub u8,
4279 pub u8,
4280 pub u8,
4281 pub u8,
4282 pub u8,
4283 pub u8,
4284 pub u8,
4285 pub u8,
4286 pub u8,
4287 pub u8,
4288 pub u8,
4289 pub u8,
4290 pub u8,
4291 pub u8,
4292 pub u8,
4293 pub u8,
4294 pub u8,
4295 pub u8,
4296 pub u8,
4297);
4298
4299#[derive(Debug, Copy, Clone, PartialEq, Eq)]
4301#[repr(C)]
4302pub struct m8x16(
4303 pub m8,
4304 pub m8,
4305 pub m8,
4306 pub m8,
4307 pub m8,
4308 pub m8,
4309 pub m8,
4310 pub m8,
4311 pub m8,
4312 pub m8,
4313 pub m8,
4314 pub m8,
4315 pub m8,
4316 pub m8,
4317 pub m8,
4318 pub m8,
4319);
4320#[derive(Debug, Copy, Clone, PartialEq, Eq)]
4322#[repr(C)]
4323pub struct m8x32(
4324 pub m8,
4325 pub m8,
4326 pub m8,
4327 pub m8,
4328 pub m8,
4329 pub m8,
4330 pub m8,
4331 pub m8,
4332 pub m8,
4333 pub m8,
4334 pub m8,
4335 pub m8,
4336 pub m8,
4337 pub m8,
4338 pub m8,
4339 pub m8,
4340 pub m8,
4341 pub m8,
4342 pub m8,
4343 pub m8,
4344 pub m8,
4345 pub m8,
4346 pub m8,
4347 pub m8,
4348 pub m8,
4349 pub m8,
4350 pub m8,
4351 pub m8,
4352 pub m8,
4353 pub m8,
4354 pub m8,
4355 pub m8,
4356);
4357
4358#[derive(Debug, Copy, Clone, PartialEq, Eq)]
4360#[repr(C)]
4361pub struct m8x64(
4362 pub m8,
4363 pub m8,
4364 pub m8,
4365 pub m8,
4366 pub m8,
4367 pub m8,
4368 pub m8,
4369 pub m8,
4370 pub m8,
4371 pub m8,
4372 pub m8,
4373 pub m8,
4374 pub m8,
4375 pub m8,
4376 pub m8,
4377 pub m8,
4378 pub m8,
4379 pub m8,
4380 pub m8,
4381 pub m8,
4382 pub m8,
4383 pub m8,
4384 pub m8,
4385 pub m8,
4386 pub m8,
4387 pub m8,
4388 pub m8,
4389 pub m8,
4390 pub m8,
4391 pub m8,
4392 pub m8,
4393 pub m8,
4394 pub m8,
4395 pub m8,
4396 pub m8,
4397 pub m8,
4398 pub m8,
4399 pub m8,
4400 pub m8,
4401 pub m8,
4402 pub m8,
4403 pub m8,
4404 pub m8,
4405 pub m8,
4406 pub m8,
4407 pub m8,
4408 pub m8,
4409 pub m8,
4410 pub m8,
4411 pub m8,
4412 pub m8,
4413 pub m8,
4414 pub m8,
4415 pub m8,
4416 pub m8,
4417 pub m8,
4418 pub m8,
4419 pub m8,
4420 pub m8,
4421 pub m8,
4422 pub m8,
4423 pub m8,
4424 pub m8,
4425 pub m8,
4426);
4427
4428#[derive(Debug, Copy, Clone, PartialEq, Eq)]
4430#[repr(C)]
4431pub struct i16x8(
4432 pub i16,
4433 pub i16,
4434 pub i16,
4435 pub i16,
4436 pub i16,
4437 pub i16,
4438 pub i16,
4439 pub i16,
4440);
4441#[derive(Debug, Copy, Clone, PartialEq, Eq)]
4443#[repr(C)]
4444pub struct i16x16(
4445 pub i16,
4446 pub i16,
4447 pub i16,
4448 pub i16,
4449 pub i16,
4450 pub i16,
4451 pub i16,
4452 pub i16,
4453 pub i16,
4454 pub i16,
4455 pub i16,
4456 pub i16,
4457 pub i16,
4458 pub i16,
4459 pub i16,
4460 pub i16,
4461);
4462#[derive(Debug, Copy, Clone, PartialEq, Eq)]
4464#[repr(C)]
4465pub struct i16x32(
4466 pub i16,
4467 pub i16,
4468 pub i16,
4469 pub i16,
4470 pub i16,
4471 pub i16,
4472 pub i16,
4473 pub i16,
4474 pub i16,
4475 pub i16,
4476 pub i16,
4477 pub i16,
4478 pub i16,
4479 pub i16,
4480 pub i16,
4481 pub i16,
4482 pub i16,
4483 pub i16,
4484 pub i16,
4485 pub i16,
4486 pub i16,
4487 pub i16,
4488 pub i16,
4489 pub i16,
4490 pub i16,
4491 pub i16,
4492 pub i16,
4493 pub i16,
4494 pub i16,
4495 pub i16,
4496 pub i16,
4497 pub i16,
4498);
4499
4500#[derive(Debug, Copy, Clone, PartialEq, Eq)]
4502#[repr(C)]
4503pub struct u16x8(
4504 pub u16,
4505 pub u16,
4506 pub u16,
4507 pub u16,
4508 pub u16,
4509 pub u16,
4510 pub u16,
4511 pub u16,
4512);
4513#[derive(Debug, Copy, Clone, PartialEq, Eq)]
4515#[repr(C)]
4516pub struct u16x16(
4517 pub u16,
4518 pub u16,
4519 pub u16,
4520 pub u16,
4521 pub u16,
4522 pub u16,
4523 pub u16,
4524 pub u16,
4525 pub u16,
4526 pub u16,
4527 pub u16,
4528 pub u16,
4529 pub u16,
4530 pub u16,
4531 pub u16,
4532 pub u16,
4533);
4534#[derive(Debug, Copy, Clone, PartialEq, Eq)]
4536#[repr(C)]
4537pub struct u16x32(
4538 pub u16,
4539 pub u16,
4540 pub u16,
4541 pub u16,
4542 pub u16,
4543 pub u16,
4544 pub u16,
4545 pub u16,
4546 pub u16,
4547 pub u16,
4548 pub u16,
4549 pub u16,
4550 pub u16,
4551 pub u16,
4552 pub u16,
4553 pub u16,
4554 pub u16,
4555 pub u16,
4556 pub u16,
4557 pub u16,
4558 pub u16,
4559 pub u16,
4560 pub u16,
4561 pub u16,
4562 pub u16,
4563 pub u16,
4564 pub u16,
4565 pub u16,
4566 pub u16,
4567 pub u16,
4568 pub u16,
4569 pub u16,
4570);
4571
4572#[derive(Debug, Copy, Clone, PartialEq, Eq)]
4574#[repr(C)]
4575pub struct m16x8(
4576 pub m16,
4577 pub m16,
4578 pub m16,
4579 pub m16,
4580 pub m16,
4581 pub m16,
4582 pub m16,
4583 pub m16,
4584);
4585#[derive(Debug, Copy, Clone, PartialEq, Eq)]
4587#[repr(C)]
4588pub struct m16x16(
4589 pub m16,
4590 pub m16,
4591 pub m16,
4592 pub m16,
4593 pub m16,
4594 pub m16,
4595 pub m16,
4596 pub m16,
4597 pub m16,
4598 pub m16,
4599 pub m16,
4600 pub m16,
4601 pub m16,
4602 pub m16,
4603 pub m16,
4604 pub m16,
4605);
4606#[derive(Debug, Copy, Clone, PartialEq, Eq)]
4608#[repr(C)]
4609pub struct m16x32(
4610 pub m16,
4611 pub m16,
4612 pub m16,
4613 pub m16,
4614 pub m16,
4615 pub m16,
4616 pub m16,
4617 pub m16,
4618 pub m16,
4619 pub m16,
4620 pub m16,
4621 pub m16,
4622 pub m16,
4623 pub m16,
4624 pub m16,
4625 pub m16,
4626 pub m16,
4627 pub m16,
4628 pub m16,
4629 pub m16,
4630 pub m16,
4631 pub m16,
4632 pub m16,
4633 pub m16,
4634 pub m16,
4635 pub m16,
4636 pub m16,
4637 pub m16,
4638 pub m16,
4639 pub m16,
4640 pub m16,
4641 pub m16,
4642);
4643
#[derive(Debug, Copy, Clone, PartialEq)]
#[repr(C)]
pub struct f32x4(pub f32, pub f32, pub f32, pub f32);

#[derive(Debug, Copy, Clone, PartialEq)]
#[repr(C)]
pub struct f32x8(
    pub f32,
    pub f32,
    pub f32,
    pub f32,
    pub f32,
    pub f32,
    pub f32,
    pub f32,
);
#[derive(Debug, Copy, Clone, PartialEq)]
#[repr(C)]
pub struct f32x16(
    pub f32,
    pub f32,
    pub f32,
    pub f32,
    pub f32,
    pub f32,
    pub f32,
    pub f32,
    pub f32,
    pub f32,
    pub f32,
    pub f32,
    pub f32,
    pub f32,
    pub f32,
    pub f32,
);

#[derive(Copy, Clone, PartialEq)]
#[repr(C)]
pub struct c32x2(pub c32, pub c32);

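// Hand-written `Debug` impls for the complex vector types below: each impl declares a
// local shadow struct whose fields are `DebugCplx` wrappers, reinterprets the register
// into it with `cast!`, and defers to the shadow's derived `Debug`, so every lane is
// printed in complex-number form by the `DebugCplx` helper rather than with the default
// `Complex` struct formatting.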
impl Debug for c32x2 {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        #[derive(Copy, Clone, Debug)]
        #[repr(C)]
        pub struct c32x2(pub DebugCplx<c32>, pub DebugCplx<c32>);
        unsafe impl Zeroable for c32x2 {}
        unsafe impl Pod for c32x2 {}

        let this: c32x2 = cast!(*self);
        this.fmt(f)
    }
}

#[derive(Copy, Clone, PartialEq)]
#[repr(C)]
pub struct c32x4(pub c32, pub c32, pub c32, pub c32);

impl Debug for c32x4 {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        #[derive(Copy, Clone, Debug)]
        #[repr(C)]
        pub struct c32x4(
            pub DebugCplx<c32>,
            pub DebugCplx<c32>,
            pub DebugCplx<c32>,
            pub DebugCplx<c32>,
        );
        unsafe impl Zeroable for c32x4 {}
        unsafe impl Pod for c32x4 {}

        let this: c32x4 = cast!(*self);
        this.fmt(f)
    }
}

#[derive(Copy, Clone, PartialEq)]
#[repr(C)]
pub struct c32x8(
    pub c32,
    pub c32,
    pub c32,
    pub c32,
    pub c32,
    pub c32,
    pub c32,
    pub c32,
);

impl Debug for c32x8 {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        #[derive(Copy, Clone, Debug)]
        #[repr(C)]
        pub struct c32x8(
            pub DebugCplx<c32>,
            pub DebugCplx<c32>,
            pub DebugCplx<c32>,
            pub DebugCplx<c32>,
            pub DebugCplx<c32>,
            pub DebugCplx<c32>,
            pub DebugCplx<c32>,
            pub DebugCplx<c32>,
        );
        unsafe impl Zeroable for c32x8 {}
        unsafe impl Pod for c32x8 {}

        let this: c32x8 = cast!(*self);
        this.fmt(f)
    }
}
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct i32x4(pub i32, pub i32, pub i32, pub i32);
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct i32x8(
    pub i32,
    pub i32,
    pub i32,
    pub i32,
    pub i32,
    pub i32,
    pub i32,
    pub i32,
);
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct i32x16(
    pub i32,
    pub i32,
    pub i32,
    pub i32,
    pub i32,
    pub i32,
    pub i32,
    pub i32,
    pub i32,
    pub i32,
    pub i32,
    pub i32,
    pub i32,
    pub i32,
    pub i32,
    pub i32,
);

#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct u32x4(pub u32, pub u32, pub u32, pub u32);
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct u32x8(
    pub u32,
    pub u32,
    pub u32,
    pub u32,
    pub u32,
    pub u32,
    pub u32,
    pub u32,
);
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct u32x16(
    pub u32,
    pub u32,
    pub u32,
    pub u32,
    pub u32,
    pub u32,
    pub u32,
    pub u32,
    pub u32,
    pub u32,
    pub u32,
    pub u32,
    pub u32,
    pub u32,
    pub u32,
    pub u32,
);

#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct m32x4(pub m32, pub m32, pub m32, pub m32);
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct m32x8(
    pub m32,
    pub m32,
    pub m32,
    pub m32,
    pub m32,
    pub m32,
    pub m32,
    pub m32,
);
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct m32x16(
    pub m32,
    pub m32,
    pub m32,
    pub m32,
    pub m32,
    pub m32,
    pub m32,
    pub m32,
    pub m32,
    pub m32,
    pub m32,
    pub m32,
    pub m32,
    pub m32,
    pub m32,
    pub m32,
);

#[derive(Debug, Copy, Clone, PartialEq)]
#[repr(C)]
pub struct f64x2(pub f64, pub f64);
#[derive(Debug, Copy, Clone, PartialEq)]
#[repr(C)]
pub struct f64x4(pub f64, pub f64, pub f64, pub f64);
#[derive(Debug, Copy, Clone, PartialEq)]
#[repr(C)]
pub struct f64x8(
    pub f64,
    pub f64,
    pub f64,
    pub f64,
    pub f64,
    pub f64,
    pub f64,
    pub f64,
);

#[derive(Copy, Clone, PartialEq)]
#[repr(C)]
pub struct c64x1(pub c64);

impl Debug for c64x1 {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        #[derive(Copy, Clone, Debug)]
        #[repr(C)]
        pub struct c64x1(pub DebugCplx<c64>);
        unsafe impl Zeroable for c64x1 {}
        unsafe impl Pod for c64x1 {}

        let this: c64x1 = cast!(*self);
        this.fmt(f)
    }
}

#[derive(Copy, Clone, PartialEq)]
#[repr(C)]
pub struct c64x2(pub c64, pub c64);

impl Debug for c64x2 {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        #[derive(Copy, Clone, Debug)]
        #[repr(C)]
        pub struct c64x2(pub DebugCplx<c64>, pub DebugCplx<c64>);
        unsafe impl Zeroable for c64x2 {}
        unsafe impl Pod for c64x2 {}

        let this: c64x2 = cast!(*self);
        this.fmt(f)
    }
}

#[derive(Copy, Clone, PartialEq)]
#[repr(C)]
pub struct c64x4(pub c64, pub c64, pub c64, pub c64);

impl Debug for c64x4 {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        #[derive(Copy, Clone, Debug)]
        #[repr(C)]
        pub struct c64x4(
            pub DebugCplx<c64>,
            pub DebugCplx<c64>,
            pub DebugCplx<c64>,
            pub DebugCplx<c64>,
        );
        unsafe impl Zeroable for c64x4 {}
        unsafe impl Pod for c64x4 {}

        let this: c64x4 = cast!(*self);
        this.fmt(f)
    }
}

#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct i64x2(pub i64, pub i64);
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct i64x4(pub i64, pub i64, pub i64, pub i64);
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct i64x8(
    pub i64,
    pub i64,
    pub i64,
    pub i64,
    pub i64,
    pub i64,
    pub i64,
    pub i64,
);

#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct u64x2(pub u64, pub u64);
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct u64x4(pub u64, pub u64, pub u64, pub u64);
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct u64x8(
    pub u64,
    pub u64,
    pub u64,
    pub u64,
    pub u64,
    pub u64,
    pub u64,
    pub u64,
);

#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct m64x2(pub m64, pub m64);
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct m64x4(pub m64, pub m64, pub m64, pub m64);
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct m64x8(
    pub m64,
    pub m64,
    pub m64,
    pub m64,
    pub m64,
    pub m64,
    pub m64,
    pub m64,
);

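// `bytemuck` marker impls. Every vector register type above is a `#[repr(C)]` tuple of
// plain integer, float, or mask fields, and the scalar mask (`m8`..`m64`) and bitmask
// (`b8`..`b64`) types are assumed (per their definitions earlier in this file) to be thin
// wrappers over same-sized integers, so zero-initialization (`Zeroable`) and byte-level
// reinterpretation (`Pod`) are sound for all of them.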
unsafe impl Zeroable for m8 {}
unsafe impl Zeroable for m16 {}
unsafe impl Zeroable for m32 {}
unsafe impl Zeroable for m64 {}
unsafe impl Pod for m8 {}
unsafe impl Pod for m16 {}
unsafe impl Pod for m32 {}
unsafe impl Pod for m64 {}

unsafe impl Zeroable for b8 {}
unsafe impl Pod for b8 {}
unsafe impl Zeroable for b16 {}
unsafe impl Pod for b16 {}
unsafe impl Zeroable for b32 {}
unsafe impl Pod for b32 {}
unsafe impl Zeroable for b64 {}
unsafe impl Pod for b64 {}

unsafe impl Zeroable for i8x16 {}
unsafe impl Zeroable for i8x32 {}
unsafe impl Zeroable for i8x64 {}
unsafe impl Pod for i8x16 {}
unsafe impl Pod for i8x32 {}
unsafe impl Pod for i8x64 {}
unsafe impl Zeroable for u8x16 {}
unsafe impl Zeroable for u8x32 {}
unsafe impl Zeroable for u8x64 {}
unsafe impl Pod for u8x16 {}
unsafe impl Pod for u8x32 {}
unsafe impl Pod for u8x64 {}
unsafe impl Zeroable for m8x16 {}
unsafe impl Zeroable for m8x32 {}
unsafe impl Zeroable for m8x64 {}
unsafe impl Pod for m8x16 {}
unsafe impl Pod for m8x32 {}
unsafe impl Pod for m8x64 {}

unsafe impl Zeroable for i16x8 {}
unsafe impl Zeroable for i16x16 {}
unsafe impl Zeroable for i16x32 {}
unsafe impl Pod for i16x8 {}
unsafe impl Pod for i16x16 {}
unsafe impl Pod for i16x32 {}
unsafe impl Zeroable for u16x8 {}
unsafe impl Zeroable for u16x16 {}
unsafe impl Zeroable for u16x32 {}
unsafe impl Pod for u16x8 {}
unsafe impl Pod for u16x16 {}
unsafe impl Pod for u16x32 {}
unsafe impl Zeroable for m16x8 {}
unsafe impl Zeroable for m16x16 {}
unsafe impl Zeroable for m16x32 {}
unsafe impl Pod for m16x8 {}
unsafe impl Pod for m16x16 {}
unsafe impl Pod for m16x32 {}

unsafe impl Zeroable for f32x4 {}
unsafe impl Zeroable for f32x8 {}
unsafe impl Zeroable for f32x16 {}
unsafe impl Pod for f32x4 {}
unsafe impl Pod for f32x8 {}
unsafe impl Pod for f32x16 {}
unsafe impl Zeroable for c32x2 {}
unsafe impl Zeroable for c32x4 {}
unsafe impl Zeroable for c32x8 {}
unsafe impl Pod for c32x2 {}
unsafe impl Pod for c32x4 {}
unsafe impl Pod for c32x8 {}
unsafe impl Zeroable for i32x4 {}
unsafe impl Zeroable for i32x8 {}
unsafe impl Zeroable for i32x16 {}
unsafe impl Pod for i32x4 {}
unsafe impl Pod for i32x8 {}
unsafe impl Pod for i32x16 {}
unsafe impl Zeroable for u32x4 {}
unsafe impl Zeroable for u32x8 {}
unsafe impl Zeroable for u32x16 {}
unsafe impl Pod for u32x4 {}
unsafe impl Pod for u32x8 {}
unsafe impl Pod for u32x16 {}
unsafe impl Zeroable for m32x4 {}
unsafe impl Zeroable for m32x8 {}
unsafe impl Zeroable for m32x16 {}
unsafe impl Pod for m32x4 {}
unsafe impl Pod for m32x8 {}
unsafe impl Pod for m32x16 {}

unsafe impl Zeroable for f64x2 {}
unsafe impl Zeroable for f64x4 {}
unsafe impl Zeroable for f64x8 {}
unsafe impl Pod for f64x2 {}
unsafe impl Pod for f64x4 {}
unsafe impl Pod for f64x8 {}
unsafe impl Zeroable for c64x1 {}
unsafe impl Zeroable for c64x2 {}
unsafe impl Zeroable for c64x4 {}
unsafe impl Pod for c64x1 {}
unsafe impl Pod for c64x2 {}
unsafe impl Pod for c64x4 {}
unsafe impl Zeroable for i64x2 {}
unsafe impl Zeroable for i64x4 {}
unsafe impl Zeroable for i64x8 {}
unsafe impl Pod for i64x2 {}
unsafe impl Pod for i64x4 {}
unsafe impl Pod for i64x8 {}
unsafe impl Zeroable for u64x2 {}
unsafe impl Zeroable for u64x4 {}
unsafe impl Zeroable for u64x8 {}
unsafe impl Pod for u64x2 {}
unsafe impl Pod for u64x4 {}
unsafe impl Pod for u64x8 {}
unsafe impl Zeroable for m64x2 {}
unsafe impl Zeroable for m64x4 {}
unsafe impl Zeroable for m64x8 {}
unsafe impl Pod for m64x2 {}
unsafe impl Pod for m64x4 {}
unsafe impl Pod for m64x8 {}

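// `iota!` builds, in a `const` block, an `[MaybeUninit<$T>; $N]` array in which every
// `$int` lane of the `i`-th element holds the value `i`, i.e. `[splat(0), splat(1), ...,
// splat(N - 1)]`. The lanes are written through unaligned raw-pointer stores because `$T`
// is only known generically at this point.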
macro_rules! iota {
    ($T: ty, $N: expr, $int: ty) => {
        const {
            unsafe {
                let mut iota = [const { core::mem::MaybeUninit::uninit() }; $N];
                {
                    let mut i = 0;
                    while i < $N {
                        let v = (&raw mut iota[i]) as *mut $int;

                        let mut j = 0;
                        while j < core::mem::size_of::<$T>() / core::mem::size_of::<$int>() {
                            v.add(j).write_unaligned(i as $int);
                            j += 1;
                        }

                        i += 1;
                    }
                }
                iota
            }
        }
    };
}

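// Public `const fn` wrappers instantiating `iota!` for 8/16/32/64-bit lanes. As an
// illustration (hypothetical instantiation, assuming the chosen vector type implements
// `Interleave`): `iota_32::<u32x4, 4>()` would evaluate to the registers
// `[splat(0), splat(1), splat(2), splat(3)]`, each still wrapped in `MaybeUninit`.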
pub const fn iota_8<T: Interleave, const N: usize>() -> [MaybeUninit<T>; N] {
    iota!(T, N, u8)
}
pub const fn iota_16<T: Interleave, const N: usize>() -> [MaybeUninit<T>; N] {
    iota!(T, N, u16)
}
pub const fn iota_32<T: Interleave, const N: usize>() -> [MaybeUninit<T>; N] {
    iota!(T, N, u32)
}
pub const fn iota_64<T: Interleave, const N: usize>() -> [MaybeUninit<T>; N] {
    iota!(T, N, u64)
}

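// Round-trip sanity check for the (de)interleave fallbacks, compiled on x86_64 only and
// run only when the `x86::V3` instruction set is detected at runtime: deinterleaving
// `[a0 b0 a1 b1 | a2 b2 a3 b3]` must produce `[a0 a1 a2 a3]` and `[b0 b1 b2 b3]` (and
// likewise for stride 4), and interleaving the result must restore the original input.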
#[cfg(target_arch = "x86_64")]
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_interleave() {
        if let Some(simd) = x86::V3::try_new() {
            {
                let src = [f64x4(0.0, 0.1, 1.0, 1.1), f64x4(2.0, 2.1, 3.0, 3.1)];
                let dst = unsafe { deinterleave_fallback::<f64, f64x4, [f64x4; 2]>(src) };
                assert_eq!(dst[1], simd.add_f64x4(dst[0], simd.splat_f64x4(0.1)));
                assert_eq!(src, unsafe {
                    interleave_fallback::<f64, f64x4, [f64x4; 2]>(dst)
                });
            }
            {
                let src = [
                    f64x4(0.0, 0.1, 0.2, 0.3),
                    f64x4(1.0, 1.1, 1.2, 1.3),
                    f64x4(2.0, 2.1, 2.2, 2.3),
                    f64x4(3.0, 3.1, 3.2, 3.3),
                ];
                let dst = unsafe { deinterleave_fallback::<f64, f64x4, [f64x4; 4]>(src) };
                assert_eq!(dst[1], simd.add_f64x4(dst[0], simd.splat_f64x4(0.1)));
                assert_eq!(dst[2], simd.add_f64x4(dst[0], simd.splat_f64x4(0.2)));
                assert_eq!(dst[3], simd.add_f64x4(dst[0], simd.splat_f64x4(0.3)));
                assert_eq!(src, unsafe {
                    interleave_fallback::<f64, f64x4, [f64x4; 4]>(dst)
                });
            }
        }
    }
}