pub trait F32SimdVec:
Sized
+ Copy
+ Debug
+ Send
+ Sync
+ Add<Self, Output = Self>
+ Mul<Self, Output = Self>
+ Sub<Self, Output = Self>
+ Div<Self, Output = Self>
+ AddAssign<Self>
+ MulAssign<Self>
+ SubAssign<Self>
+ DivAssign<Self> {
type Descriptor: SimdDescriptor;
type UnderlyingArray: Copy + Default + Debug;
const LEN: usize;
Show 37 methods
// Required methods
fn splat(d: Self::Descriptor, v: f32) -> Self;
fn zero(d: Self::Descriptor) -> Self;
fn mul_add(self, mul: Self, add: Self) -> Self;
fn neg_mul_add(self, mul: Self, add: Self) -> Self;
fn load(d: Self::Descriptor, mem: &[f32]) -> Self;
fn load_array(d: Self::Descriptor, mem: &Self::UnderlyingArray) -> Self;
fn store(&self, mem: &mut [f32]);
fn store_array(&self, mem: &mut Self::UnderlyingArray);
fn store_interleaved_2(a: Self, b: Self, dest: &mut [f32]);
fn store_interleaved_3(a: Self, b: Self, c: Self, dest: &mut [f32]);
fn store_interleaved_4(a: Self, b: Self, c: Self, d: Self, dest: &mut [f32]);
fn store_interleaved_8(
a: Self,
b: Self,
c: Self,
d: Self,
e: Self,
f: Self,
g: Self,
h: Self,
dest: &mut [f32],
);
fn load_deinterleaved_2(d: Self::Descriptor, src: &[f32]) -> (Self, Self);
fn load_deinterleaved_3(
d: Self::Descriptor,
src: &[f32],
) -> (Self, Self, Self);
fn load_deinterleaved_4(
d: Self::Descriptor,
src: &[f32],
) -> (Self, Self, Self, Self);
fn round_store_u8(self, dest: &mut [u8]);
fn round_store_u16(self, dest: &mut [u16]);
fn abs(self) -> Self;
fn floor(self) -> Self;
fn sqrt(self) -> Self;
fn neg(self) -> Self;
fn copysign(self, sign: Self) -> Self;
fn max(self, other: Self) -> Self;
fn min(self, other: Self) -> Self;
fn gt(
self,
other: Self,
) -> <<Self as F32SimdVec>::Descriptor as SimdDescriptor>::Mask;
fn as_i32(
self,
) -> <<Self as F32SimdVec>::Descriptor as SimdDescriptor>::I32Vec;
fn bitcast_to_i32(
self,
) -> <<Self as F32SimdVec>::Descriptor as SimdDescriptor>::I32Vec;
fn prepare_table_bf16_8(
d: Self::Descriptor,
table: &[f32; 8],
) -> <<Self as F32SimdVec>::Descriptor as SimdDescriptor>::Bf16Table8;
fn table_lookup_bf16_8(
d: Self::Descriptor,
table: <<Self as F32SimdVec>::Descriptor as SimdDescriptor>::Bf16Table8,
indices: <<Self as F32SimdVec>::Descriptor as SimdDescriptor>::I32Vec,
) -> Self;
fn make_array_slice(slice: &[f32]) -> &[Self::UnderlyingArray];
fn make_array_slice_mut(slice: &mut [f32]) -> &mut [Self::UnderlyingArray];
fn transpose_square(
d: Self::Descriptor,
data: &mut [Self::UnderlyingArray],
stride: usize,
);
fn load_f16_bits(d: Self::Descriptor, mem: &[u16]) -> Self;
fn store_f16_bits(self, dest: &mut [u16]);
// Provided methods
fn load_from(d: Self::Descriptor, mem: &[f32], offset: usize) -> Self { ... }
fn store_at(&self, mem: &mut [f32], offset: usize) { ... }
fn round_store_u8_at(self, dest: &mut [u8], offset: usize) { ... }
}Required Associated Constants§
Required Associated Types§
type Descriptor: SimdDescriptor
type UnderlyingArray: Copy + Default + Debug
An array of f32 of length Self::LEN.
Required Methods§
fn splat(d: Self::Descriptor, v: f32) -> Self
Converts v to an array of v.
fn zero(d: Self::Descriptor) -> Self
fn mul_add(self, mul: Self, add: Self) -> Self
fn neg_mul_add(self, mul: Self, add: Self) -> Self
Computes add - self * mul, equivalent to self * (-mul) + add.
Uses fused multiply-add with negation when available (FMA3 fnmadd).
fn load(d: Self::Descriptor, mem: &[f32]) -> Self
fn load_array(d: Self::Descriptor, mem: &Self::UnderlyingArray) -> Self
fn store(&self, mem: &mut [f32])
fn store_array(&self, mem: &mut Self::UnderlyingArray)
fn store_interleaved_2(a: Self, b: Self, dest: &mut [f32])
Stores two vectors interleaved: [a0, b0, a1, b1, a2, b2, …].
Requires dest.len() >= 2 * Self::LEN or it will panic.
fn store_interleaved_3(a: Self, b: Self, c: Self, dest: &mut [f32])
Stores three vectors interleaved: [a0, b0, c0, a1, b1, c1, …].
Requires dest.len() >= 3 * Self::LEN or it will panic.
fn store_interleaved_4(a: Self, b: Self, c: Self, d: Self, dest: &mut [f32])
Stores four vectors interleaved: [a0, b0, c0, d0, a1, b1, c1, d1, …].
Requires dest.len() >= 4 * Self::LEN or it will panic.
fn store_interleaved_8(a: Self, b: Self, c: Self, d: Self, e: Self, f: Self, g: Self, h: Self, dest: &mut [f32])
Stores eight vectors interleaved: [a0, b0, c0, d0, e0, f0, g0, h0, a1, …].
Requires dest.len() >= 8 * Self::LEN or it will panic.
fn load_deinterleaved_2(d: Self::Descriptor, src: &[f32]) -> (Self, Self)
Loads two vectors from interleaved data: [a0, b0, a1, b1, a2, b2, …].
Returns (a, b) where a = [a0, a1, a2, …] and b = [b0, b1, b2, …].
Requires src.len() >= 2 * Self::LEN or it will panic.
fn load_deinterleaved_3(d: Self::Descriptor, src: &[f32]) -> (Self, Self, Self)
Loads three vectors from interleaved data: [a0, b0, c0, a1, b1, c1, …].
Returns (a, b, c) where a = [a0, a1, …], b = [b0, b1, …], c = [c0, c1, …].
Requires src.len() >= 3 * Self::LEN or it will panic.
fn load_deinterleaved_4(d: Self::Descriptor, src: &[f32]) -> (Self, Self, Self, Self)
Loads four vectors from interleaved data: [a0, b0, c0, d0, a1, b1, c1, d1, …].
Returns (a, b, c, d) where each vector contains the deinterleaved components.
Requires src.len() >= 4 * Self::LEN or it will panic.
fn round_store_u8(self, dest: &mut [u8])
Rounds to nearest integer and stores as u8.
Behavior is unspecified if values would overflow u8.
Requires dest.len() >= Self::LEN or it will panic.
fn round_store_u16(self, dest: &mut [u16])
Rounds to nearest integer and stores as u16.
Behavior is unspecified if values would overflow u16.
Requires dest.len() >= Self::LEN or it will panic.
fn abs(self) -> Self
fn floor(self) -> Self
fn sqrt(self) -> Self
fn copysign(self, sign: Self) -> Self
fn max(self, other: Self) -> Self
fn min(self, other: Self) -> Self
fn gt( self, other: Self, ) -> <<Self as F32SimdVec>::Descriptor as SimdDescriptor>::Mask
fn as_i32(self) -> <<Self as F32SimdVec>::Descriptor as SimdDescriptor>::I32Vec
fn bitcast_to_i32( self, ) -> <<Self as F32SimdVec>::Descriptor as SimdDescriptor>::I32Vec
fn prepare_table_bf16_8(d: Self::Descriptor, table: &[f32; 8]) -> <<Self as F32SimdVec>::Descriptor as SimdDescriptor>::Bf16Table8
Prepares an 8-entry f32 table for fast approximate lookups. Values are converted to BF16 format (loses lower 16 mantissa bits).
Use this when you need to perform multiple lookups with the same table.
The prepared table can be reused with [table_lookup_bf16_8].
fn table_lookup_bf16_8(d: Self::Descriptor, table: <<Self as F32SimdVec>::Descriptor as SimdDescriptor>::Bf16Table8, indices: <<Self as F32SimdVec>::Descriptor as SimdDescriptor>::I32Vec) -> Self
Performs fast approximate table lookup using a prepared BF16 table.
This is the fastest lookup method when the same table is used multiple times.
Use [prepare_table_bf16_8] to create the prepared table.
§Panics
May panic or produce undefined results if indices contain values outside 0..8 range.
fn make_array_slice(slice: &[f32]) -> &[Self::UnderlyingArray]
Converts a slice of f32 into a slice of Self::UnderlyingArray. If slice.len() is not a
multiple of Self::LEN this will panic.
fn make_array_slice_mut(slice: &mut [f32]) -> &mut [Self::UnderlyingArray]
Converts a mut slice of f32 into a slice of Self::UnderlyingArray. If slice.len() is not a
multiple of Self::LEN this will panic.
fn transpose_square(d: Self::Descriptor, data: &mut [Self::UnderlyingArray], stride: usize)
Transposes the Self::LEN x Self::LEN matrix formed by array elements
data[stride * i] for i = 0..Self::LEN.
fn load_f16_bits(d: Self::Descriptor, mem: &[u16]) -> Self
Loads f16 values (stored as u16 bit patterns) and converts them to f32.
Uses hardware conversion instructions when available (F16C on x86, NEON fp16 on ARM).
Requires mem.len() >= Self::LEN or it will panic.
fn store_f16_bits(self, dest: &mut [u16])
Converts f32 values to f16 and stores as u16 bit patterns.
Uses hardware conversion instructions when available (F16C on x86, NEON fp16 on ARM).
Requires dest.len() >= Self::LEN or it will panic.
Provided Methods§
fn load_from(d: Self::Descriptor, mem: &[f32], offset: usize) -> Self
Loads Self::LEN f32 values starting at mem[offset..].
Equivalent to Self::load(d, &mem[offset..]).
fn store_at(&self, mem: &mut [f32], offset: usize)
Stores Self::LEN f32 values starting at mem[offset..].
Equivalent to self.store(&mut mem[offset..]).
fn round_store_u8_at(self, dest: &mut [u8], offset: usize)
Rounds to nearest integer and stores as u8 at the given offset.
Equivalent to self.round_store_u8(&mut dest[offset..]).
Dyn Compatibility§
This trait is not dyn compatible.
In older versions of Rust, dyn compatibility was called "object safety", so this trait is not object safe.