pub trait F32SimdVec:
Sized
+ Copy
+ Debug
+ Send
+ Sync
+ Add<Self, Output = Self>
+ Mul<Self, Output = Self>
+ Sub<Self, Output = Self>
+ Div<Self, Output = Self>
+ AddAssign<Self>
+ MulAssign<Self>
+ SubAssign<Self>
+ DivAssign<Self> {
type Descriptor: SimdDescriptor;
type UnderlyingArray: Copy + Default + Debug;
const LEN: usize;
Show 37 methods
// Required methods
fn splat(d: Self::Descriptor, v: f32) -> Self;
fn zero(d: Self::Descriptor) -> Self;
fn mul_add(self, mul: Self, add: Self) -> Self;
fn neg_mul_add(self, mul: Self, add: Self) -> Self;
fn load(d: Self::Descriptor, mem: &[f32]) -> Self;
fn load_array(d: Self::Descriptor, mem: &Self::UnderlyingArray) -> Self;
fn store(&self, mem: &mut [f32]);
fn store_array(&self, mem: &mut Self::UnderlyingArray);
fn store_interleaved_2(a: Self, b: Self, dest: &mut [f32]);
fn store_interleaved_3(a: Self, b: Self, c: Self, dest: &mut [f32]);
fn store_interleaved_4(a: Self, b: Self, c: Self, d: Self, dest: &mut [f32]);
fn store_interleaved_8(
a: Self,
b: Self,
c: Self,
d: Self,
e: Self,
f: Self,
g: Self,
h: Self,
dest: &mut [f32],
);
fn load_deinterleaved_2(d: Self::Descriptor, src: &[f32]) -> (Self, Self);
fn load_deinterleaved_3(
d: Self::Descriptor,
src: &[f32],
) -> (Self, Self, Self);
fn load_deinterleaved_4(
d: Self::Descriptor,
src: &[f32],
) -> (Self, Self, Self, Self);
fn round_store_u8(self, dest: &mut [u8]);
fn round_store_u16(self, dest: &mut [u16]);
fn abs(self) -> Self;
fn floor(self) -> Self;
fn sqrt(self) -> Self;
fn neg(self) -> Self;
fn copysign(self, sign: Self) -> Self;
fn max(self, other: Self) -> Self;
fn min(self, other: Self) -> Self;
fn gt(
self,
other: Self,
) -> <<Self as F32SimdVec>::Descriptor as SimdDescriptor>::Mask;
fn as_i32(
self,
) -> <<Self as F32SimdVec>::Descriptor as SimdDescriptor>::I32Vec;
fn bitcast_to_i32(
self,
) -> <<Self as F32SimdVec>::Descriptor as SimdDescriptor>::I32Vec;
fn prepare_table_bf16_8(
d: Self::Descriptor,
table: &[f32; 8],
) -> <<Self as F32SimdVec>::Descriptor as SimdDescriptor>::Bf16Table8;
fn table_lookup_bf16_8(
d: Self::Descriptor,
table: <<Self as F32SimdVec>::Descriptor as SimdDescriptor>::Bf16Table8,
indices: <<Self as F32SimdVec>::Descriptor as SimdDescriptor>::I32Vec,
) -> Self;
fn make_array_slice(slice: &[f32]) -> &[Self::UnderlyingArray];
fn make_array_slice_mut(slice: &mut [f32]) -> &mut [Self::UnderlyingArray];
fn transpose_square(
d: Self::Descriptor,
data: &mut [Self::UnderlyingArray],
stride: usize,
);
fn load_f16_bits(d: Self::Descriptor, mem: &[u16]) -> Self;
fn store_f16_bits(self, dest: &mut [u16]);
// Provided methods
fn load_from(d: Self::Descriptor, mem: &[f32], offset: usize) -> Self { ... }
fn store_at(&self, mem: &mut [f32], offset: usize) { ... }
fn round_store_u8_at(self, dest: &mut [u8], offset: usize) { ... }
}Required Associated Constants§
Required Associated Types§
type Descriptor: SimdDescriptor
type UnderlyingArray: Copy + Default + Debug
An array of f32 of length Self::LEN.
Required Methods§
fn splat(d: Self::Descriptor, v: f32) -> Self
Converts v to an array of v.
fn zero(d: Self::Descriptor) -> Self
fn mul_add(self, mul: Self, add: Self) -> Self
fn neg_mul_add(self, mul: Self, add: Self) -> Self
Computes add - self * mul, equivalent to self * (-mul) + add.
Uses fused multiply-add with negation when available (FMA3 fnmadd).
fn load(d: Self::Descriptor, mem: &[f32]) -> Self
fn load_array(d: Self::Descriptor, mem: &Self::UnderlyingArray) -> Self
fn store(&self, mem: &mut [f32])
fn store_array(&self, mem: &mut Self::UnderlyingArray)
fn store_interleaved_2(a: Self, b: Self, dest: &mut [f32])
Stores two vectors interleaved: [a0, b0, a1, b1, a2, b2, …].
Requires dest.len() >= 2 * Self::LEN or it will panic.
fn store_interleaved_3(a: Self, b: Self, c: Self, dest: &mut [f32])
Stores three vectors interleaved: [a0, b0, c0, a1, b1, c1, …].
Requires dest.len() >= 3 * Self::LEN or it will panic.
fn store_interleaved_4(a: Self, b: Self, c: Self, d: Self, dest: &mut [f32])
Stores four vectors interleaved: [a0, b0, c0, d0, a1, b1, c1, d1, …].
Requires dest.len() >= 4 * Self::LEN or it will panic.
fn store_interleaved_8(a: Self, b: Self, c: Self, d: Self, e: Self, f: Self, g: Self, h: Self, dest: &mut [f32])
Stores eight vectors interleaved: [a0, b0, c0, d0, e0, f0, g0, h0, a1, …].
Requires dest.len() >= 8 * Self::LEN or it will panic.
fn load_deinterleaved_2(d: Self::Descriptor, src: &[f32]) -> (Self, Self)
Loads two vectors from interleaved data: [a0, b0, a1, b1, a2, b2, …].
Returns (a, b) where a = [a0, a1, a2, …] and b = [b0, b1, b2, …].
Requires src.len() >= 2 * Self::LEN or it will panic.
fn load_deinterleaved_3(d: Self::Descriptor, src: &[f32]) -> (Self, Self, Self)
Loads three vectors from interleaved data: [a0, b0, c0, a1, b1, c1, …].
Returns (a, b, c) where a = [a0, a1, …], b = [b0, b1, …], c = [c0, c1, …].
Requires src.len() >= 3 * Self::LEN or it will panic.
fn load_deinterleaved_4(d: Self::Descriptor, src: &[f32]) -> (Self, Self, Self, Self)
Loads four vectors from interleaved data: [a0, b0, c0, d0, a1, b1, c1, d1, …].
Returns (a, b, c, d) where each vector contains the deinterleaved components.
Requires src.len() >= 4 * Self::LEN or it will panic.
fn round_store_u8(self, dest: &mut [u8])
Rounds to nearest integer and stores as u8.
Behavior is unspecified if values would overflow u8.
Requires dest.len() >= Self::LEN or it will panic.
fn round_store_u16(self, dest: &mut [u16])
Rounds to nearest integer and stores as u16.
Behavior is unspecified if values would overflow u16.
Requires dest.len() >= Self::LEN or it will panic.
fn abs(self) -> Self
fn floor(self) -> Self
fn sqrt(self) -> Self
fn copysign(self, sign: Self) -> Self
fn max(self, other: Self) -> Self
fn min(self, other: Self) -> Self
fn gt( self, other: Self, ) -> <<Self as F32SimdVec>::Descriptor as SimdDescriptor>::Mask
fn as_i32(self) -> <<Self as F32SimdVec>::Descriptor as SimdDescriptor>::I32Vec
fn bitcast_to_i32( self, ) -> <<Self as F32SimdVec>::Descriptor as SimdDescriptor>::I32Vec
fn prepare_table_bf16_8(d: Self::Descriptor, table: &[f32; 8]) -> <<Self as F32SimdVec>::Descriptor as SimdDescriptor>::Bf16Table8
Prepares an 8-entry f32 table for fast approximate lookups. Values are converted to BF16 format (loses lower 16 mantissa bits).
Use this when you need to perform multiple lookups with the same table.
The prepared table can be reused with [table_lookup_bf16_8].
fn table_lookup_bf16_8(d: Self::Descriptor, table: <<Self as F32SimdVec>::Descriptor as SimdDescriptor>::Bf16Table8, indices: <<Self as F32SimdVec>::Descriptor as SimdDescriptor>::I32Vec) -> Self
Performs fast approximate table lookup using a prepared BF16 table.
This is the fastest lookup method when the same table is used multiple times.
Use [prepare_table_bf16_8] to create the prepared table.
§Panics
May panic or produce undefined results if indices contain values outside 0..8 range.
fn make_array_slice(slice: &[f32]) -> &[Self::UnderlyingArray]
Converts a slice of f32 into a slice of Self::UnderlyingArray. If slice.len() is not a
multiple of Self::LEN this will panic.
fn make_array_slice_mut(slice: &mut [f32]) -> &mut [Self::UnderlyingArray]
Converts a mut slice of f32 into a slice of Self::UnderlyingArray. If slice.len() is not a
multiple of Self::LEN this will panic.
fn transpose_square(d: Self::Descriptor, data: &mut [Self::UnderlyingArray], stride: usize)
Transposes the Self::LEN x Self::LEN matrix formed by array elements
data[stride * i] for i = 0..Self::LEN.
fn load_f16_bits(d: Self::Descriptor, mem: &[u16]) -> Self
Loads f16 values (stored as u16 bit patterns) and converts them to f32.
Uses hardware conversion instructions when available (F16C on x86, NEON fp16 on ARM).
Requires mem.len() >= Self::LEN or it will panic.
fn store_f16_bits(self, dest: &mut [u16])
Converts f32 values to f16 and stores as u16 bit patterns.
Uses hardware conversion instructions when available (F16C on x86, NEON fp16 on ARM).
Requires dest.len() >= Self::LEN or it will panic.
Provided Methods§
fn load_from(d: Self::Descriptor, mem: &[f32], offset: usize) -> Self
Loads Self::LEN f32 values starting at mem[offset..].
Equivalent to Self::load(d, &mem[offset..]).
fn store_at(&self, mem: &mut [f32], offset: usize)
Stores Self::LEN f32 values starting at mem[offset..].
Equivalent to self.store(&mut mem[offset..]).
fn round_store_u8_at(self, dest: &mut [u8], offset: usize)
Rounds to nearest integer and stores as u8 at the given offset.
Equivalent to self.round_store_u8(&mut dest[offset..]).
Dyn Compatibility§
This trait is not dyn compatible.
In older versions of Rust, dyn compatibility was called "object safety", so this trait is not object safe.