Struct CosineStateless

A supplied trait for dealing with non-full-width epilogues. Often, mask-based loading will do the right thing, but for architectures like AVX2 that have limited support for masking 8- and 16-bit operations, using a scalar fallback may just be better. Read more

Source §

fn get_simd_width() -> usize

!! Do not extend this function !! Read more

Source §

fn get_main_bocksize() -> usize

!! Do not extend this function !! Read more

Source §

impl SIMDSchema<f16, f16, V3> for CosineStateless

Available on x86-64 only.

Source §

type SIMDWidth = Const<8>

The desired SIMD read width. Reads from the input slice will use this stride when accessing memory.

Source §

type Accumulator = FullCosineAccumulator<<V3 as Architecture>::f32x8>

The type used to represent partial accumulated values.

Source §

type Left = <V3 as Architecture>::f16x8

The type used for the left-hand side.

Source §

type Right = <V3 as Architecture>::f16x8

The type used for the right-hand side.

Source §

type Return = f32

The final return type. This is often f32 for complete distance functions, but need not always be.

Source §

type Main = Strategy2x4

The implementation of the main loop.

Source §

fn init(&self, arch: V3) -> Self::Accumulator

Initialize an empty (identity) accumulator.

Source §

fn accumulate( &self, x: Self::Left, y: Self::Right, acc: Self::Accumulator, ) -> Self::Accumulator

Perform an accumulation.

Source §

fn reduce(&self, acc: Self::Accumulator) -> Self::Return

Perform a reduction on the accumulator to yield the final result. Read more

Source §

fn combine( &self, x: Self::Accumulator, y: Self::Accumulator, ) -> Self::Accumulator

Combine two independent accumulators (allows for unrolling).

Source §

unsafe fn epilogue( &self, arch: A, x: *const T, y: *const U, len: usize, acc: Self::Accumulator, ) -> Self::Accumulator

A supplied trait for dealing with non-full-width epilogues. Often, mask-based loading will do the right thing, but for architectures like AVX2 that have limited support for masking 8- and 16-bit operations, using a scalar fallback may just be better. Read more

Source §

fn get_simd_width() -> usize

!! Do not extend this function !! Read more

Source §

fn get_main_bocksize() -> usize

!! Do not extend this function !! Read more

Source §

impl SIMDSchema<f16, f16, V4> for CosineStateless

Available on x86-64 only.

Source §

type SIMDWidth = Const<16>

The desired SIMD read width. Reads from the input slice will use this stride when accessing memory.

Source §

type Accumulator = FullCosineAccumulator<<V4 as Architecture>::f32x16>

The type used to represent partial accumulated values.

Source §

type Left = <V4 as Architecture>::f16x16

The type used for the left-hand side.

Source §

type Right = <V4 as Architecture>::f16x16

The type used for the right-hand side.

Source §

type Return = f32

The final return type. This is often f32 for complete distance functions, but need not always be.

Source §

type Main = Strategy2x4

The implementation of the main loop.

Source §

fn init(&self, arch: V4) -> Self::Accumulator

Initialize an empty (identity) accumulator.

Source §

fn accumulate( &self, x: Self::Left, y: Self::Right, acc: Self::Accumulator, ) -> Self::Accumulator

Perform an accumulation.

Source §

fn reduce(&self, acc: Self::Accumulator) -> Self::Return

Perform a reduction on the accumulator to yield the final result. Read more

Source §

fn combine( &self, x: Self::Accumulator, y: Self::Accumulator, ) -> Self::Accumulator

Combine two independent accumulators (allows for unrolling).

Source §

unsafe fn epilogue( &self, arch: A, x: *const T, y: *const U, len: usize, acc: Self::Accumulator, ) -> Self::Accumulator

A supplied trait for dealing with non-full-width epilogues. Often, mask-based loading will do the right thing, but for architectures like AVX2 that have limited support for masking 8- and 16-bit operations, using a scalar fallback may just be better. Read more

Source §

fn get_simd_width() -> usize

!! Do not extend this function !! Read more

Source §

fn get_main_bocksize() -> usize

!! Do not extend this function !! Read more

Source §

impl<A> SIMDSchema<f32, f16, A> for CosineStateless
where A: Architecture,

Source §

type SIMDWidth = Const<8>

The desired SIMD read width. Reads from the input slice will use this stride when accessing memory.

Source §

type Accumulator = FullCosineAccumulator<<A as Architecture>::f32x8>

The type used to represent partial accumulated values.

Source §

type Left = <A as Architecture>::f32x8

The type used for the left-hand side.

Source §

type Right = <A as Architecture>::f16x8

The type used for the right-hand side.

Source §

type Return = f32

The final return type. This is often f32 for complete distance functions, but need not always be.

Source §

type Main = Strategy2x4

The implementation of the main loop.

Source §

fn init(&self, arch: A) -> Self::Accumulator

Initialize an empty (identity) accumulator.

Source §

fn accumulate( &self, x: Self::Left, y: Self::Right, acc: Self::Accumulator, ) -> Self::Accumulator

Perform an accumulation.

Source §

fn reduce(&self, acc: Self::Accumulator) -> Self::Return

Perform a reduction on the accumulator to yield the final result. Read more

Source §

fn combine( &self, x: Self::Accumulator, y: Self::Accumulator, ) -> Self::Accumulator

Combine two independent accumulators (allows for unrolling).

Source §

unsafe fn epilogue( &self, arch: A, x: *const T, y: *const U, len: usize, acc: Self::Accumulator, ) -> Self::Accumulator

A supplied trait for dealing with non-full-width epilogues. Often, mask-based loading will do the right thing, but for architectures like AVX2 that have limited support for masking 8- and 16-bit operations, using a scalar fallback may just be better. Read more

Source §

fn get_simd_width() -> usize

!! Do not extend this function !! Read more

Source §

fn get_main_bocksize() -> usize

!! Do not extend this function !! Read more

Source §

impl SIMDSchema<f32, f32> for CosineStateless

Source §

type SIMDWidth = Const<4>

The desired SIMD read width. Reads from the input slice will use this stride when accessing memory.

Source §

type Accumulator = FullCosineAccumulator<Emulated<f32, 4>>

The type used to represent partial accumulated values.

Source §

type Left = Emulated<f32, 4>

The type used for the left-hand side.

Source §

type Right = Emulated<f32, 4>

The type used for the right-hand side.

Source §

type Return = f32

The final return type. This is often f32 for complete distance functions, but need not always be.

Source §

type Main = Strategy2x1

The implementation of the main loop.

Source §

fn init(&self, arch: Scalar) -> Self::Accumulator

Initialize an empty (identity) accumulator.

Source §

fn accumulate( &self, x: Self::Left, y: Self::Right, acc: Self::Accumulator, ) -> Self::Accumulator

Perform an accumulation.

Source §

fn reduce(&self, acc: Self::Accumulator) -> Self::Return

Perform a reduction on the accumulator to yield the final result. Read more

Source §

fn combine( &self, x: Self::Accumulator, y: Self::Accumulator, ) -> Self::Accumulator

Combine two independent accumulators (allows for unrolling).

Source §

unsafe fn epilogue( &self, arch: A, x: *const T, y: *const U, len: usize, acc: Self::Accumulator, ) -> Self::Accumulator

A supplied trait for dealing with non-full-width epilogues. Often, mask-based loading will do the right thing, but for architectures like AVX2 that have limited support for masking 8- and 16-bit operations, using a scalar fallback may just be better. Read more

Source §

fn get_simd_width() -> usize

!! Do not extend this function !! Read more

Source §

fn get_main_bocksize() -> usize

!! Do not extend this function !! Read more

Source §

impl SIMDSchema<f32, f32, V3> for CosineStateless

Available on x86-64 only.

Source §

type SIMDWidth = Const<8>

The desired SIMD read width. Reads from the input slice will use this stride when accessing memory.

Source §

type Accumulator = FullCosineAccumulator<<V3 as Architecture>::f32x8>

The type used to represent partial accumulated values.

Source §

type Left = <V3 as Architecture>::f32x8

The type used for the left-hand side.

Source §

type Right = <V3 as Architecture>::f32x8

The type used for the right-hand side.

Source §

type Return = f32

The final return type. This is often f32 for complete distance functions, but need not always be.

Source §

type Main = Strategy2x4

The implementation of the main loop.

Source §

fn init(&self, arch: V3) -> Self::Accumulator

Initialize an empty (identity) accumulator.

Source §

fn accumulate( &self, x: Self::Left, y: Self::Right, acc: Self::Accumulator, ) -> Self::Accumulator

Perform an accumulation.

Source §

fn reduce(&self, acc: Self::Accumulator) -> Self::Return

Perform a reduction on the accumulator to yield the final result. Read more

Source §

fn combine( &self, x: Self::Accumulator, y: Self::Accumulator, ) -> Self::Accumulator

Combine two independent accumulators (allows for unrolling).

Source §

unsafe fn epilogue( &self, arch: A, x: *const T, y: *const U, len: usize, acc: Self::Accumulator, ) -> Self::Accumulator

A supplied trait for dealing with non-full-width epilogues. Often, mask-based loading will do the right thing, but for architectures like AVX2 that have limited support for masking 8- and 16-bit operations, using a scalar fallback may just be better. Read more

Source §

fn get_simd_width() -> usize

!! Do not extend this function !! Read more

Source §

fn get_main_bocksize() -> usize

!! Do not extend this function !! Read more

Source §

impl SIMDSchema<f32, f32, V4> for CosineStateless

Available on x86-64 only.

Source §

type SIMDWidth = Const<16>

The desired SIMD read width. Reads from the input slice will use this stride when accessing memory.

Source §

type Accumulator = FullCosineAccumulator<<V4 as Architecture>::f32x16>

The type used to represent partial accumulated values.

Source §

type Left = <V4 as Architecture>::f32x16

The type used for the left-hand side.

Source §

type Right = <V4 as Architecture>::f32x16

The type used for the right-hand side.

Source §

type Return = f32

The final return type. This is often f32 for complete distance functions, but need not always be.

Source §

type Main = Strategy2x4

The implementation of the main loop.

Source §

fn init(&self, arch: V4) -> Self::Accumulator

Initialize an empty (identity) accumulator.

Source §

fn accumulate( &self, x: Self::Left, y: Self::Right, acc: Self::Accumulator, ) -> Self::Accumulator

Perform an accumulation.

Source §

fn reduce(&self, acc: Self::Accumulator) -> Self::Return

Perform a reduction on the accumulator to yield the final result. Read more

Source §

fn combine( &self, x: Self::Accumulator, y: Self::Accumulator, ) -> Self::Accumulator

Combine two independent accumulators (allows for unrolling).

Source §

unsafe fn epilogue( &self, arch: A, x: *const T, y: *const U, len: usize, acc: Self::Accumulator, ) -> Self::Accumulator

A supplied trait for dealing with non-full-width epilogues. Often, mask-based loading will do the right thing, but for architectures like AVX2 that have limited support for masking 8- and 16-bit operations, using a scalar fallback may just be better. Read more

Source §

fn get_simd_width() -> usize

!! Do not extend this function !! Read more

Source §

fn get_main_bocksize() -> usize

!! Do not extend this function !! Read more

Source §

impl SIMDSchema<i8, i8> for CosineStateless

Source §

type SIMDWidth = Const<4>

The desired SIMD read width. Reads from the input slice will use this stride when accessing memory.

Source §

type Accumulator = FullCosineAccumulator<Emulated<i32, 4>>

The type used to represent partial accumulated values.

Source §

type Left = Emulated<i8, 4>

The type used for the left-hand side.

Source §

type Right = Emulated<i8, 4>

The type used for the right-hand side.

Source §

type Return = f32

The final return type. This is often f32 for complete distance functions, but need not always be.

Source §

type Main = Strategy1x1

The implementation of the main loop.

Source §

fn init(&self, arch: Scalar) -> Self::Accumulator

Initialize an empty (identity) accumulator.

Source §

fn accumulate( &self, x: Self::Left, y: Self::Right, acc: Self::Accumulator, ) -> Self::Accumulator

Perform an accumulation.

Source §

fn reduce(&self, x: Self::Accumulator) -> Self::Return

Perform a reduction on the accumulator to yield the final result. Read more

Source §

unsafe fn epilogue( &self, arch: Scalar, x: *const i8, y: *const i8, len: usize, acc: Self::Accumulator, ) -> Self::Accumulator

A supplied trait for dealing with non-full-width epilogues. Often, mask-based loading will do the right thing, but for architectures like AVX2 that have limited support for masking 8- and 16-bit operations, using a scalar fallback may just be better. Read more

Source §

fn combine( &self, x: Self::Accumulator, y: Self::Accumulator, ) -> Self::Accumulator

Combine two independent accumulators (allows for unrolling).

Source §

fn get_simd_width() -> usize

!! Do not extend this function !! Read more

Source §

fn get_main_bocksize() -> usize

!! Do not extend this function !! Read more

Source §

impl SIMDSchema<i8, i8, V3> for CosineStateless

Available on x86-64 only.

Source §

type SIMDWidth = Const<16>

The desired SIMD read width. Reads from the input slice will use this stride when accessing memory.

Source §

type Accumulator = FullCosineAccumulator<<V3 as Architecture>::i32x8>

The type used to represent partial accumulated values.

Source §

type Left = <V3 as Architecture>::i8x16

The type used for the left-hand side.

Source §

type Right = <V3 as Architecture>::i8x16

The type used for the right-hand side.

Source §

type Return = f32

The final return type. This is often f32 for complete distance functions, but need not always be.

Source §

type Main = Strategy4x1

The implementation of the main loop.

Source §

fn init(&self, arch: V3) -> Self::Accumulator

Initialize an empty (identity) accumulator.

Source §

fn accumulate( &self, x: Self::Left, y: Self::Right, acc: Self::Accumulator, ) -> Self::Accumulator

Perform an accumulation.

Source §

fn reduce(&self, x: Self::Accumulator) -> Self::Return

Perform a reduction on the accumulator to yield the final result. Read more

Source §

fn combine( &self, x: Self::Accumulator, y: Self::Accumulator, ) -> Self::Accumulator

Combine two independent accumulators (allows for unrolling).

Source §

unsafe fn epilogue( &self, arch: A, x: *const T, y: *const U, len: usize, acc: Self::Accumulator, ) -> Self::Accumulator

A supplied trait for dealing with non-full-width epilogues. Often, mask-based loading will do the right thing, but for architectures like AVX2 that have limited support for masking 8- and 16-bit operations, using a scalar fallback may just be better. Read more

Source §

fn get_simd_width() -> usize

!! Do not extend this function !! Read more

Source §

fn get_main_bocksize() -> usize

!! Do not extend this function !! Read more

Source §

impl SIMDSchema<i8, i8, V4> for CosineStateless

Available on x86-64 only.

Source §

type SIMDWidth = Const<32>

The desired SIMD read width. Reads from the input slice will use this stride when accessing memory.

Source §

type Accumulator = FullCosineAccumulator<<V4 as Architecture>::i32x16>

The type used to represent partial accumulated values.

Source §

type Left = <V4 as Architecture>::i8x32

The type used for the left-hand side.

Source §

type Right = <V4 as Architecture>::i8x32

The type used for the right-hand side.

Source §

type Return = f32

The final return type. This is often f32 for complete distance functions, but need not always be.

Source §

type Main = Strategy4x1

The implementation of the main loop.

Source §

fn init(&self, arch: V4) -> Self::Accumulator

Initialize an empty (identity) accumulator.

Source §

fn accumulate( &self, x: Self::Left, y: Self::Right, acc: Self::Accumulator, ) -> Self::Accumulator

Perform an accumulation.

Source §

fn reduce(&self, x: Self::Accumulator) -> Self::Return

Perform a reduction on the accumulator to yield the final result. Read more

Source §

fn combine( &self, x: Self::Accumulator, y: Self::Accumulator, ) -> Self::Accumulator

Combine two independent accumulators (allows for unrolling).

Source §

unsafe fn epilogue( &self, arch: A, x: *const T, y: *const U, len: usize, acc: Self::Accumulator, ) -> Self::Accumulator

A supplied trait for dealing with non-full-width epilogues. Often, mask-based loading will do the right thing, but for architectures like AVX2 that have limited support for masking 8- and 16-bit operations, using a scalar fallback may just be better. Read more

Source §

fn get_simd_width() -> usize

!! Do not extend this function !! Read more

Source §

fn get_main_bocksize() -> usize

!! Do not extend this function !! Read more

Source §

impl SIMDSchema<u8, u8> for CosineStateless

Source §

type SIMDWidth = Const<4>

The desired SIMD read width. Reads from the input slice will use this stride when accessing memory.

Source §

type Accumulator = FullCosineAccumulator<Emulated<i32, 4>>

The type used to represent partial accumulated values.

Source §

type Left = Emulated<u8, 4>

The type used for the left-hand side.

Source §

type Right = Emulated<u8, 4>

The type used for the right-hand side.

Source §

type Return = f32

The final return type. This is often f32 for complete distance functions, but need not always be.

Source §

type Main = Strategy1x1

The implementation of the main loop.

Source §

fn init(&self, arch: Scalar) -> Self::Accumulator

Initialize an empty (identity) accumulator.

Source §

fn accumulate( &self, x: Self::Left, y: Self::Right, acc: Self::Accumulator, ) -> Self::Accumulator

Perform an accumulation.

Source §

fn reduce(&self, x: Self::Accumulator) -> Self::Return

Perform a reduction on the accumulator to yield the final result. Read more

Source §

unsafe fn epilogue( &self, arch: Scalar, x: *const u8, y: *const u8, len: usize, acc: Self::Accumulator, ) -> Self::Accumulator

A supplied trait for dealing with non-full-width epilogues. Often, mask-based loading will do the right thing, but for architectures like AVX2 that have limited support for masking 8- and 16-bit operations, using a scalar fallback may just be better. Read more

Source §

fn combine( &self, x: Self::Accumulator, y: Self::Accumulator, ) -> Self::Accumulator

Combine two independent accumulators (allows for unrolling).

Source §

fn get_simd_width() -> usize

!! Do not extend this function !! Read more

Source §

fn get_main_bocksize() -> usize

!! Do not extend this function !! Read more

Source §

impl SIMDSchema<u8, u8, V3> for CosineStateless

Available on x86-64 only.

Source §

type SIMDWidth = Const<16>

The desired SIMD read width. Reads from the input slice will use this stride when accessing memory.

Source §

type Accumulator = FullCosineAccumulator<<V3 as Architecture>::i32x8>

The type used to represent partial accumulated values.

Source §

type Left = <V3 as Architecture>::u8x16

The type used for the left-hand side.

Source §

type Right = <V3 as Architecture>::u8x16

The type used for the right-hand side.

Source §

type Return = f32

The final return type. This is often f32 for complete distance functions, but need not always be.

Source §

type Main = Strategy4x1

The implementation of the main loop.

Source §

fn init(&self, arch: V3) -> Self::Accumulator

Initialize an empty (identity) accumulator.

Source §

fn accumulate( &self, x: Self::Left, y: Self::Right, acc: Self::Accumulator, ) -> Self::Accumulator

Perform an accumulation.

Source §

fn reduce(&self, x: Self::Accumulator) -> Self::Return

Perform a reduction on the accumulator to yield the final result. Read more

Source §

fn combine( &self, x: Self::Accumulator, y: Self::Accumulator, ) -> Self::Accumulator

Combine two independent accumulators (allows for unrolling).

Source §

unsafe fn epilogue( &self, arch: A, x: *const T, y: *const U, len: usize, acc: Self::Accumulator, ) -> Self::Accumulator

A supplied trait for dealing with non-full-width epilogues. Often, mask-based loading will do the right thing, but for architectures like AVX2 that have limited support for masking 8- and 16-bit operations, using a scalar fallback may just be better. Read more

Source §

fn get_simd_width() -> usize

!! Do not extend this function !! Read more

Source §

fn get_main_bocksize() -> usize

!! Do not extend this function !! Read more

Source §

impl SIMDSchema<u8, u8, V4> for CosineStateless

Available on x86-64 only.

Source §

type SIMDWidth = Const<32>

The desired SIMD read width. Reads from the input slice will use this stride when accessing memory.

Source §

type Accumulator = FullCosineAccumulator<<V4 as Architecture>::i32x16>

The type used to represent partial accumulated values.

Source §

type Left = <V4 as Architecture>::u8x32

The type used for the left-hand side.

Source §

type Right = <V4 as Architecture>::u8x32

The type used for the right-hand side.

Source §

type Return = f32

The final return type. This is often f32 for complete distance functions, but need not always be.

Source §

type Main = Strategy4x1

The implementation of the main loop.

Source §

fn init(&self, arch: V4) -> Self::Accumulator

Initialize an empty (identity) accumulator.

Source §

fn accumulate( &self, x: Self::Left, y: Self::Right, acc: Self::Accumulator, ) -> Self::Accumulator

Perform an accumulation.

Source §

fn reduce(&self, x: Self::Accumulator) -> Self::Return

Perform a reduction on the accumulator to yield the final result. Read more

Source §

fn combine( &self, x: Self::Accumulator, y: Self::Accumulator, ) -> Self::Accumulator

Combine two independent accumulators (allows for unrolling).

Source §

unsafe fn epilogue( &self, arch: A, x: *const T, y: *const U, len: usize, acc: Self::Accumulator, ) -> Self::Accumulator

A supplied trait for dealing with non-full-width epilogues. Often, mask-based loading will do the right thing, but for architectures like AVX2 that have limited support for masking 8- and 16-bit operations, using a scalar fallback may just be better. Read more

Source §

fn get_simd_width() -> usize

!! Do not extend this function !! Read more

Source §

fn get_main_bocksize() -> usize

!! Do not extend this function !! Read more

Source §

impl Copy for CosineStateless

Auto Trait Implementations§

§

impl UnwindSafe for CosineStateless

Blanket Implementations§

Source §

impl<T> Any for T
where T: 'static + ?Sized,

Source §

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more

Source §

impl<T> Borrow<T> for T
where T: ?Sized,

Source §

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more

Source §

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source §

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more

Source §

impl<T> CloneToUninit for T
where T: Clone,

Source §

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)

Performs copy-assignment from self to dest. Read more

Source §

impl<T> From<T> for T

Source §

fn from(t: T) -> T

Returns the argument unchanged.

Source §

impl<T, U> Into<U> for T
where U: From<T>,

Source §

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source §

impl<T> ToOwned for T
where T: Clone,

Source §

type Owned = T

The resulting type after obtaining ownership.

Source §

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more

Source §

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more

Source §

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source §

type Error = Infallible

The type returned in the event of a conversion error.

Source §

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.

Source §

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source §

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

Source §

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.

CosineStateless

Struct CosineStateless Copy item path

Trait Implementations§

impl Clone for CosineStateless

fn clone(&self) -> CosineStateless

fn clone_from(&mut self, source: &Self)

impl Default for CosineStateless

fn default() -> CosineStateless

impl SIMDSchema<f16, f16> for CosineStateless

type SIMDWidth = Const<1>

type Accumulator = FullCosineAccumulator<Emulated<f32, 1>>

type Left = Emulated<f16, 1>

type Right = Emulated<f16, 1>

type Return = f32

type Main = Strategy1x1

fn init(&self, arch: Scalar) -> Self::Accumulator

fn accumulate( &self, x: Self::Left, y: Self::Right, acc: Self::Accumulator, ) -> Self::Accumulator

fn reduce(&self, acc: Self::Accumulator) -> Self::Return

fn combine( &self, x: Self::Accumulator, y: Self::Accumulator, ) -> Self::Accumulator

unsafe fn epilogue( &self, arch: A, x: *const T, y: *const U, len: usize, acc: Self::Accumulator, ) -> Self::Accumulator

fn get_simd_width() -> usize

fn get_main_bocksize() -> usize

impl SIMDSchema<f16, f16, V3> for CosineStateless

type SIMDWidth = Const<8>

type Accumulator = FullCosineAccumulator<<V3 as Architecture>::f32x8>

type Left = <V3 as Architecture>::f16x8

type Right = <V3 as Architecture>::f16x8

type Return = f32

type Main = Strategy2x4

fn init(&self, arch: V3) -> Self::Accumulator

fn accumulate( &self, x: Self::Left, y: Self::Right, acc: Self::Accumulator, ) -> Self::Accumulator

fn reduce(&self, acc: Self::Accumulator) -> Self::Return

fn combine( &self, x: Self::Accumulator, y: Self::Accumulator, ) -> Self::Accumulator

unsafe fn epilogue( &self, arch: A, x: *const T, y: *const U, len: usize, acc: Self::Accumulator, ) -> Self::Accumulator

fn get_simd_width() -> usize

fn get_main_bocksize() -> usize

impl SIMDSchema<f16, f16, V4> for CosineStateless

type SIMDWidth = Const<16>

type Accumulator = FullCosineAccumulator<<V4 as Architecture>::f32x16>

type Left = <V4 as Architecture>::f16x16

type Right = <V4 as Architecture>::f16x16

type Return = f32

type Main = Strategy2x4

fn init(&self, arch: V4) -> Self::Accumulator

fn accumulate( &self, x: Self::Left, y: Self::Right, acc: Self::Accumulator, ) -> Self::Accumulator

fn reduce(&self, acc: Self::Accumulator) -> Self::Return

fn combine( &self, x: Self::Accumulator, y: Self::Accumulator, ) -> Self::Accumulator

unsafe fn epilogue( &self, arch: A, x: *const T, y: *const U, len: usize, acc: Self::Accumulator, ) -> Self::Accumulator

fn get_simd_width() -> usize

fn get_main_bocksize() -> usize

impl<A> SIMDSchema<f32, f16, A> for CosineStateless where A: Architecture,

type SIMDWidth = Const<8>

type Accumulator = FullCosineAccumulator<<A as Architecture>::f32x8>

type Left = <A as Architecture>::f32x8

type Right = <A as Architecture>::f16x8

type Return = f32

type Main = Strategy2x4

fn init(&self, arch: A) -> Self::Accumulator

fn accumulate( &self, x: Self::Left, y: Self::Right, acc: Self::Accumulator, ) -> Self::Accumulator

fn reduce(&self, acc: Self::Accumulator) -> Self::Return

fn combine( &self, x: Self::Accumulator, y: Self::Accumulator, ) -> Self::Accumulator

unsafe fn epilogue( &self, arch: A, x: *const T, y: *const U, len: usize, acc: Self::Accumulator, ) -> Self::Accumulator

fn get_simd_width() -> usize

fn get_main_bocksize() -> usize

impl SIMDSchema<f32, f32> for CosineStateless

type SIMDWidth = Const<4>

type Accumulator = FullCosineAccumulator<Emulated<f32, 4>>

type Left = Emulated<f32, 4>

type Right = Emulated<f32, 4>

type Return = f32

type Main = Strategy2x1

fn init(&self, arch: Scalar) -> Self::Accumulator

fn accumulate( &self, x: Self::Left, y: Self::Right, acc: Self::Accumulator, ) -> Self::Accumulator

fn reduce(&self, acc: Self::Accumulator) -> Self::Return

fn combine( &self, x: Self::Accumulator, y: Self::Accumulator, ) -> Self::Accumulator

unsafe fn epilogue( &self, arch: A, x: *const T, y: *const U, len: usize, acc: Self::Accumulator, ) -> Self::Accumulator

fn get_simd_width() -> usize

fn get_main_bocksize() -> usize

impl SIMDSchema<f32, f32, V3> for CosineStateless

type SIMDWidth = Const<8>

Struct CosineStateless

unsafe fn epilogue( &self, arch: A, x: *const T, y: *const U, len: usize, acc: Self::Accumulator, ) -> Self::Accumulator

unsafe fn epilogue( &self, arch: A, x: *const T, y: *const U, len: usize, acc: Self::Accumulator, ) -> Self::Accumulator

unsafe fn epilogue( &self, arch: A, x: *const T, y: *const U, len: usize, acc: Self::Accumulator, ) -> Self::Accumulator

impl<A> SIMDSchema<f32, f16, A> for CosineStateless
where A: Architecture,

unsafe fn epilogue( &self, arch: A, x: *const T, y: *const U, len: usize, acc: Self::Accumulator, ) -> Self::Accumulator

unsafe fn epilogue( &self, arch: A, x: *const T, y: *const U, len: usize, acc: Self::Accumulator, ) -> Self::Accumulator

unsafe fn epilogue( &self, arch: A, x: *const T, y: *const U, len: usize, acc: Self::Accumulator, ) -> Self::Accumulator

unsafe fn epilogue( &self, arch: A, x: *const T, y: *const U, len: usize, acc: Self::Accumulator, ) -> Self::Accumulator

unsafe fn epilogue( &self, arch: Scalar, x: *const i8, y: *const i8, len: usize, acc: Self::Accumulator, ) -> Self::Accumulator

unsafe fn epilogue( &self, arch: A, x: *const T, y: *const U, len: usize, acc: Self::Accumulator, ) -> Self::Accumulator