pub struct CosineStateless;
Expand description
A pure Cosine Similarity function that provides a final reduction.
Trait Implementations§
Source§impl Clone for CosineStateless
impl Clone for CosineStateless
Source§fn clone(&self) -> CosineStateless
fn clone(&self) -> CosineStateless
Returns a duplicate of the value. Read more
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from
source. Read more
Source§impl Default for CosineStateless
impl Default for CosineStateless
Source§fn default() -> CosineStateless
fn default() -> CosineStateless
Returns the “default value” for a type. Read more
Source§impl SIMDSchema<f16, f16> for CosineStateless
impl SIMDSchema<f16, f16> for CosineStateless
Source§type SIMDWidth = Const<1>
type SIMDWidth = Const<1>
The desired SIMD read width.
Reads from the input slice will use this stride when accessing memory.
Source§type Accumulator = FullCosineAccumulator<Emulated<f32, 1>>
type Accumulator = FullCosineAccumulator<Emulated<f32, 1>>
The type used to represent partial accumulated values.
Source§type Return = f32
type Return = f32
The final return type.
This is often
f32 for complete distance functions, but need not always be.
Source§type Main = Strategy1x1
type Main = Strategy1x1
The implementation of the main loop.
Source§fn init(&self, arch: Scalar) -> Self::Accumulator
fn init(&self, arch: Scalar) -> Self::Accumulator
Initialize an empty (identity) accumulator.
Source§fn accumulate(
&self,
x: Self::Left,
y: Self::Right,
acc: Self::Accumulator,
) -> Self::Accumulator
fn accumulate( &self, x: Self::Left, y: Self::Right, acc: Self::Accumulator, ) -> Self::Accumulator
Perform an accumulation.
Source§fn reduce(&self, acc: Self::Accumulator) -> Self::Return
fn reduce(&self, acc: Self::Accumulator) -> Self::Return
Perform a reduction on the accumulator to yield the final result. Read more
Source§fn combine(
&self,
x: Self::Accumulator,
y: Self::Accumulator,
) -> Self::Accumulator
fn combine( &self, x: Self::Accumulator, y: Self::Accumulator, ) -> Self::Accumulator
Combine two independent accumulators (allows for unrolling).
Source§unsafe fn epilogue(
&self,
arch: A,
x: *const T,
y: *const U,
len: usize,
acc: Self::Accumulator,
) -> Self::Accumulator
unsafe fn epilogue( &self, arch: A, x: *const T, y: *const U, len: usize, acc: Self::Accumulator, ) -> Self::Accumulator
A supplied trait for dealing with non-full-width epilogues.
Often, mask-based loading will do the right thing, but for architectures like AVX2
that have limited support for masking 8 and 16-bit operations, using a scalar
fallback may just be better. Read more
Source§fn get_simd_width() -> usize
fn get_simd_width() -> usize
!! Do not extend this function !! Read more
Source§fn get_main_bocksize() -> usize
fn get_main_bocksize() -> usize
!! Do not extend this function !! Read more
Source§impl SIMDSchema<f16, f16, V3> for CosineStateless
Available on x86-64 only.
impl SIMDSchema<f16, f16, V3> for CosineStateless
Available on x86-64 only.
Source§type SIMDWidth = Const<8>
type SIMDWidth = Const<8>
The desired SIMD read width.
Reads from the input slice will use this stride when accessing memory.
Source§type Accumulator = FullCosineAccumulator<<V3 as Architecture>::f32x8>
type Accumulator = FullCosineAccumulator<<V3 as Architecture>::f32x8>
The type used to represent partial accumulated values.
Source§type Return = f32
type Return = f32
The final return type.
This is often
f32 for complete distance functions, but need not always be.
Source§type Main = Strategy2x4
type Main = Strategy2x4
The implementation of the main loop.
Source§fn init(&self, arch: V3) -> Self::Accumulator
fn init(&self, arch: V3) -> Self::Accumulator
Initialize an empty (identity) accumulator.
Source§fn accumulate(
&self,
x: Self::Left,
y: Self::Right,
acc: Self::Accumulator,
) -> Self::Accumulator
fn accumulate( &self, x: Self::Left, y: Self::Right, acc: Self::Accumulator, ) -> Self::Accumulator
Perform an accumulation.
Source§fn reduce(&self, acc: Self::Accumulator) -> Self::Return
fn reduce(&self, acc: Self::Accumulator) -> Self::Return
Perform a reduction on the accumulator to yield the final result. Read more
Source§fn combine(
&self,
x: Self::Accumulator,
y: Self::Accumulator,
) -> Self::Accumulator
fn combine( &self, x: Self::Accumulator, y: Self::Accumulator, ) -> Self::Accumulator
Combine two independent accumulators (allows for unrolling).
Source§unsafe fn epilogue(
&self,
arch: A,
x: *const T,
y: *const U,
len: usize,
acc: Self::Accumulator,
) -> Self::Accumulator
unsafe fn epilogue( &self, arch: A, x: *const T, y: *const U, len: usize, acc: Self::Accumulator, ) -> Self::Accumulator
A supplied trait for dealing with non-full-width epilogues.
Often, mask-based loading will do the right thing, but for architectures like AVX2
that have limited support for masking 8 and 16-bit operations, using a scalar
fallback may just be better. Read more
Source§fn get_simd_width() -> usize
fn get_simd_width() -> usize
!! Do not extend this function !! Read more
Source§fn get_main_bocksize() -> usize
fn get_main_bocksize() -> usize
!! Do not extend this function !! Read more
Source§impl SIMDSchema<f16, f16, V4> for CosineStateless
Available on x86-64 only.
impl SIMDSchema<f16, f16, V4> for CosineStateless
Available on x86-64 only.
Source§type SIMDWidth = Const<16>
type SIMDWidth = Const<16>
The desired SIMD read width.
Reads from the input slice will use this stride when accessing memory.
Source§type Accumulator = FullCosineAccumulator<<V4 as Architecture>::f32x16>
type Accumulator = FullCosineAccumulator<<V4 as Architecture>::f32x16>
The type used to represent partial accumulated values.
Source§type Return = f32
type Return = f32
The final return type.
This is often
f32 for complete distance functions, but need not always be.
Source§type Main = Strategy2x4
type Main = Strategy2x4
The implementation of the main loop.
Source§fn init(&self, arch: V4) -> Self::Accumulator
fn init(&self, arch: V4) -> Self::Accumulator
Initialize an empty (identity) accumulator.
Source§fn accumulate(
&self,
x: Self::Left,
y: Self::Right,
acc: Self::Accumulator,
) -> Self::Accumulator
fn accumulate( &self, x: Self::Left, y: Self::Right, acc: Self::Accumulator, ) -> Self::Accumulator
Perform an accumulation.
Source§fn reduce(&self, acc: Self::Accumulator) -> Self::Return
fn reduce(&self, acc: Self::Accumulator) -> Self::Return
Perform a reduction on the accumulator to yield the final result. Read more
Source§fn combine(
&self,
x: Self::Accumulator,
y: Self::Accumulator,
) -> Self::Accumulator
fn combine( &self, x: Self::Accumulator, y: Self::Accumulator, ) -> Self::Accumulator
Combine two independent accumulators (allows for unrolling).
Source§unsafe fn epilogue(
&self,
arch: A,
x: *const T,
y: *const U,
len: usize,
acc: Self::Accumulator,
) -> Self::Accumulator
unsafe fn epilogue( &self, arch: A, x: *const T, y: *const U, len: usize, acc: Self::Accumulator, ) -> Self::Accumulator
A supplied trait for dealing with non-full-width epilogues.
Often, mask-based loading will do the right thing, but for architectures like AVX2
that have limited support for masking 8 and 16-bit operations, using a scalar
fallback may just be better. Read more
Source§fn get_simd_width() -> usize
fn get_simd_width() -> usize
!! Do not extend this function !! Read more
Source§fn get_main_bocksize() -> usize
fn get_main_bocksize() -> usize
!! Do not extend this function !! Read more
Source§impl<A> SIMDSchema<f32, f16, A> for CosineStateless where
A: Architecture,
impl<A> SIMDSchema<f32, f16, A> for CosineStateless where
A: Architecture,
Source§type SIMDWidth = Const<8>
type SIMDWidth = Const<8>
The desired SIMD read width.
Reads from the input slice will use this stride when accessing memory.
Source§type Accumulator = FullCosineAccumulator<<A as Architecture>::f32x8>
type Accumulator = FullCosineAccumulator<<A as Architecture>::f32x8>
The type used to represent partial accumulated values.
Source§type Left = <A as Architecture>::f32x8
type Left = <A as Architecture>::f32x8
The type used for the left-hand side.
Source§type Right = <A as Architecture>::f16x8
type Right = <A as Architecture>::f16x8
The type used for the right-hand side.
Source§type Return = f32
type Return = f32
The final return type.
This is often
f32 for complete distance functions, but need not always be.
Source§type Main = Strategy2x4
type Main = Strategy2x4
The implementation of the main loop.
Source§fn init(&self, arch: A) -> Self::Accumulator
fn init(&self, arch: A) -> Self::Accumulator
Initialize an empty (identity) accumulator.
Source§fn accumulate(
&self,
x: Self::Left,
y: Self::Right,
acc: Self::Accumulator,
) -> Self::Accumulator
fn accumulate( &self, x: Self::Left, y: Self::Right, acc: Self::Accumulator, ) -> Self::Accumulator
Perform an accumulation.
Source§fn reduce(&self, acc: Self::Accumulator) -> Self::Return
fn reduce(&self, acc: Self::Accumulator) -> Self::Return
Perform a reduction on the accumulator to yield the final result. Read more
Source§fn combine(
&self,
x: Self::Accumulator,
y: Self::Accumulator,
) -> Self::Accumulator
fn combine( &self, x: Self::Accumulator, y: Self::Accumulator, ) -> Self::Accumulator
Combine two independent accumulators (allows for unrolling).
Source§unsafe fn epilogue(
&self,
arch: A,
x: *const T,
y: *const U,
len: usize,
acc: Self::Accumulator,
) -> Self::Accumulator
unsafe fn epilogue( &self, arch: A, x: *const T, y: *const U, len: usize, acc: Self::Accumulator, ) -> Self::Accumulator
A supplied trait for dealing with non-full-width epilogues.
Often, mask-based loading will do the right thing, but for architectures like AVX2
that have limited support for masking 8 and 16-bit operations, using a scalar
fallback may just be better. Read more
Source§fn get_simd_width() -> usize
fn get_simd_width() -> usize
!! Do not extend this function !! Read more
Source§fn get_main_bocksize() -> usize
fn get_main_bocksize() -> usize
!! Do not extend this function !! Read more
Source§impl SIMDSchema<f32, f32> for CosineStateless
impl SIMDSchema<f32, f32> for CosineStateless
Source§type SIMDWidth = Const<4>
type SIMDWidth = Const<4>
The desired SIMD read width.
Reads from the input slice will use this stride when accessing memory.
Source§type Accumulator = FullCosineAccumulator<Emulated<f32, 4>>
type Accumulator = FullCosineAccumulator<Emulated<f32, 4>>
The type used to represent partial accumulated values.
Source§type Return = f32
type Return = f32
The final return type.
This is often
f32 for complete distance functions, but need not always be.
Source§type Main = Strategy2x1
type Main = Strategy2x1
The implementation of the main loop.
Source§fn init(&self, arch: Scalar) -> Self::Accumulator
fn init(&self, arch: Scalar) -> Self::Accumulator
Initialize an empty (identity) accumulator.
Source§fn accumulate(
&self,
x: Self::Left,
y: Self::Right,
acc: Self::Accumulator,
) -> Self::Accumulator
fn accumulate( &self, x: Self::Left, y: Self::Right, acc: Self::Accumulator, ) -> Self::Accumulator
Perform an accumulation.
Source§fn reduce(&self, acc: Self::Accumulator) -> Self::Return
fn reduce(&self, acc: Self::Accumulator) -> Self::Return
Perform a reduction on the accumulator to yield the final result. Read more
Source§fn combine(
&self,
x: Self::Accumulator,
y: Self::Accumulator,
) -> Self::Accumulator
fn combine( &self, x: Self::Accumulator, y: Self::Accumulator, ) -> Self::Accumulator
Combine two independent accumulators (allows for unrolling).
Source§unsafe fn epilogue(
&self,
arch: A,
x: *const T,
y: *const U,
len: usize,
acc: Self::Accumulator,
) -> Self::Accumulator
unsafe fn epilogue( &self, arch: A, x: *const T, y: *const U, len: usize, acc: Self::Accumulator, ) -> Self::Accumulator
A supplied trait for dealing with non-full-width epilogues.
Often, mask-based loading will do the right thing, but for architectures like AVX2
that have limited support for masking 8 and 16-bit operations, using a scalar
fallback may just be better. Read more
Source§fn get_simd_width() -> usize
fn get_simd_width() -> usize
!! Do not extend this function !! Read more
Source§fn get_main_bocksize() -> usize
fn get_main_bocksize() -> usize
!! Do not extend this function !! Read more
Source§impl SIMDSchema<f32, f32, V3> for CosineStateless
Available on x86-64 only.
impl SIMDSchema<f32, f32, V3> for CosineStateless
Available on x86-64 only.
Source§type SIMDWidth = Const<8>
type SIMDWidth = Const<8>
The desired SIMD read width.
Reads from the input slice will use this stride when accessing memory.
Source§type Accumulator = FullCosineAccumulator<<V3 as Architecture>::f32x8>
type Accumulator = FullCosineAccumulator<<V3 as Architecture>::f32x8>
The type used to represent partial accumulated values.
Source§type Return = f32
type Return = f32
The final return type.
This is often
f32 for complete distance functions, but need not always be.
Source§type Main = Strategy2x4
type Main = Strategy2x4
The implementation of the main loop.
Source§fn init(&self, arch: V3) -> Self::Accumulator
fn init(&self, arch: V3) -> Self::Accumulator
Initialize an empty (identity) accumulator.
Source§fn accumulate(
&self,
x: Self::Left,
y: Self::Right,
acc: Self::Accumulator,
) -> Self::Accumulator
fn accumulate( &self, x: Self::Left, y: Self::Right, acc: Self::Accumulator, ) -> Self::Accumulator
Perform an accumulation.
Source§fn reduce(&self, acc: Self::Accumulator) -> Self::Return
fn reduce(&self, acc: Self::Accumulator) -> Self::Return
Perform a reduction on the accumulator to yield the final result. Read more
Source§fn combine(
&self,
x: Self::Accumulator,
y: Self::Accumulator,
) -> Self::Accumulator
fn combine( &self, x: Self::Accumulator, y: Self::Accumulator, ) -> Self::Accumulator
Combine two independent accumulators (allows for unrolling).
Source§unsafe fn epilogue(
&self,
arch: A,
x: *const T,
y: *const U,
len: usize,
acc: Self::Accumulator,
) -> Self::Accumulator
unsafe fn epilogue( &self, arch: A, x: *const T, y: *const U, len: usize, acc: Self::Accumulator, ) -> Self::Accumulator
A supplied trait for dealing with non-full-width epilogues.
Often, mask-based loading will do the right thing, but for architectures like AVX2
that have limited support for masking 8 and 16-bit operations, using a scalar
fallback may just be better. Read more
Source§fn get_simd_width() -> usize
fn get_simd_width() -> usize
!! Do not extend this function !! Read more
Source§fn get_main_bocksize() -> usize
fn get_main_bocksize() -> usize
!! Do not extend this function !! Read more
Source§impl SIMDSchema<f32, f32, V4> for CosineStateless
Available on x86-64 only.
impl SIMDSchema<f32, f32, V4> for CosineStateless
Available on x86-64 only.
Source§type SIMDWidth = Const<16>
type SIMDWidth = Const<16>
The desired SIMD read width.
Reads from the input slice will use this stride when accessing memory.
Source§type Accumulator = FullCosineAccumulator<<V4 as Architecture>::f32x16>
type Accumulator = FullCosineAccumulator<<V4 as Architecture>::f32x16>
The type used to represent partial accumulated values.
Source§type Return = f32
type Return = f32
The final return type.
This is often
f32 for complete distance functions, but need not always be.
Source§type Main = Strategy2x4
type Main = Strategy2x4
The implementation of the main loop.
Source§fn init(&self, arch: V4) -> Self::Accumulator
fn init(&self, arch: V4) -> Self::Accumulator
Initialize an empty (identity) accumulator.
Source§fn accumulate(
&self,
x: Self::Left,
y: Self::Right,
acc: Self::Accumulator,
) -> Self::Accumulator
fn accumulate( &self, x: Self::Left, y: Self::Right, acc: Self::Accumulator, ) -> Self::Accumulator
Perform an accumulation.
Source§fn reduce(&self, acc: Self::Accumulator) -> Self::Return
fn reduce(&self, acc: Self::Accumulator) -> Self::Return
Perform a reduction on the accumulator to yield the final result. Read more
Source§fn combine(
&self,
x: Self::Accumulator,
y: Self::Accumulator,
) -> Self::Accumulator
fn combine( &self, x: Self::Accumulator, y: Self::Accumulator, ) -> Self::Accumulator
Combine two independent accumulators (allows for unrolling).
Source§unsafe fn epilogue(
&self,
arch: A,
x: *const T,
y: *const U,
len: usize,
acc: Self::Accumulator,
) -> Self::Accumulator
unsafe fn epilogue( &self, arch: A, x: *const T, y: *const U, len: usize, acc: Self::Accumulator, ) -> Self::Accumulator
A supplied trait for dealing with non-full-width epilogues.
Often, mask-based loading will do the right thing, but for architectures like AVX2
that have limited support for masking 8 and 16-bit operations, using a scalar
fallback may just be better. Read more
Source§fn get_simd_width() -> usize
fn get_simd_width() -> usize
!! Do not extend this function !! Read more
Source§fn get_main_bocksize() -> usize
fn get_main_bocksize() -> usize
!! Do not extend this function !! Read more
Source§impl SIMDSchema<i8, i8> for CosineStateless
impl SIMDSchema<i8, i8> for CosineStateless
Source§type SIMDWidth = Const<4>
type SIMDWidth = Const<4>
The desired SIMD read width.
Reads from the input slice will use this stride when accessing memory.
Source§type Accumulator = FullCosineAccumulator<Emulated<i32, 4>>
type Accumulator = FullCosineAccumulator<Emulated<i32, 4>>
The type used to represent partial accumulated values.
Source§type Return = f32
type Return = f32
The final return type.
This is often
f32 for complete distance functions, but need not always be.
Source§type Main = Strategy1x1
type Main = Strategy1x1
The implementation of the main loop.
Source§fn init(&self, arch: Scalar) -> Self::Accumulator
fn init(&self, arch: Scalar) -> Self::Accumulator
Initialize an empty (identity) accumulator.
Source§fn accumulate(
&self,
x: Self::Left,
y: Self::Right,
acc: Self::Accumulator,
) -> Self::Accumulator
fn accumulate( &self, x: Self::Left, y: Self::Right, acc: Self::Accumulator, ) -> Self::Accumulator
Perform an accumulation.
Source§fn reduce(&self, x: Self::Accumulator) -> Self::Return
fn reduce(&self, x: Self::Accumulator) -> Self::Return
Perform a reduction on the accumulator to yield the final result. Read more
Source§unsafe fn epilogue(
&self,
arch: Scalar,
x: *const i8,
y: *const i8,
len: usize,
acc: Self::Accumulator,
) -> Self::Accumulator
unsafe fn epilogue( &self, arch: Scalar, x: *const i8, y: *const i8, len: usize, acc: Self::Accumulator, ) -> Self::Accumulator
A supplied trait for dealing with non-full-width epilogues.
Often, mask-based loading will do the right thing, but for architectures like AVX2
that have limited support for masking 8 and 16-bit operations, using a scalar
fallback may just be better. Read more
Source§fn combine(
&self,
x: Self::Accumulator,
y: Self::Accumulator,
) -> Self::Accumulator
fn combine( &self, x: Self::Accumulator, y: Self::Accumulator, ) -> Self::Accumulator
Combine two independent accumulators (allows for unrolling).
Source§fn get_simd_width() -> usize
fn get_simd_width() -> usize
!! Do not extend this function !! Read more
Source§fn get_main_bocksize() -> usize
fn get_main_bocksize() -> usize
!! Do not extend this function !! Read more
Source§impl SIMDSchema<i8, i8, V3> for CosineStateless
Available on x86-64 only.
impl SIMDSchema<i8, i8, V3> for CosineStateless
Available on x86-64 only.
Source§type SIMDWidth = Const<16>
type SIMDWidth = Const<16>
The desired SIMD read width.
Reads from the input slice will use this stride when accessing memory.
Source§type Accumulator = FullCosineAccumulator<<V3 as Architecture>::i32x8>
type Accumulator = FullCosineAccumulator<<V3 as Architecture>::i32x8>
The type used to represent partial accumulated values.
Source§type Return = f32
type Return = f32
The final return type.
This is often
f32 for complete distance functions, but need not always be.
Source§type Main = Strategy4x1
type Main = Strategy4x1
The implementation of the main loop.
Source§fn init(&self, arch: V3) -> Self::Accumulator
fn init(&self, arch: V3) -> Self::Accumulator
Initialize an empty (identity) accumulator.
Source§fn accumulate(
&self,
x: Self::Left,
y: Self::Right,
acc: Self::Accumulator,
) -> Self::Accumulator
fn accumulate( &self, x: Self::Left, y: Self::Right, acc: Self::Accumulator, ) -> Self::Accumulator
Perform an accumulation.
Source§fn reduce(&self, x: Self::Accumulator) -> Self::Return
fn reduce(&self, x: Self::Accumulator) -> Self::Return
Perform a reduction on the accumulator to yield the final result. Read more
Source§fn combine(
&self,
x: Self::Accumulator,
y: Self::Accumulator,
) -> Self::Accumulator
fn combine( &self, x: Self::Accumulator, y: Self::Accumulator, ) -> Self::Accumulator
Combine two independent accumulators (allows for unrolling).
Source§unsafe fn epilogue(
&self,
arch: A,
x: *const T,
y: *const U,
len: usize,
acc: Self::Accumulator,
) -> Self::Accumulator
unsafe fn epilogue( &self, arch: A, x: *const T, y: *const U, len: usize, acc: Self::Accumulator, ) -> Self::Accumulator
A supplied trait for dealing with non-full-width epilogues.
Often, mask-based loading will do the right thing, but for architectures like AVX2
that have limited support for masking 8 and 16-bit operations, using a scalar
fallback may just be better. Read more
Source§fn get_simd_width() -> usize
fn get_simd_width() -> usize
!! Do not extend this function !! Read more
Source§fn get_main_bocksize() -> usize
fn get_main_bocksize() -> usize
!! Do not extend this function !! Read more
Source§impl SIMDSchema<i8, i8, V4> for CosineStateless
Available on x86-64 only.
impl SIMDSchema<i8, i8, V4> for CosineStateless
Available on x86-64 only.
Source§type SIMDWidth = Const<32>
type SIMDWidth = Const<32>
The desired SIMD read width.
Reads from the input slice will use this stride when accessing memory.
Source§type Accumulator = FullCosineAccumulator<<V4 as Architecture>::i32x16>
type Accumulator = FullCosineAccumulator<<V4 as Architecture>::i32x16>
The type used to represent partial accumulated values.
Source§type Return = f32
type Return = f32
The final return type.
This is often
f32 for complete distance functions, but need not always be.
Source§type Main = Strategy4x1
type Main = Strategy4x1
The implementation of the main loop.
Source§fn init(&self, arch: V4) -> Self::Accumulator
fn init(&self, arch: V4) -> Self::Accumulator
Initialize an empty (identity) accumulator.
Source§fn accumulate(
&self,
x: Self::Left,
y: Self::Right,
acc: Self::Accumulator,
) -> Self::Accumulator
fn accumulate( &self, x: Self::Left, y: Self::Right, acc: Self::Accumulator, ) -> Self::Accumulator
Perform an accumulation.
Source§fn reduce(&self, x: Self::Accumulator) -> Self::Return
fn reduce(&self, x: Self::Accumulator) -> Self::Return
Perform a reduction on the accumulator to yield the final result. Read more
Source§fn combine(
&self,
x: Self::Accumulator,
y: Self::Accumulator,
) -> Self::Accumulator
fn combine( &self, x: Self::Accumulator, y: Self::Accumulator, ) -> Self::Accumulator
Combine two independent accumulators (allows for unrolling).
Source§unsafe fn epilogue(
&self,
arch: A,
x: *const T,
y: *const U,
len: usize,
acc: Self::Accumulator,
) -> Self::Accumulator
unsafe fn epilogue( &self, arch: A, x: *const T, y: *const U, len: usize, acc: Self::Accumulator, ) -> Self::Accumulator
A supplied trait for dealing with non-full-width epilogues.
Often, mask-based loading will do the right thing, but for architectures like AVX2
that have limited support for masking 8 and 16-bit operations, using a scalar
fallback may just be better. Read more
Source§fn get_simd_width() -> usize
fn get_simd_width() -> usize
!! Do not extend this function !! Read more
Source§fn get_main_bocksize() -> usize
fn get_main_bocksize() -> usize
!! Do not extend this function !! Read more
Source§impl SIMDSchema<u8, u8> for CosineStateless
impl SIMDSchema<u8, u8> for CosineStateless
Source§type SIMDWidth = Const<4>
type SIMDWidth = Const<4>
The desired SIMD read width.
Reads from the input slice will use this stride when accessing memory.
Source§type Accumulator = FullCosineAccumulator<Emulated<i32, 4>>
type Accumulator = FullCosineAccumulator<Emulated<i32, 4>>
The type used to represent partial accumulated values.
Source§type Return = f32
type Return = f32
The final return type.
This is often
f32 for complete distance functions, but need not always be.
Source§type Main = Strategy1x1
type Main = Strategy1x1
The implementation of the main loop.
Source§fn init(&self, arch: Scalar) -> Self::Accumulator
fn init(&self, arch: Scalar) -> Self::Accumulator
Initialize an empty (identity) accumulator.
Source§fn accumulate(
&self,
x: Self::Left,
y: Self::Right,
acc: Self::Accumulator,
) -> Self::Accumulator
fn accumulate( &self, x: Self::Left, y: Self::Right, acc: Self::Accumulator, ) -> Self::Accumulator
Perform an accumulation.
Source§fn reduce(&self, x: Self::Accumulator) -> Self::Return
fn reduce(&self, x: Self::Accumulator) -> Self::Return
Perform a reduction on the accumulator to yield the final result. Read more
Source§unsafe fn epilogue(
&self,
arch: Scalar,
x: *const u8,
y: *const u8,
len: usize,
acc: Self::Accumulator,
) -> Self::Accumulator
unsafe fn epilogue( &self, arch: Scalar, x: *const u8, y: *const u8, len: usize, acc: Self::Accumulator, ) -> Self::Accumulator
A supplied trait for dealing with non-full-width epilogues.
Often, mask-based loading will do the right thing, but for architectures like AVX2
that have limited support for masking 8 and 16-bit operations, using a scalar
fallback may just be better. Read more
Source§fn combine(
&self,
x: Self::Accumulator,
y: Self::Accumulator,
) -> Self::Accumulator
fn combine( &self, x: Self::Accumulator, y: Self::Accumulator, ) -> Self::Accumulator
Combine two independent accumulators (allows for unrolling).
Source§fn get_simd_width() -> usize
fn get_simd_width() -> usize
!! Do not extend this function !! Read more
Source§fn get_main_bocksize() -> usize
fn get_main_bocksize() -> usize
!! Do not extend this function !! Read more
Source§impl SIMDSchema<u8, u8, V3> for CosineStateless
Available on x86-64 only.
impl SIMDSchema<u8, u8, V3> for CosineStateless
Available on x86-64 only.
Source§type SIMDWidth = Const<16>
type SIMDWidth = Const<16>
The desired SIMD read width.
Reads from the input slice will use this stride when accessing memory.
Source§type Accumulator = FullCosineAccumulator<<V3 as Architecture>::i32x8>
type Accumulator = FullCosineAccumulator<<V3 as Architecture>::i32x8>
The type used to represent partial accumulated values.
Source§type Return = f32
type Return = f32
The final return type.
This is often
f32 for complete distance functions, but need not always be.
Source§type Main = Strategy4x1
type Main = Strategy4x1
The implementation of the main loop.
Source§fn init(&self, arch: V3) -> Self::Accumulator
fn init(&self, arch: V3) -> Self::Accumulator
Initialize an empty (identity) accumulator.
Source§fn accumulate(
&self,
x: Self::Left,
y: Self::Right,
acc: Self::Accumulator,
) -> Self::Accumulator
fn accumulate( &self, x: Self::Left, y: Self::Right, acc: Self::Accumulator, ) -> Self::Accumulator
Perform an accumulation.
Source§fn reduce(&self, x: Self::Accumulator) -> Self::Return
fn reduce(&self, x: Self::Accumulator) -> Self::Return
Perform a reduction on the accumulator to yield the final result. Read more
Source§fn combine(
&self,
x: Self::Accumulator,
y: Self::Accumulator,
) -> Self::Accumulator
fn combine( &self, x: Self::Accumulator, y: Self::Accumulator, ) -> Self::Accumulator
Combine two independent accumulators (allows for unrolling).
Source§unsafe fn epilogue(
&self,
arch: A,
x: *const T,
y: *const U,
len: usize,
acc: Self::Accumulator,
) -> Self::Accumulator
unsafe fn epilogue( &self, arch: A, x: *const T, y: *const U, len: usize, acc: Self::Accumulator, ) -> Self::Accumulator
A supplied trait for dealing with non-full-width epilogues.
Often, mask-based loading will do the right thing, but for architectures like AVX2
that have limited support for masking 8 and 16-bit operations, using a scalar
fallback may just be better. Read more
Source§fn get_simd_width() -> usize
fn get_simd_width() -> usize
!! Do not extend this function !! Read more
Source§fn get_main_bocksize() -> usize
fn get_main_bocksize() -> usize
!! Do not extend this function !! Read more
Source§impl SIMDSchema<u8, u8, V4> for CosineStateless
Available on x86-64 only.
impl SIMDSchema<u8, u8, V4> for CosineStateless
Available on x86-64 only.
Source§type SIMDWidth = Const<32>
type SIMDWidth = Const<32>
The desired SIMD read width.
Reads from the input slice will use this stride when accessing memory.
Source§type Accumulator = FullCosineAccumulator<<V4 as Architecture>::i32x16>
type Accumulator = FullCosineAccumulator<<V4 as Architecture>::i32x16>
The type used to represent partial accumulated values.
Source§type Return = f32
type Return = f32
The final return type.
This is often
f32 for complete distance functions, but need not always be.
Source§type Main = Strategy4x1
type Main = Strategy4x1
The implementation of the main loop.
Source§fn init(&self, arch: V4) -> Self::Accumulator
fn init(&self, arch: V4) -> Self::Accumulator
Initialize an empty (identity) accumulator.
Source§fn accumulate(
&self,
x: Self::Left,
y: Self::Right,
acc: Self::Accumulator,
) -> Self::Accumulator
fn accumulate( &self, x: Self::Left, y: Self::Right, acc: Self::Accumulator, ) -> Self::Accumulator
Perform an accumulation.
Source§fn reduce(&self, x: Self::Accumulator) -> Self::Return
fn reduce(&self, x: Self::Accumulator) -> Self::Return
Perform a reduction on the accumulator to yield the final result. Read more
Source§fn combine(
&self,
x: Self::Accumulator,
y: Self::Accumulator,
) -> Self::Accumulator
fn combine( &self, x: Self::Accumulator, y: Self::Accumulator, ) -> Self::Accumulator
Combine two independent accumulators (allows for unrolling).
Source§unsafe fn epilogue(
&self,
arch: A,
x: *const T,
y: *const U,
len: usize,
acc: Self::Accumulator,
) -> Self::Accumulator
unsafe fn epilogue( &self, arch: A, x: *const T, y: *const U, len: usize, acc: Self::Accumulator, ) -> Self::Accumulator
A supplied trait for dealing with non-full-width epilogues.
Often, mask-based loading will do the right thing, but for architectures like AVX2
that have limited support for masking 8 and 16-bit operations, using a scalar
fallback may just be better. Read more
Source§fn get_simd_width() -> usize
fn get_simd_width() -> usize
!! Do not extend this function !! Read more
Source§fn get_main_bocksize() -> usize
fn get_main_bocksize() -> usize
!! Do not extend this function !! Read more
impl Copy for CosineStateless
Auto Trait Implementations§
impl Freeze for CosineStateless
impl RefUnwindSafe for CosineStateless
impl Send for CosineStateless
impl Sync for CosineStateless
impl Unpin for CosineStateless
impl UnsafeUnpin for CosineStateless
impl UnwindSafe for CosineStateless
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more