pub trait OpsTrait: Send + Sync {
// Provided methods
fn v_abs_f32(&self, x: &[f32], out: &mut [f32]) { ... }
fn v_abs_f64(&self, x: &[f64], out: &mut [f64]) { ... }
fn v_abs_i8(&self, x: &[i8], out: &mut [i8]) { ... }
fn v_abs_i16(&self, x: &[i16], out: &mut [i16]) { ... }
fn v_abs_i32(&self, x: &[i32], out: &mut [i32]) { ... }
fn v_abs_i64(&self, x: &[i64], out: &mut [i64]) { ... }
fn v_abs_f16(&self, x: &[f16], out: &mut [f16]) { ... }
fn v_abs_bf16(&self, x: &[bf16], out: &mut [bf16]) { ... }
fn v_sin_f32(&self, x: &[f32], out: &mut [f32]) { ... }
fn v_sin_f64(&self, x: &[f64], out: &mut [f64]) { ... }
fn v_sin_f16(&self, x: &[f16], out: &mut [f16]) { ... }
fn v_sin_bf16(&self, x: &[bf16], out: &mut [bf16]) { ... }
fn v_cos_f32(&self, x: &[f32], out: &mut [f32]) { ... }
fn v_cos_f64(&self, x: &[f64], out: &mut [f64]) { ... }
fn v_cos_f16(&self, x: &[f16], out: &mut [f16]) { ... }
fn v_cos_bf16(&self, x: &[bf16], out: &mut [bf16]) { ... }
fn v_tanh_f32(&self, x: &[f32], out: &mut [f32]) { ... }
fn v_tanh_f64(&self, x: &[f64], out: &mut [f64]) { ... }
fn v_tanh_f16(&self, x: &[f16], out: &mut [f16]) { ... }
fn v_tanh_bf16(&self, x: &[bf16], out: &mut [bf16]) { ... }
fn v_exp_f32(&self, x: &[f32], out: &mut [f32]) { ... }
fn v_exp_f64(&self, x: &[f64], out: &mut [f64]) { ... }
fn v_exp_f16(&self, x: &[f16], out: &mut [f16]) { ... }
fn v_exp_bf16(&self, x: &[bf16], out: &mut [bf16]) { ... }
fn v_log_f32(&self, x: &[f32], out: &mut [f32]) { ... }
fn v_log_f64(&self, x: &[f64], out: &mut [f64]) { ... }
fn v_log_f16(&self, x: &[f16], out: &mut [f16]) { ... }
fn v_log_bf16(&self, x: &[bf16], out: &mut [bf16]) { ... }
fn v_sqrt_f32(&self, x: &[f32], out: &mut [f32]) { ... }
fn v_sqrt_f64(&self, x: &[f64], out: &mut [f64]) { ... }
fn v_sqrt_f16(&self, x: &[f16], out: &mut [f16]) { ... }
fn v_sqrt_bf16(&self, x: &[bf16], out: &mut [bf16]) { ... }
fn v_sqr_f32(&self, x: &[f32], out: &mut [f32]) { ... }
fn v_sqr_f64(&self, x: &[f64], out: &mut [f64]) { ... }
fn v_sqr_i8(&self, x: &[i8], out: &mut [i8]) { ... }
fn v_sqr_i16(&self, x: &[i16], out: &mut [i16]) { ... }
fn v_sqr_i32(&self, x: &[i32], out: &mut [i32]) { ... }
fn v_sqr_i64(&self, x: &[i64], out: &mut [i64]) { ... }
fn v_sqr_u8(&self, x: &[u8], out: &mut [u8]) { ... }
fn v_sqr_u16(&self, x: &[u16], out: &mut [u16]) { ... }
fn v_sqr_u32(&self, x: &[u32], out: &mut [u32]) { ... }
fn v_sqr_u64(&self, x: &[u64], out: &mut [u64]) { ... }
fn v_sqr_f16(&self, x: &[f16], out: &mut [f16]) { ... }
fn v_sqr_bf16(&self, x: &[bf16], out: &mut [bf16]) { ... }
fn v_tan_f32(&self, x: &[f32], out: &mut [f32]) { ... }
fn v_tan_f64(&self, x: &[f64], out: &mut [f64]) { ... }
fn v_tan_f16(&self, x: &[f16], out: &mut [f16]) { ... }
fn v_tan_bf16(&self, x: &[bf16], out: &mut [bf16]) { ... }
fn v_recip_f32(&self, x: &[f32], out: &mut [f32]) { ... }
fn v_recip_f64(&self, x: &[f64], out: &mut [f64]) { ... }
fn v_recip_f16(&self, x: &[f16], out: &mut [f16]) { ... }
fn v_recip_bf16(&self, x: &[bf16], out: &mut [bf16]) { ... }
fn v_floor_f32(&self, x: &[f32], out: &mut [f32]) { ... }
fn v_floor_f64(&self, x: &[f64], out: &mut [f64]) { ... }
fn v_floor_f16(&self, x: &[f16], out: &mut [f16]) { ... }
fn v_floor_bf16(&self, x: &[bf16], out: &mut [bf16]) { ... }
fn v_ceil_f32(&self, x: &[f32], out: &mut [f32]) { ... }
fn v_ceil_f64(&self, x: &[f64], out: &mut [f64]) { ... }
fn v_ceil_f16(&self, x: &[f16], out: &mut [f16]) { ... }
fn v_ceil_bf16(&self, x: &[bf16], out: &mut [bf16]) { ... }
fn v_round_f32(&self, x: &[f32], out: &mut [f32]) { ... }
fn v_round_f64(&self, x: &[f64], out: &mut [f64]) { ... }
fn v_round_f16(&self, x: &[f16], out: &mut [f16]) { ... }
fn v_round_bf16(&self, x: &[bf16], out: &mut [bf16]) { ... }
fn v_neg_i8(&self, x: &[i8], out: &mut [i8]) { ... }
fn v_neg_i16(&self, x: &[i16], out: &mut [i16]) { ... }
fn v_neg_i32(&self, x: &[i32], out: &mut [i32]) { ... }
fn v_neg_i64(&self, x: &[i64], out: &mut [i64]) { ... }
fn v_neg_f32(&self, x: &[f32], out: &mut [f32]) { ... }
fn v_neg_f64(&self, x: &[f64], out: &mut [f64]) { ... }
fn v_neg_f16(&self, x: &[f16], out: &mut [f16]) { ... }
fn v_neg_bf16(&self, x: &[bf16], out: &mut [bf16]) { ... }
fn scal_f32(&self, a: f32, x: &mut [f32]) { ... }
fn scal_f64(&self, a: f64, x: &mut [f64]) { ... }
unsafe fn gemm_f32(
&self,
m: usize,
n: usize,
k: usize,
a: *const f32,
lda: usize,
b: *const f32,
ldb: usize,
c: *mut f32,
ldc: usize,
) { ... }
unsafe fn gemm_f64(
&self,
m: usize,
n: usize,
k: usize,
a: *const f64,
lda: usize,
b: *const f64,
ldb: usize,
c: *mut f64,
ldc: usize,
) { ... }
unsafe fn gemm_i8(
&self,
m: usize,
n: usize,
k: usize,
a: *const i8,
lda: usize,
b: *const i8,
ldb: usize,
c: *mut i8,
ldc: usize,
) { ... }
unsafe fn gemm_i16(
&self,
m: usize,
n: usize,
k: usize,
a: *const i16,
lda: usize,
b: *const i16,
ldb: usize,
c: *mut i16,
ldc: usize,
) { ... }
unsafe fn gemm_i32(
&self,
m: usize,
n: usize,
k: usize,
a: *const i32,
lda: usize,
b: *const i32,
ldb: usize,
c: *mut i32,
ldc: usize,
) { ... }
unsafe fn gemm_i64(
&self,
m: usize,
n: usize,
k: usize,
a: *const i64,
lda: usize,
b: *const i64,
ldb: usize,
c: *mut i64,
ldc: usize,
) { ... }
unsafe fn gemm_u8(
&self,
m: usize,
n: usize,
k: usize,
a: *const u8,
lda: usize,
b: *const u8,
ldb: usize,
c: *mut u8,
ldc: usize,
) { ... }
unsafe fn gemm_u16(
&self,
m: usize,
n: usize,
k: usize,
a: *const u16,
lda: usize,
b: *const u16,
ldb: usize,
c: *mut u16,
ldc: usize,
) { ... }
unsafe fn gemm_u32(
&self,
m: usize,
n: usize,
k: usize,
a: *const u32,
lda: usize,
b: *const u32,
ldb: usize,
c: *mut u32,
ldc: usize,
) { ... }
unsafe fn gemm_u64(
&self,
m: usize,
n: usize,
k: usize,
a: *const u64,
lda: usize,
b: *const u64,
ldb: usize,
c: *mut u64,
ldc: usize,
) { ... }
unsafe fn gemm_f16(
&self,
m: usize,
n: usize,
k: usize,
a: *const f16,
lda: usize,
b: *const f16,
ldb: usize,
c: *mut f16,
ldc: usize,
) { ... }
unsafe fn gemm_bf16(
&self,
m: usize,
n: usize,
k: usize,
a: *const bf16,
lda: usize,
b: *const bf16,
ldb: usize,
c: *mut bf16,
ldc: usize,
) { ... }
fn v_add_f32(&self, a: &[f32], b: &[f32], out: &mut [f32]) { ... }
fn v_add_f64(&self, a: &[f64], b: &[f64], out: &mut [f64]) { ... }
fn v_add_i8(&self, a: &[i8], b: &[i8], out: &mut [i8]) { ... }
fn v_add_i16(&self, a: &[i16], b: &[i16], out: &mut [i16]) { ... }
fn v_add_i32(&self, a: &[i32], b: &[i32], out: &mut [i32]) { ... }
fn v_add_i64(&self, a: &[i64], b: &[i64], out: &mut [i64]) { ... }
fn v_add_u8(&self, a: &[u8], b: &[u8], out: &mut [u8]) { ... }
fn v_add_u16(&self, a: &[u16], b: &[u16], out: &mut [u16]) { ... }
fn v_add_u32(&self, a: &[u32], b: &[u32], out: &mut [u32]) { ... }
fn v_add_u64(&self, a: &[u64], b: &[u64], out: &mut [u64]) { ... }
fn v_add_f16(&self, a: &[f16], b: &[f16], out: &mut [f16]) { ... }
fn v_add_bf16(&self, a: &[bf16], b: &[bf16], out: &mut [bf16]) { ... }
fn v_sub_f32(&self, a: &[f32], b: &[f32], out: &mut [f32]) { ... }
fn v_sub_f64(&self, a: &[f64], b: &[f64], out: &mut [f64]) { ... }
fn v_sub_i8(&self, a: &[i8], b: &[i8], out: &mut [i8]) { ... }
fn v_sub_i16(&self, a: &[i16], b: &[i16], out: &mut [i16]) { ... }
fn v_sub_i32(&self, a: &[i32], b: &[i32], out: &mut [i32]) { ... }
fn v_sub_i64(&self, a: &[i64], b: &[i64], out: &mut [i64]) { ... }
fn v_sub_u8(&self, a: &[u8], b: &[u8], out: &mut [u8]) { ... }
fn v_sub_u16(&self, a: &[u16], b: &[u16], out: &mut [u16]) { ... }
fn v_sub_u32(&self, a: &[u32], b: &[u32], out: &mut [u32]) { ... }
fn v_sub_u64(&self, a: &[u64], b: &[u64], out: &mut [u64]) { ... }
fn v_sub_f16(&self, a: &[f16], b: &[f16], out: &mut [f16]) { ... }
fn v_sub_bf16(&self, a: &[bf16], b: &[bf16], out: &mut [bf16]) { ... }
fn v_mul_f32(&self, a: &[f32], b: &[f32], out: &mut [f32]) { ... }
fn v_mul_f64(&self, a: &[f64], b: &[f64], out: &mut [f64]) { ... }
fn v_mul_i8(&self, a: &[i8], b: &[i8], out: &mut [i8]) { ... }
fn v_mul_i16(&self, a: &[i16], b: &[i16], out: &mut [i16]) { ... }
fn v_mul_i32(&self, a: &[i32], b: &[i32], out: &mut [i32]) { ... }
fn v_mul_i64(&self, a: &[i64], b: &[i64], out: &mut [i64]) { ... }
fn v_mul_u8(&self, a: &[u8], b: &[u8], out: &mut [u8]) { ... }
fn v_mul_u16(&self, a: &[u16], b: &[u16], out: &mut [u16]) { ... }
fn v_mul_u32(&self, a: &[u32], b: &[u32], out: &mut [u32]) { ... }
fn v_mul_u64(&self, a: &[u64], b: &[u64], out: &mut [u64]) { ... }
fn v_mul_f16(&self, a: &[f16], b: &[f16], out: &mut [f16]) { ... }
fn v_mul_bf16(&self, a: &[bf16], b: &[bf16], out: &mut [bf16]) { ... }
fn v_div_f32(&self, a: &[f32], b: &[f32], out: &mut [f32]) { ... }
fn v_div_f64(&self, a: &[f64], b: &[f64], out: &mut [f64]) { ... }
fn v_div_f16(&self, a: &[f16], b: &[f16], out: &mut [f16]) { ... }
fn v_div_bf16(&self, a: &[bf16], b: &[bf16], out: &mut [bf16]) { ... }
fn v_add_scalar_f32(&self, x: &[f32], scalar: f32, out: &mut [f32]) { ... }
fn v_add_scalar_f64(&self, x: &[f64], scalar: f64, out: &mut [f64]) { ... }
fn v_add_scalar_i8(&self, x: &[i8], scalar: i8, out: &mut [i8]) { ... }
fn v_add_scalar_i16(&self, x: &[i16], scalar: i16, out: &mut [i16]) { ... }
fn v_add_scalar_i32(&self, x: &[i32], scalar: i32, out: &mut [i32]) { ... }
fn v_add_scalar_i64(&self, x: &[i64], scalar: i64, out: &mut [i64]) { ... }
fn v_add_scalar_u8(&self, x: &[u8], scalar: u8, out: &mut [u8]) { ... }
fn v_add_scalar_u16(&self, x: &[u16], scalar: u16, out: &mut [u16]) { ... }
fn v_add_scalar_u32(&self, x: &[u32], scalar: u32, out: &mut [u32]) { ... }
fn v_add_scalar_u64(&self, x: &[u64], scalar: u64, out: &mut [u64]) { ... }
fn v_add_scalar_f16(&self, x: &[f16], scalar: f16, out: &mut [f16]) { ... }
fn v_add_scalar_bf16(&self, x: &[bf16], scalar: bf16, out: &mut [bf16]) { ... }
fn v_sub_scalar_f32(&self, x: &[f32], scalar: f32, out: &mut [f32]) { ... }
fn v_sub_scalar_f64(&self, x: &[f64], scalar: f64, out: &mut [f64]) { ... }
fn v_sub_scalar_i8(&self, x: &[i8], scalar: i8, out: &mut [i8]) { ... }
fn v_sub_scalar_i16(&self, x: &[i16], scalar: i16, out: &mut [i16]) { ... }
fn v_sub_scalar_i32(&self, x: &[i32], scalar: i32, out: &mut [i32]) { ... }
fn v_sub_scalar_i64(&self, x: &[i64], scalar: i64, out: &mut [i64]) { ... }
fn v_sub_scalar_u8(&self, x: &[u8], scalar: u8, out: &mut [u8]) { ... }
fn v_sub_scalar_u16(&self, x: &[u16], scalar: u16, out: &mut [u16]) { ... }
fn v_sub_scalar_u32(&self, x: &[u32], scalar: u32, out: &mut [u32]) { ... }
fn v_sub_scalar_u64(&self, x: &[u64], scalar: u64, out: &mut [u64]) { ... }
fn v_sub_scalar_f16(&self, x: &[f16], scalar: f16, out: &mut [f16]) { ... }
fn v_sub_scalar_bf16(&self, x: &[bf16], scalar: bf16, out: &mut [bf16]) { ... }
fn v_mul_scalar_f32(&self, x: &[f32], scalar: f32, out: &mut [f32]) { ... }
fn v_mul_scalar_f64(&self, x: &[f64], scalar: f64, out: &mut [f64]) { ... }
fn v_mul_scalar_i8(&self, x: &[i8], scalar: i8, out: &mut [i8]) { ... }
fn v_mul_scalar_i16(&self, x: &[i16], scalar: i16, out: &mut [i16]) { ... }
fn v_mul_scalar_i32(&self, x: &[i32], scalar: i32, out: &mut [i32]) { ... }
fn v_mul_scalar_i64(&self, x: &[i64], scalar: i64, out: &mut [i64]) { ... }
fn v_mul_scalar_u8(&self, x: &[u8], scalar: u8, out: &mut [u8]) { ... }
fn v_mul_scalar_u16(&self, x: &[u16], scalar: u16, out: &mut [u16]) { ... }
fn v_mul_scalar_u32(&self, x: &[u32], scalar: u32, out: &mut [u32]) { ... }
fn v_mul_scalar_u64(&self, x: &[u64], scalar: u64, out: &mut [u64]) { ... }
fn v_mul_scalar_f16(&self, x: &[f16], scalar: f16, out: &mut [f16]) { ... }
fn v_mul_scalar_bf16(&self, x: &[bf16], scalar: bf16, out: &mut [bf16]) { ... }
fn v_div_scalar_f32(&self, x: &[f32], scalar: f32, out: &mut [f32]) { ... }
fn v_div_scalar_f64(&self, x: &[f64], scalar: f64, out: &mut [f64]) { ... }
fn v_div_scalar_f16(&self, x: &[f16], scalar: f16, out: &mut [f16]) { ... }
fn v_div_scalar_bf16(&self, x: &[bf16], scalar: bf16, out: &mut [bf16]) { ... }
fn asum_f32(&self, x: &[f32]) -> f32 { ... }
fn asum_f64(&self, x: &[f64]) -> f64 { ... }
fn asum_i8(&self, x: &[i8]) -> i8 { ... }
fn asum_i16(&self, x: &[i16]) -> i16 { ... }
fn asum_i32(&self, x: &[i32]) -> i32 { ... }
fn asum_i64(&self, x: &[i64]) -> i64 { ... }
fn asum_u8(&self, x: &[u8]) -> u8 { ... }
fn asum_u16(&self, x: &[u16]) -> u16 { ... }
fn asum_u32(&self, x: &[u32]) -> u32 { ... }
fn asum_u64(&self, x: &[u64]) -> u64 { ... }
fn asum_f16(&self, x: &[f16]) -> f32 { ... }
fn asum_bf16(&self, x: &[bf16]) -> f32 { ... }
fn v_pow_f32(&self, a: &[f32], b: &[f32], out: &mut [f32]) { ... }
fn v_pow_f64(&self, a: &[f64], b: &[f64], out: &mut [f64]) { ... }
fn sum_f32(&self, x: &[f32]) -> f32 { ... }
fn sum_f64(&self, x: &[f64]) -> f64 { ... }
fn sum_f16(&self, x: &[f16]) -> f64 { ... }
fn sum_bf16(&self, x: &[bf16]) -> f64 { ... }
fn sum_u8(&self, x: &[u8]) -> f64 { ... }
fn sum_i8(&self, x: &[i8]) -> f64 { ... }
fn sum_u16(&self, x: &[u16]) -> f64 { ... }
fn sum_i16(&self, x: &[i16]) -> f64 { ... }
fn sum_u32(&self, x: &[u32]) -> f64 { ... }
fn sum_i32(&self, x: &[i32]) -> f64 { ... }
fn sum_u64(&self, x: &[u64]) -> f64 { ... }
fn sum_i64(&self, x: &[i64]) -> f64 { ... }
fn mean_f32(&self, x: &[f32]) -> f32 { ... }
fn mean_f64(&self, x: &[f64]) -> f64 { ... }
fn mean_i8(&self, x: &[i8]) -> f64 { ... }
fn mean_i16(&self, x: &[i16]) -> f64 { ... }
fn mean_i32(&self, x: &[i32]) -> f64 { ... }
fn mean_i64(&self, x: &[i64]) -> f64 { ... }
fn mean_u8(&self, x: &[u8]) -> f64 { ... }
fn mean_u16(&self, x: &[u16]) -> f64 { ... }
fn mean_u32(&self, x: &[u32]) -> f64 { ... }
fn mean_u64(&self, x: &[u64]) -> f64 { ... }
fn mean_f16(&self, x: &[f16]) -> f64 { ... }
fn mean_bf16(&self, x: &[bf16]) -> f64 { ... }
fn nrm2_f64(&self, a: &[f64]) -> f64 { ... }
fn nrm2_f32(&self, a: &[f32]) -> f64 { ... }
fn nrm2_f16(&self, a: &[f16]) -> f64 { ... }
fn nrm2_bf16(&self, a: &[bf16]) -> f64 { ... }
fn relu_f32(&self, x: &[f32], out: &mut [f32]) { ... }
fn relu_f64(&self, x: &[f64], out: &mut [f64]) { ... }
fn relu_i8(&self, x: &[i8], out: &mut [i8]) { ... }
fn relu_i16(&self, x: &[i16], out: &mut [i16]) { ... }
fn relu_i32(&self, x: &[i32], out: &mut [i32]) { ... }
fn relu_i64(&self, x: &[i64], out: &mut [i64]) { ... }
fn relu_u8(&self, x: &[u8], out: &mut [u8]) { ... }
fn relu_u16(&self, x: &[u16], out: &mut [u16]) { ... }
fn relu_u32(&self, x: &[u32], out: &mut [u32]) { ... }
fn relu_u64(&self, x: &[u64], out: &mut [u64]) { ... }
fn relu_f16(&self, x: &[f16], out: &mut [f16]) { ... }
fn relu_bf16(&self, x: &[bf16], out: &mut [bf16]) { ... }
fn min_v_f32(&self, x: &[f32]) -> f32 { ... }
fn min_v_f64(&self, x: &[f64]) -> f64 { ... }
fn min_v_i8(&self, x: &[i8]) -> i8 { ... }
fn min_v_i16(&self, x: &[i16]) -> i16 { ... }
fn min_v_i32(&self, x: &[i32]) -> i32 { ... }
fn min_v_i64(&self, x: &[i64]) -> i64 { ... }
fn min_v_u8(&self, x: &[u8]) -> u8 { ... }
fn min_v_u16(&self, x: &[u16]) -> u16 { ... }
fn min_v_u32(&self, x: &[u32]) -> u32 { ... }
fn min_v_u64(&self, x: &[u64]) -> u64 { ... }
fn min_v_f16(&self, x: &[f16]) -> f16 { ... }
fn min_v_bf16(&self, x: &[bf16]) -> bf16 { ... }
fn max_v_f32(&self, x: &[f32]) -> f32 { ... }
fn max_v_f64(&self, x: &[f64]) -> f64 { ... }
fn max_v_i8(&self, x: &[i8]) -> i8 { ... }
fn max_v_i16(&self, x: &[i16]) -> i16 { ... }
fn max_v_i32(&self, x: &[i32]) -> i32 { ... }
fn max_v_i64(&self, x: &[i64]) -> i64 { ... }
fn max_v_u8(&self, x: &[u8]) -> u8 { ... }
fn max_v_u16(&self, x: &[u16]) -> u16 { ... }
fn max_v_u32(&self, x: &[u32]) -> u32 { ... }
fn max_v_u64(&self, x: &[u64]) -> u64 { ... }
fn max_v_f16(&self, x: &[f16]) -> f16 { ... }
fn max_v_bf16(&self, x: &[bf16]) -> bf16 { ... }
fn min_vi_f32(&self, x: &[f32]) -> (f32, u64) { ... }
fn min_vi_f64(&self, x: &[f64]) -> (f64, u64) { ... }
fn min_vi_i8(&self, x: &[i8]) -> (i8, u64) { ... }
fn min_vi_i16(&self, x: &[i16]) -> (i16, u64) { ... }
fn min_vi_i32(&self, x: &[i32]) -> (i32, u64) { ... }
fn min_vi_i64(&self, x: &[i64]) -> (i64, u64) { ... }
fn min_vi_u8(&self, x: &[u8]) -> (u8, u64) { ... }
fn min_vi_u16(&self, x: &[u16]) -> (u16, u64) { ... }
fn min_vi_u32(&self, x: &[u32]) -> (u32, u64) { ... }
fn min_vi_u64(&self, x: &[u64]) -> (u64, u64) { ... }
fn min_vi_f16(&self, x: &[f16]) -> (f16, u64) { ... }
fn min_vi_bf16(&self, x: &[bf16]) -> (bf16, u64) { ... }
fn max_vi_f32(&self, x: &[f32]) -> (f32, u64) { ... }
fn max_vi_f64(&self, x: &[f64]) -> (f64, u64) { ... }
fn max_vi_i8(&self, x: &[i8]) -> (i8, u64) { ... }
fn max_vi_i16(&self, x: &[i16]) -> (i16, u64) { ... }
fn max_vi_i32(&self, x: &[i32]) -> (i32, u64) { ... }
fn max_vi_i64(&self, x: &[i64]) -> (i64, u64) { ... }
fn max_vi_u8(&self, x: &[u8]) -> (u8, u64) { ... }
fn max_vi_u16(&self, x: &[u16]) -> (u16, u64) { ... }
fn max_vi_u32(&self, x: &[u32]) -> (u32, u64) { ... }
fn max_vi_u64(&self, x: &[u64]) -> (u64, u64) { ... }
fn max_vi_f16(&self, x: &[f16]) -> (f16, u64) { ... }
fn max_vi_bf16(&self, x: &[bf16]) -> (bf16, u64) { ... }
fn min_i_f32(&self, x: &[f32]) -> u64 { ... }
fn min_i_f64(&self, x: &[f64]) -> u64 { ... }
fn min_i_i8(&self, x: &[i8]) -> u64 { ... }
fn min_i_i16(&self, x: &[i16]) -> u64 { ... }
fn min_i_i32(&self, x: &[i32]) -> u64 { ... }
fn min_i_i64(&self, x: &[i64]) -> u64 { ... }
fn min_i_u8(&self, x: &[u8]) -> u64 { ... }
fn min_i_u16(&self, x: &[u16]) -> u64 { ... }
fn min_i_u32(&self, x: &[u32]) -> u64 { ... }
fn min_i_u64(&self, x: &[u64]) -> u64 { ... }
fn min_i_f16(&self, x: &[f16]) -> u64 { ... }
fn min_i_bf16(&self, x: &[bf16]) -> u64 { ... }
fn max_i_f32(&self, x: &[f32]) -> u64 { ... }
fn max_i_f64(&self, x: &[f64]) -> u64 { ... }
fn max_i_i8(&self, x: &[i8]) -> u64 { ... }
fn max_i_i16(&self, x: &[i16]) -> u64 { ... }
fn max_i_i32(&self, x: &[i32]) -> u64 { ... }
fn max_i_i64(&self, x: &[i64]) -> u64 { ... }
fn max_i_u8(&self, x: &[u8]) -> u64 { ... }
fn max_i_u16(&self, x: &[u16]) -> u64 { ... }
fn max_i_u32(&self, x: &[u32]) -> u64 { ... }
fn max_i_u64(&self, x: &[u64]) -> u64 { ... }
fn max_i_f16(&self, x: &[f16]) -> u64 { ... }
fn max_i_bf16(&self, x: &[bf16]) -> u64 { ... }
fn min_max_v_f32(&self, x: &[f32]) -> (f32, f32) { ... }
fn min_max_v_f64(&self, x: &[f64]) -> (f64, f64) { ... }
fn min_max_v_i8(&self, x: &[i8]) -> (i8, i8) { ... }
fn min_max_v_i16(&self, x: &[i16]) -> (i16, i16) { ... }
fn min_max_v_i32(&self, x: &[i32]) -> (i32, i32) { ... }
fn min_max_v_i64(&self, x: &[i64]) -> (i64, i64) { ... }
fn min_max_v_u8(&self, x: &[u8]) -> (u8, u8) { ... }
fn min_max_v_u16(&self, x: &[u16]) -> (u16, u16) { ... }
fn min_max_v_u32(&self, x: &[u32]) -> (u32, u32) { ... }
fn min_max_v_u64(&self, x: &[u64]) -> (u64, u64) { ... }
fn min_max_v_f16(&self, x: &[f16]) -> (f16, f16) { ... }
fn min_max_v_bf16(&self, x: &[bf16]) -> (bf16, bf16) { ... }
fn min_max_vi_f32(&self, x: &[f32]) -> ((f32, u64), (f32, u64)) { ... }
fn min_max_vi_f64(&self, x: &[f64]) -> ((f64, u64), (f64, u64)) { ... }
fn min_max_vi_i8(&self, x: &[i8]) -> ((i8, u64), (i8, u64)) { ... }
fn min_max_vi_i16(&self, x: &[i16]) -> ((i16, u64), (i16, u64)) { ... }
fn min_max_vi_i32(&self, x: &[i32]) -> ((i32, u64), (i32, u64)) { ... }
fn min_max_vi_i64(&self, x: &[i64]) -> ((i64, u64), (i64, u64)) { ... }
fn min_max_vi_u8(&self, x: &[u8]) -> ((u8, u64), (u8, u64)) { ... }
fn min_max_vi_u16(&self, x: &[u16]) -> ((u16, u64), (u16, u64)) { ... }
fn min_max_vi_u32(&self, x: &[u32]) -> ((u32, u64), (u32, u64)) { ... }
fn min_max_vi_u64(&self, x: &[u64]) -> ((u64, u64), (u64, u64)) { ... }
fn min_max_vi_f16(&self, x: &[f16]) -> ((f16, u64), (f16, u64)) { ... }
fn min_max_vi_bf16(&self, x: &[bf16]) -> ((bf16, u64), (bf16, u64)) { ... }
fn min_max_i_f32(&self, x: &[f32]) -> (u64, u64) { ... }
fn min_max_i_f64(&self, x: &[f64]) -> (u64, u64) { ... }
fn min_max_i_i8(&self, x: &[i8]) -> (u64, u64) { ... }
fn min_max_i_i16(&self, x: &[i16]) -> (u64, u64) { ... }
fn min_max_i_i32(&self, x: &[i32]) -> (u64, u64) { ... }
fn min_max_i_i64(&self, x: &[i64]) -> (u64, u64) { ... }
fn min_max_i_u8(&self, x: &[u8]) -> (u64, u64) { ... }
fn min_max_i_u16(&self, x: &[u16]) -> (u64, u64) { ... }
fn min_max_i_u32(&self, x: &[u32]) -> (u64, u64) { ... }
fn min_max_i_u64(&self, x: &[u64]) -> (u64, u64) { ... }
fn min_max_i_f16(&self, x: &[f16]) -> (u64, u64) { ... }
fn min_max_i_bf16(&self, x: &[bf16]) -> (u64, u64) { ... }
fn dot_i16(&self, a: &[i16], b: &[i16]) -> f64 { ... }
fn dot_i32(&self, a: &[i32], b: &[i32]) -> f64 { ... }
fn dot_i64(&self, a: &[i64], b: &[i64]) -> f64 { ... }
fn dot_u8(&self, a: &[u8], b: &[u8]) -> f64 { ... }
fn dot_u16(&self, a: &[u16], b: &[u16]) -> f64 { ... }
fn dot_u32(&self, a: &[u32], b: &[u32]) -> f64 { ... }
fn dot_u64(&self, a: &[u64], b: &[u64]) -> f64 { ... }
fn dot_f64(&self, a: &[f64], b: &[f64]) -> f64 { ... }
fn dot_f32(&self, a: &[f32], b: &[f32]) -> f64 { ... }
fn dot_i8(&self, a: &[i8], b: &[i8]) -> f64 { ... }
fn dot_f16(&self, a: &[f16], b: &[f16]) -> f64 { ... }
fn dot_bf16(&self, a: &[bf16], b: &[bf16]) -> f64 { ... }
fn sigmoid_f32(&self, x: &[f32], out: &mut [f32]) { ... }
fn sigmoid_f64(&self, x: &[f64], out: &mut [f64]) { ... }
fn sigmoid_f16(&self, x: &[f16], out: &mut [f16]) { ... }
fn sigmoid_bf16(&self, x: &[bf16], out: &mut [bf16]) { ... }
fn clamp_f32(&self, x: &[f32], min: f32, max: f32, out: &mut [f32]) { ... }
fn clamp_f64(&self, x: &[f64], min: f64, max: f64, out: &mut [f64]) { ... }
fn clamp_f16(&self, x: &[f16], min: f16, max: f16, out: &mut [f16]) { ... }
fn clamp_bf16(&self, x: &[bf16], min: bf16, max: bf16, out: &mut [bf16]) { ... }
fn clamp_i8(&self, x: &[i8], min: i8, max: i8, out: &mut [i8]) { ... }
fn clamp_i16(&self, x: &[i16], min: i16, max: i16, out: &mut [i16]) { ... }
fn clamp_i32(&self, x: &[i32], min: i32, max: i32, out: &mut [i32]) { ... }
fn clamp_i64(&self, x: &[i64], min: i64, max: i64, out: &mut [i64]) { ... }
fn clamp_u8(&self, x: &[u8], min: u8, max: u8, out: &mut [u8]) { ... }
fn clamp_u16(&self, x: &[u16], min: u16, max: u16, out: &mut [u16]) { ... }
fn clamp_u32(&self, x: &[u32], min: u32, max: u32, out: &mut [u32]) { ... }
fn clamp_u64(&self, x: &[u64], min: u64, max: u64, out: &mut [u64]) { ... }
}
Trait for accelerated tensor operations.
This trait provides a unified interface over multiple mathematical backends, including Accelerate (macOS), Intel MKL, OpenBLAS, and portable fallback implementations.
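For orientation, a minimal sketch of how a backend is used through this trait follows. FallbackOps is a hypothetical stand-in type (the crate's real backends and their constructors may differ); because every method is provided, an empty impl inherits the default implementations.

// Hypothetical backend type for illustration only; the crate's actual
// backend selection (Accelerate, MKL, OpenBLAS, fallback) may differ.
struct FallbackOps;

// All methods are provided, so an empty impl inherits the defaults.
impl OpsTrait for FallbackOps {}

fn demo(ops: &dyn OpsTrait) {
    let x = [1.0f32, -2.0, 3.0];
    let mut out = [0.0f32; 3];

    // Element-wise ops write into a caller-provided buffer of equal length.
    ops.v_abs_f32(&x, &mut out); // out == [1.0, 2.0, 3.0]

    // Reductions return their result directly.
    let total = ops.sum_f32(&x); // 2.0
    let _ = total;
}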
Provided Methods§
fn v_abs_f32(&self, x: &[f32], out: &mut [f32])
fn v_abs_f64(&self, x: &[f64], out: &mut [f64])
fn v_abs_i8(&self, x: &[i8], out: &mut [i8])
fn v_abs_i16(&self, x: &[i16], out: &mut [i16])
fn v_abs_i32(&self, x: &[i32], out: &mut [i32])
fn v_abs_i64(&self, x: &[i64], out: &mut [i64])
fn v_abs_f16(&self, x: &[f16], out: &mut [f16])
fn v_abs_bf16(&self, x: &[bf16], out: &mut [bf16])
fn v_sin_f32(&self, x: &[f32], out: &mut [f32])
fn v_sin_f64(&self, x: &[f64], out: &mut [f64])
fn v_sin_f16(&self, x: &[f16], out: &mut [f16])
fn v_sin_bf16(&self, x: &[bf16], out: &mut [bf16])
fn v_cos_f32(&self, x: &[f32], out: &mut [f32])
fn v_cos_f64(&self, x: &[f64], out: &mut [f64])
fn v_cos_f16(&self, x: &[f16], out: &mut [f16])
fn v_cos_bf16(&self, x: &[bf16], out: &mut [bf16])
fn v_tanh_f32(&self, x: &[f32], out: &mut [f32])
fn v_tanh_f64(&self, x: &[f64], out: &mut [f64])
fn v_tanh_f16(&self, x: &[f16], out: &mut [f16])
fn v_tanh_bf16(&self, x: &[bf16], out: &mut [bf16])
fn v_exp_f32(&self, x: &[f32], out: &mut [f32])
fn v_exp_f64(&self, x: &[f64], out: &mut [f64])
fn v_exp_f16(&self, x: &[f16], out: &mut [f16])
fn v_exp_bf16(&self, x: &[bf16], out: &mut [bf16])
fn v_log_f32(&self, x: &[f32], out: &mut [f32])
fn v_log_f64(&self, x: &[f64], out: &mut [f64])
fn v_log_f16(&self, x: &[f16], out: &mut [f16])
fn v_log_bf16(&self, x: &[bf16], out: &mut [bf16])
fn v_sqrt_f32(&self, x: &[f32], out: &mut [f32])
fn v_sqrt_f64(&self, x: &[f64], out: &mut [f64])
fn v_sqrt_f16(&self, x: &[f16], out: &mut [f16])
fn v_sqrt_bf16(&self, x: &[bf16], out: &mut [bf16])
fn v_sqr_f32(&self, x: &[f32], out: &mut [f32])
fn v_sqr_f64(&self, x: &[f64], out: &mut [f64])
fn v_sqr_i8(&self, x: &[i8], out: &mut [i8])
fn v_sqr_i16(&self, x: &[i16], out: &mut [i16])
fn v_sqr_i32(&self, x: &[i32], out: &mut [i32])
fn v_sqr_i64(&self, x: &[i64], out: &mut [i64])
fn v_sqr_u8(&self, x: &[u8], out: &mut [u8])
fn v_sqr_u16(&self, x: &[u16], out: &mut [u16])
fn v_sqr_u32(&self, x: &[u32], out: &mut [u32])
fn v_sqr_u64(&self, x: &[u64], out: &mut [u64])
fn v_sqr_f16(&self, x: &[f16], out: &mut [f16])
fn v_sqr_bf16(&self, x: &[bf16], out: &mut [bf16])
fn v_tan_f32(&self, x: &[f32], out: &mut [f32])
fn v_tan_f64(&self, x: &[f64], out: &mut [f64])
fn v_tan_f16(&self, x: &[f16], out: &mut [f16])
fn v_tan_bf16(&self, x: &[bf16], out: &mut [bf16])
fn v_recip_f32(&self, x: &[f32], out: &mut [f32])
fn v_recip_f64(&self, x: &[f64], out: &mut [f64])
fn v_recip_f16(&self, x: &[f16], out: &mut [f16])
fn v_recip_bf16(&self, x: &[bf16], out: &mut [bf16])
fn v_floor_f32(&self, x: &[f32], out: &mut [f32])
fn v_floor_f64(&self, x: &[f64], out: &mut [f64])
fn v_floor_f16(&self, x: &[f16], out: &mut [f16])
fn v_floor_bf16(&self, x: &[bf16], out: &mut [bf16])
fn v_ceil_f32(&self, x: &[f32], out: &mut [f32])
fn v_ceil_f64(&self, x: &[f64], out: &mut [f64])
fn v_ceil_f16(&self, x: &[f16], out: &mut [f16])
fn v_ceil_bf16(&self, x: &[bf16], out: &mut [bf16])
fn v_round_f32(&self, x: &[f32], out: &mut [f32])
fn v_round_f64(&self, x: &[f64], out: &mut [f64])
fn v_round_f16(&self, x: &[f16], out: &mut [f16])
fn v_round_bf16(&self, x: &[bf16], out: &mut [bf16])
fn v_neg_i8(&self, x: &[i8], out: &mut [i8])
fn v_neg_i16(&self, x: &[i16], out: &mut [i16])
fn v_neg_i32(&self, x: &[i32], out: &mut [i32])
fn v_neg_i64(&self, x: &[i64], out: &mut [i64])
fn v_neg_f32(&self, x: &[f32], out: &mut [f32])
fn v_neg_f64(&self, x: &[f64], out: &mut [f64])
fn v_neg_f16(&self, x: &[f16], out: &mut [f16])
fn v_neg_bf16(&self, x: &[bf16], out: &mut [bf16])
fn scal_f32(&self, a: f32, x: &mut [f32])
fn scal_f64(&self, a: f64, x: &mut [f64])
unsafe fn gemm_f32(
&self,
m: usize,
n: usize,
k: usize,
a: *const f32,
lda: usize,
b: *const f32,
ldb: usize,
c: *mut f32,
ldc: usize,
)
Performs matrix multiplication for f32: C = A * B. Uses an optimized gemm library implementation with dynamic thread configuration.
§Safety
The caller must ensure that a, b, and c are valid pointers to arrays of the
correct size, and that m, n, k, lda, ldb, and ldc are valid dimensions
and leading dimensions for the matrices involved in the multiplication.
The matrices must not overlap in a way that would cause data races if accessed
concurrently.
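As a hedged illustration of these requirements, the safe wrapper below (not part of the trait) derives the pointers and leading dimensions from slices, assuming dense row-major storage with lda = k, ldb = n, ldc = n; the documentation does not pin down a layout convention, so treat this as one plausible reading rather than a contract.

// Sketch of a safe wrapper over gemm_f32. Row-major layout is an assumption.
fn matmul_f32(ops: &dyn OpsTrait, m: usize, n: usize, k: usize,
              a: &[f32], b: &[f32], c: &mut [f32]) {
    assert_eq!(a.len(), m * k, "A must be m x k");
    assert_eq!(b.len(), k * n, "B must be k x n");
    assert_eq!(c.len(), m * n, "C must be m x n");
    // SAFETY: the asserts guarantee each pointer covers its full matrix,
    // the leading dimensions match the assumed row-major layout, and the
    // borrow rules prevent c from aliasing a or b, so no data race on C.
    unsafe {
        ops.gemm_f32(m, n, k, a.as_ptr(), k, b.as_ptr(), n, c.as_mut_ptr(), n);
    }
}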
unsafe fn gemm_f64(
&self,
m: usize,
n: usize,
k: usize,
a: *const f64,
lda: usize,
b: *const f64,
ldb: usize,
c: *mut f64,
ldc: usize,
)
Performs matrix multiplication for f64: C = A * B. Uses an optimized gemm library implementation with dynamic thread configuration.
§Safety
The caller must ensure that a, b, and c are valid pointers to arrays of the
correct size, and that m, n, k, lda, ldb, and ldc are valid dimensions
and leading dimensions for the matrices involved in the multiplication.
The matrices must not overlap in a way that would cause data races if accessed
concurrently.
unsafe fn gemm_i8(
&self,
m: usize,
n: usize,
k: usize,
a: *const i8,
lda: usize,
b: *const i8,
ldb: usize,
c: *mut i8,
ldc: usize,
)
Performs matrix multiplication C = alpha * A * B + beta * C
§Safety
This function is unsafe because it performs raw pointer operations. The caller must ensure:
- All pointers are valid and point to properly allocated memory
- Matrix dimensions are correct (m, n, k)
- Leading dimensions (lda, ldb, ldc) are valid
- Memory regions do not overlap inappropriately
unsafe fn gemm_i16(
&self,
m: usize,
n: usize,
k: usize,
a: *const i16,
lda: usize,
b: *const i16,
ldb: usize,
c: *mut i16,
ldc: usize,
)
Performs matrix multiplication C = alpha * A * B + beta * C
§Safety
This function is unsafe because it performs raw pointer operations. The caller must ensure:
- All pointers are valid and point to properly allocated memory
- Matrix dimensions are correct (m, n, k)
- Leading dimensions (lda, ldb, ldc) are valid
- Memory regions do not overlap inappropriately
unsafe fn gemm_i32(
&self,
m: usize,
n: usize,
k: usize,
a: *const i32,
lda: usize,
b: *const i32,
ldb: usize,
c: *mut i32,
ldc: usize,
)
Performs matrix multiplication C = alpha * A * B + beta * C
§Safety
This function is unsafe because it performs raw pointer operations. The caller must ensure:
- All pointers are valid and point to properly allocated memory
- Matrix dimensions are correct (m, n, k)
- Leading dimensions (lda, ldb, ldc) are valid
- Memory regions do not overlap inappropriately
unsafe fn gemm_i64(
&self,
m: usize,
n: usize,
k: usize,
a: *const i64,
lda: usize,
b: *const i64,
ldb: usize,
c: *mut i64,
ldc: usize,
)
Performs matrix multiplication C = alpha * A * B + beta * C
§Safety
This function is unsafe because it performs raw pointer operations. The caller must ensure:
- All pointers are valid and point to properly allocated memory
- Matrix dimensions are correct (m, n, k)
- Leading dimensions (lda, ldb, ldc) are valid
- Memory regions do not overlap inappropriately
unsafe fn gemm_u8(
&self,
m: usize,
n: usize,
k: usize,
a: *const u8,
lda: usize,
b: *const u8,
ldb: usize,
c: *mut u8,
ldc: usize,
)
Performs matrix multiplication C = alpha * A * B + beta * C
§Safety
This function is unsafe because it performs raw pointer operations. The caller must ensure:
- All pointers are valid and point to properly allocated memory
- Matrix dimensions are correct (m, n, k)
- Leading dimensions (lda, ldb, ldc) are valid
- Memory regions do not overlap inappropriately
unsafe fn gemm_u16(
&self,
m: usize,
n: usize,
k: usize,
a: *const u16,
lda: usize,
b: *const u16,
ldb: usize,
c: *mut u16,
ldc: usize,
)
Performs matrix multiplication C = alpha * A * B + beta * C
§Safety
This function is unsafe because it performs raw pointer operations. The caller must ensure:
- All pointers are valid and point to properly allocated memory
- Matrix dimensions are correct (m, n, k)
- Leading dimensions (lda, ldb, ldc) are valid
- Memory regions do not overlap inappropriately
unsafe fn gemm_u32(
&self,
m: usize,
n: usize,
k: usize,
a: *const u32,
lda: usize,
b: *const u32,
ldb: usize,
c: *mut u32,
ldc: usize,
)
Performs matrix multiplication C = alpha * A * B + beta * C
§Safety
This function is unsafe because it performs raw pointer operations. The caller must ensure:
- All pointers are valid and point to properly allocated memory
- Matrix dimensions are correct (m, n, k)
- Leading dimensions (lda, ldb, ldc) are valid
- Memory regions do not overlap inappropriately
unsafe fn gemm_u64(
&self,
m: usize,
n: usize,
k: usize,
a: *const u64,
lda: usize,
b: *const u64,
ldb: usize,
c: *mut u64,
ldc: usize,
)
Performs matrix multiplication C = alpha * A * B + beta * C
§Safety
This function is unsafe because it performs raw pointer operations. The caller must ensure:
- All pointers are valid and point to properly allocated memory
- Matrix dimensions are correct (m, n, k)
- Leading dimensions (lda, ldb, ldc) are valid
- Memory regions do not overlap inappropriately
unsafe fn gemm_f16(
&self,
m: usize,
n: usize,
k: usize,
a: *const f16,
lda: usize,
b: *const f16,
ldb: usize,
c: *mut f16,
ldc: usize,
)
Performs matrix multiplication C = A * B for f16 matrices. Uses an optimized gemm library implementation with dynamic thread configuration.
§Safety
This function is unsafe because it performs raw pointer operations. The caller must ensure:
- All pointers are valid and point to properly allocated memory
- Matrix dimensions (m, n, k) and leading dimensions (lda, ldb, ldc) are valid for the matrices involved
- The matrices do not overlap in a way that would cause data races if accessed concurrently
unsafe fn gemm_bf16(
&self,
m: usize,
n: usize,
k: usize,
a: *const bf16,
lda: usize,
b: *const bf16,
ldb: usize,
c: *mut bf16,
ldc: usize,
)
Performs matrix multiplication C = A * B for bf16 matrices.
§Safety
This function is unsafe because it performs raw pointer operations. The caller must ensure:
- All pointers are valid and point to properly allocated memory
- Matrix dimensions are correct (m, n, k)
- Leading dimensions (lda, ldb, ldc) are valid
- Memory regions do not overlap inappropriately
fn v_add_f32(&self, a: &[f32], b: &[f32], out: &mut [f32])
fn v_add_f64(&self, a: &[f64], b: &[f64], out: &mut [f64])
fn v_add_i8(&self, a: &[i8], b: &[i8], out: &mut [i8])
fn v_add_i16(&self, a: &[i16], b: &[i16], out: &mut [i16])
fn v_add_i32(&self, a: &[i32], b: &[i32], out: &mut [i32])
fn v_add_i64(&self, a: &[i64], b: &[i64], out: &mut [i64])
fn v_add_u8(&self, a: &[u8], b: &[u8], out: &mut [u8])
fn v_add_u16(&self, a: &[u16], b: &[u16], out: &mut [u16])
fn v_add_u32(&self, a: &[u32], b: &[u32], out: &mut [u32])
fn v_add_u64(&self, a: &[u64], b: &[u64], out: &mut [u64])
fn v_add_f16(&self, a: &[f16], b: &[f16], out: &mut [f16])
fn v_add_bf16(&self, a: &[bf16], b: &[bf16], out: &mut [bf16])
fn v_sub_f32(&self, a: &[f32], b: &[f32], out: &mut [f32])
fn v_sub_f64(&self, a: &[f64], b: &[f64], out: &mut [f64])
fn v_sub_i8(&self, a: &[i8], b: &[i8], out: &mut [i8])
fn v_sub_i16(&self, a: &[i16], b: &[i16], out: &mut [i16])
fn v_sub_i32(&self, a: &[i32], b: &[i32], out: &mut [i32])
fn v_sub_i64(&self, a: &[i64], b: &[i64], out: &mut [i64])
fn v_sub_u8(&self, a: &[u8], b: &[u8], out: &mut [u8])
fn v_sub_u16(&self, a: &[u16], b: &[u16], out: &mut [u16])
fn v_sub_u32(&self, a: &[u32], b: &[u32], out: &mut [u32])
fn v_sub_u64(&self, a: &[u64], b: &[u64], out: &mut [u64])
fn v_sub_f16(&self, a: &[f16], b: &[f16], out: &mut [f16])
fn v_sub_bf16(&self, a: &[bf16], b: &[bf16], out: &mut [bf16])
fn v_mul_f32(&self, a: &[f32], b: &[f32], out: &mut [f32])
fn v_mul_f64(&self, a: &[f64], b: &[f64], out: &mut [f64])
fn v_mul_i8(&self, a: &[i8], b: &[i8], out: &mut [i8])
fn v_mul_i16(&self, a: &[i16], b: &[i16], out: &mut [i16])
fn v_mul_i32(&self, a: &[i32], b: &[i32], out: &mut [i32])
fn v_mul_i64(&self, a: &[i64], b: &[i64], out: &mut [i64])
fn v_mul_u8(&self, a: &[u8], b: &[u8], out: &mut [u8])
fn v_mul_u16(&self, a: &[u16], b: &[u16], out: &mut [u16])
fn v_mul_u32(&self, a: &[u32], b: &[u32], out: &mut [u32])
fn v_mul_u64(&self, a: &[u64], b: &[u64], out: &mut [u64])
fn v_mul_f16(&self, a: &[f16], b: &[f16], out: &mut [f16])
fn v_mul_bf16(&self, a: &[bf16], b: &[bf16], out: &mut [bf16])
fn v_div_f32(&self, a: &[f32], b: &[f32], out: &mut [f32])
fn v_div_f64(&self, a: &[f64], b: &[f64], out: &mut [f64])
fn v_div_f16(&self, a: &[f16], b: &[f16], out: &mut [f16])
fn v_div_bf16(&self, a: &[bf16], b: &[bf16], out: &mut [bf16])
fn v_add_scalar_f32(&self, x: &[f32], scalar: f32, out: &mut [f32])
fn v_add_scalar_f64(&self, x: &[f64], scalar: f64, out: &mut [f64])
fn v_add_scalar_i8(&self, x: &[i8], scalar: i8, out: &mut [i8])
fn v_add_scalar_i16(&self, x: &[i16], scalar: i16, out: &mut [i16])
fn v_add_scalar_i32(&self, x: &[i32], scalar: i32, out: &mut [i32])
fn v_add_scalar_i64(&self, x: &[i64], scalar: i64, out: &mut [i64])
fn v_add_scalar_u8(&self, x: &[u8], scalar: u8, out: &mut [u8])
fn v_add_scalar_u16(&self, x: &[u16], scalar: u16, out: &mut [u16])
fn v_add_scalar_u32(&self, x: &[u32], scalar: u32, out: &mut [u32])
fn v_add_scalar_u64(&self, x: &[u64], scalar: u64, out: &mut [u64])
fn v_add_scalar_f16(&self, x: &[f16], scalar: f16, out: &mut [f16])
fn v_add_scalar_bf16(&self, x: &[bf16], scalar: bf16, out: &mut [bf16])
fn v_sub_scalar_f32(&self, x: &[f32], scalar: f32, out: &mut [f32])
fn v_sub_scalar_f64(&self, x: &[f64], scalar: f64, out: &mut [f64])
fn v_sub_scalar_i8(&self, x: &[i8], scalar: i8, out: &mut [i8])
fn v_sub_scalar_i16(&self, x: &[i16], scalar: i16, out: &mut [i16])
fn v_sub_scalar_i32(&self, x: &[i32], scalar: i32, out: &mut [i32])
fn v_sub_scalar_i64(&self, x: &[i64], scalar: i64, out: &mut [i64])
fn v_sub_scalar_u8(&self, x: &[u8], scalar: u8, out: &mut [u8])
fn v_sub_scalar_u16(&self, x: &[u16], scalar: u16, out: &mut [u16])
fn v_sub_scalar_u32(&self, x: &[u32], scalar: u32, out: &mut [u32])
fn v_sub_scalar_u64(&self, x: &[u64], scalar: u64, out: &mut [u64])
fn v_sub_scalar_f16(&self, x: &[f16], scalar: f16, out: &mut [f16])
fn v_sub_scalar_bf16(&self, x: &[bf16], scalar: bf16, out: &mut [bf16])
fn v_mul_scalar_f32(&self, x: &[f32], scalar: f32, out: &mut [f32])
fn v_mul_scalar_f64(&self, x: &[f64], scalar: f64, out: &mut [f64])
fn v_mul_scalar_i8(&self, x: &[i8], scalar: i8, out: &mut [i8])
fn v_mul_scalar_i16(&self, x: &[i16], scalar: i16, out: &mut [i16])
fn v_mul_scalar_i32(&self, x: &[i32], scalar: i32, out: &mut [i32])
fn v_mul_scalar_i64(&self, x: &[i64], scalar: i64, out: &mut [i64])
fn v_mul_scalar_u8(&self, x: &[u8], scalar: u8, out: &mut [u8])
fn v_mul_scalar_u16(&self, x: &[u16], scalar: u16, out: &mut [u16])
fn v_mul_scalar_u32(&self, x: &[u32], scalar: u32, out: &mut [u32])
fn v_mul_scalar_u64(&self, x: &[u64], scalar: u64, out: &mut [u64])
fn v_mul_scalar_f16(&self, x: &[f16], scalar: f16, out: &mut [f16])
fn v_mul_scalar_bf16(&self, x: &[bf16], scalar: bf16, out: &mut [bf16])
fn v_div_scalar_f32(&self, x: &[f32], scalar: f32, out: &mut [f32])
fn v_div_scalar_f64(&self, x: &[f64], scalar: f64, out: &mut [f64])
fn v_div_scalar_f16(&self, x: &[f16], scalar: f16, out: &mut [f16])
fn v_div_scalar_bf16(&self, x: &[bf16], scalar: bf16, out: &mut [bf16])
fn asum_f32(&self, x: &[f32]) -> f32
fn asum_f64(&self, x: &[f64]) -> f64
fn asum_i8(&self, x: &[i8]) -> i8
fn asum_i16(&self, x: &[i16]) -> i16
fn asum_i32(&self, x: &[i32]) -> i32
fn asum_i64(&self, x: &[i64]) -> i64
fn asum_u8(&self, x: &[u8]) -> u8
fn asum_u16(&self, x: &[u16]) -> u16
fn asum_u32(&self, x: &[u32]) -> u32
fn asum_u64(&self, x: &[u64]) -> u64
fn asum_f16(&self, x: &[f16]) -> f32
fn asum_bf16(&self, x: &[bf16]) -> f32
fn v_pow_f32(&self, a: &[f32], b: &[f32], out: &mut [f32])
fn v_pow_f64(&self, a: &[f64], b: &[f64], out: &mut [f64])
fn sum_f32(&self, x: &[f32]) -> f32
SIMD-optimized sum function for f32 arrays. Uses f32x8 vectors with reduce_add for optimal performance.
fn sum_f64(&self, x: &[f64]) -> f64
SIMD-optimized sum function for f64 arrays. Uses f64x4 vectors with reduce_add for optimal performance.
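The sketch below is a portable approximation of the technique both descriptions allude to: accumulate partial sums across eight lanes, then reduce horizontally. The actual vector type (f32x8) and its reduce_add are not reproduced here, so this scalar emulation only illustrates the shape of the loop.

// Portable sketch of an f32x8-style chunked sum; the real implementation
// presumably uses a genuine SIMD vector type. Note that reassociating the
// additions this way can differ from a strict left-to-right sum in the
// last bits, the usual trade-off for SIMD reductions.
fn sum_f32_chunked(x: &[f32]) -> f32 {
    let mut lanes = [0.0f32; 8];
    let mut chunks = x.chunks_exact(8);
    for chunk in &mut chunks {
        for (lane, &v) in lanes.iter_mut().zip(chunk) {
            *lane += v;
        }
    }
    // "reduce_add": horizontal sum of the lanes, plus the scalar tail.
    lanes.iter().sum::<f32>() + chunks.remainder().iter().sum::<f32>()
}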