use crate::op::{ComputeContext, GradientContext, Op, OpError};
use crate::tensor::Tensor;
use crate::Float;
/// Runs the selected element-wise SIMD kernel over two equal-length
/// `f32` slices and returns the result as an owned `Vec`.
#[cfg(feature = "simd")]
fn dispatch_binary_f32(a: &[f32], b: &[f32], op: SimdBinaryKind) -> Vec<f32> {
    use scirs2_core::ndarray::ArrayView1;
    use scirs2_core::simd;

    let lhs = ArrayView1::from(a);
    let rhs = ArrayView1::from(b);
    let out = match op {
        SimdBinaryKind::Add => simd::simd_add_f32(&lhs, &rhs),
        SimdBinaryKind::Sub => simd::simd_sub_f32(&lhs, &rhs),
        SimdBinaryKind::Mul => simd::simd_mul_f32(&lhs, &rhs),
        SimdBinaryKind::Div => simd::simd_div_f32(&lhs, &rhs),
    };
    out.to_vec()
}
/// Runs the selected element-wise SIMD kernel over two equal-length
/// `f64` slices and returns the result as an owned `Vec`.
#[cfg(feature = "simd")]
fn dispatch_binary_f64(a: &[f64], b: &[f64], op: SimdBinaryKind) -> Vec<f64> {
    use scirs2_core::ndarray::ArrayView1;
    use scirs2_core::simd;

    let lhs = ArrayView1::from(a);
    let rhs = ArrayView1::from(b);
    let out = match op {
        SimdBinaryKind::Add => simd::simd_add_f64(&lhs, &rhs),
        SimdBinaryKind::Sub => simd::simd_sub_f64(&lhs, &rhs),
        SimdBinaryKind::Mul => simd::simd_mul_f64(&lhs, &rhs),
        SimdBinaryKind::Div => simd::simd_div_f64(&lhs, &rhs),
    };
    out.to_vec()
}
/// Identifies which element-wise binary kernel `dispatch_binary_f32` /
/// `dispatch_binary_f64` should run; lets the per-width dispatch helpers
/// share one code path per float type.
#[cfg(feature = "simd")]
#[derive(Debug, Clone, Copy)]
enum SimdBinaryKind {
/// Element-wise addition.
Add,
/// Element-wise subtraction.
Sub,
/// Element-wise multiplication.
Mul,
/// Element-wise division.
Div,
}
/// Element-wise addition op (`a + b`) with a SIMD fast path for
/// contiguous `f32`/`f64` inputs and an ndarray fallback.
pub struct SimdElementwiseAdd;

impl<F: Float> Op<F> for SimdElementwiseAdd {
    fn name(&self) -> &'static str {
        "SimdElementwiseAdd"
    }

    fn compute(&self, ctx: &mut ComputeContext<F>) -> Result<(), OpError> {
        let a = ctx.input(0);
        let b = ctx.input(1);
        #[cfg(feature = "simd")]
        {
            // The flat SIMD path requires identical shapes, not merely equal
            // element counts: a 2x3 and a 3x2 input have the same length but
            // must not be added element-for-element in flat order.
            if a.shape() == b.shape() {
                if let (Some(a_slice), Some(b_slice)) = (a.as_slice(), b.as_slice()) {
                    if let Some(result) =
                        try_simd_binary_op::<F>(a_slice, b_slice, SimdBinaryKind::Add)
                    {
                        let shape = a.shape().to_vec();
                        let arr = scirs2_core::ndarray::Array::from_shape_vec(
                            scirs2_core::ndarray::IxDyn(&shape),
                            result,
                        )
                        .map_err(|e| OpError::NdArrayError("SimdElementwiseAdd shape".into(), e))?;
                        ctx.append_output(arr);
                        return Ok(());
                    }
                }
            }
        }
        // Fallback: ndarray addition (handles broadcasting and
        // non-contiguous inputs).
        let result = &a.to_owned() + &b.to_owned();
        ctx.append_output(result);
        Ok(())
    }

    fn grad<'a>(&self, ctx: &mut GradientContext<'a, 'a, F>) {
        // d(a + b)/da = 1 and d(a + b)/db = 1: the upstream gradient flows
        // through unchanged to both inputs.
        let gy = ctx.output_grad();
        ctx.append_input_grad(0, Some(*gy));
        ctx.append_input_grad(1, Some(*gy));
    }
}
/// Element-wise subtraction op (`a - b`) with a SIMD fast path for
/// contiguous `f32`/`f64` inputs and an ndarray fallback.
pub struct SimdElementwiseSub;

impl<F: Float> Op<F> for SimdElementwiseSub {
    fn name(&self) -> &'static str {
        "SimdElementwiseSub"
    }

    fn compute(&self, ctx: &mut ComputeContext<F>) -> Result<(), OpError> {
        let a = ctx.input(0);
        let b = ctx.input(1);
        #[cfg(feature = "simd")]
        {
            // Require identical shapes (not just equal element counts) before
            // taking the flat SIMD path; equal-length but differently shaped
            // inputs would otherwise be combined incorrectly in flat order.
            if a.shape() == b.shape() {
                if let (Some(a_slice), Some(b_slice)) = (a.as_slice(), b.as_slice()) {
                    if let Some(result) =
                        try_simd_binary_op::<F>(a_slice, b_slice, SimdBinaryKind::Sub)
                    {
                        let shape = a.shape().to_vec();
                        let arr = scirs2_core::ndarray::Array::from_shape_vec(
                            scirs2_core::ndarray::IxDyn(&shape),
                            result,
                        )
                        .map_err(|e| OpError::NdArrayError("SimdElementwiseSub shape".into(), e))?;
                        ctx.append_output(arr);
                        return Ok(());
                    }
                }
            }
        }
        // Fallback: ndarray subtraction (handles broadcasting and
        // non-contiguous inputs).
        let result = &a.to_owned() - &b.to_owned();
        ctx.append_output(result);
        Ok(())
    }

    fn grad<'a>(&self, ctx: &mut GradientContext<'a, 'a, F>) {
        // d(a - b)/da = 1 and d(a - b)/db = -1.
        let gy = ctx.output_grad();
        ctx.append_input_grad(0, Some(*gy));
        ctx.append_input_grad(1, Some(crate::tensor_ops::neg(*gy)));
    }
}
/// Element-wise multiplication op (`a * b`) with a SIMD fast path for
/// contiguous `f32`/`f64` inputs and an ndarray fallback.
pub struct SimdElementwiseMul;

impl<F: Float> Op<F> for SimdElementwiseMul {
    fn name(&self) -> &'static str {
        "SimdElementwiseMul"
    }

    fn compute(&self, ctx: &mut ComputeContext<F>) -> Result<(), OpError> {
        let a = ctx.input(0);
        let b = ctx.input(1);
        #[cfg(feature = "simd")]
        {
            // Require identical shapes (not just equal element counts) before
            // taking the flat SIMD path; equal-length but differently shaped
            // inputs would otherwise be combined incorrectly in flat order.
            if a.shape() == b.shape() {
                if let (Some(a_slice), Some(b_slice)) = (a.as_slice(), b.as_slice()) {
                    if let Some(result) =
                        try_simd_binary_op::<F>(a_slice, b_slice, SimdBinaryKind::Mul)
                    {
                        let shape = a.shape().to_vec();
                        let arr = scirs2_core::ndarray::Array::from_shape_vec(
                            scirs2_core::ndarray::IxDyn(&shape),
                            result,
                        )
                        .map_err(|e| OpError::NdArrayError("SimdElementwiseMul shape".into(), e))?;
                        ctx.append_output(arr);
                        return Ok(());
                    }
                }
            }
        }
        // Fallback: ndarray multiplication (handles broadcasting and
        // non-contiguous inputs).
        let result = &a.to_owned() * &b.to_owned();
        ctx.append_output(result);
        Ok(())
    }

    fn grad<'a>(&self, ctx: &mut GradientContext<'a, 'a, F>) {
        // Product rule: d(a*b)/da = b, d(a*b)/db = a.
        let gy = ctx.output_grad();
        let a = ctx.input(0);
        let b = ctx.input(1);
        ctx.append_input_grad(0, Some(*gy * b));
        ctx.append_input_grad(1, Some(*gy * a));
    }
}
/// Element-wise division op (`a / b`) with a SIMD fast path for
/// contiguous `f32`/`f64` inputs and an ndarray fallback.
pub struct SimdElementwiseDiv;

impl<F: Float> Op<F> for SimdElementwiseDiv {
    fn name(&self) -> &'static str {
        "SimdElementwiseDiv"
    }

    fn compute(&self, ctx: &mut ComputeContext<F>) -> Result<(), OpError> {
        let a = ctx.input(0);
        let b = ctx.input(1);
        #[cfg(feature = "simd")]
        {
            // Require identical shapes (not just equal element counts) before
            // taking the flat SIMD path; equal-length but differently shaped
            // inputs would otherwise be combined incorrectly in flat order.
            if a.shape() == b.shape() {
                if let (Some(a_slice), Some(b_slice)) = (a.as_slice(), b.as_slice()) {
                    if let Some(result) =
                        try_simd_binary_op::<F>(a_slice, b_slice, SimdBinaryKind::Div)
                    {
                        let shape = a.shape().to_vec();
                        let arr = scirs2_core::ndarray::Array::from_shape_vec(
                            scirs2_core::ndarray::IxDyn(&shape),
                            result,
                        )
                        .map_err(|e| OpError::NdArrayError("SimdElementwiseDiv shape".into(), e))?;
                        ctx.append_output(arr);
                        return Ok(());
                    }
                }
            }
        }
        // Fallback: ndarray division (handles broadcasting and
        // non-contiguous inputs).
        let result = &a.to_owned() / &b.to_owned();
        ctx.append_output(result);
        Ok(())
    }

    fn grad<'a>(&self, ctx: &mut GradientContext<'a, 'a, F>) {
        // Quotient rule: d(a/b)/da = 1/b, d(a/b)/db = -a / b^2.
        let gy = ctx.output_grad();
        let a = ctx.input(0);
        let b = ctx.input(1);
        let g = ctx.graph();
        ctx.append_input_grad(0, Some(*gy / b));
        let neg_one = crate::tensor_ops::scalar(-F::one(), g);
        let b_sq = b * b;
        ctx.append_input_grad(1, Some(neg_one * *gy * a / b_sq));
    }
}
/// Adds a gradient tensor into an accumulator (`acc + grad`), using the
/// SIMD add kernel when shapes match and both tensors are contiguous.
pub struct SimdGradientAccumulate;

impl<F: Float> Op<F> for SimdGradientAccumulate {
    fn name(&self) -> &'static str {
        "SimdGradientAccumulate"
    }

    fn compute(&self, ctx: &mut ComputeContext<F>) -> Result<(), OpError> {
        let acc = ctx.input(0);
        let grad = ctx.input(1);
        #[cfg(feature = "simd")]
        {
            // Shapes (not just element counts) must match for the flat path;
            // equal lengths alone do not guarantee element correspondence.
            if acc.shape() == grad.shape() {
                if let (Some(acc_slice), Some(grad_slice)) = (acc.as_slice(), grad.as_slice()) {
                    if let Some(result) =
                        try_simd_binary_op::<F>(acc_slice, grad_slice, SimdBinaryKind::Add)
                    {
                        let shape = acc.shape().to_vec();
                        let arr = scirs2_core::ndarray::Array::from_shape_vec(
                            scirs2_core::ndarray::IxDyn(&shape),
                            result,
                        )
                        .map_err(|e| {
                            OpError::NdArrayError("SimdGradientAccumulate shape".into(), e)
                        })?;
                        ctx.append_output(arr);
                        return Ok(());
                    }
                }
            }
        }
        // Fallback: ndarray addition.
        let result = &acc.to_owned() + &grad.to_owned();
        ctx.append_output(result);
        Ok(())
    }

    fn grad<'a>(&self, ctx: &mut GradientContext<'a, 'a, F>) {
        // Accumulation is a plain sum, so both inputs receive gy unchanged.
        let gy = ctx.output_grad();
        ctx.append_input_grad(0, Some(*gy));
        ctx.append_input_grad(1, Some(*gy));
    }
}
/// Fused `acc + grad * scale` accumulation op; uses a SIMD FMA kernel
/// when shapes match and both tensors are contiguous.
pub struct SimdScaledGradientAccumulate<F: Float> {
    /// Multiplier applied to the gradient before it is accumulated.
    pub scale: F,
}

impl<F: Float> Op<F> for SimdScaledGradientAccumulate<F> {
    fn name(&self) -> &'static str {
        "SimdScaledGradientAccumulate"
    }

    fn compute(&self, ctx: &mut ComputeContext<F>) -> Result<(), OpError> {
        let acc = ctx.input(0);
        let grad = ctx.input(1);
        #[cfg(feature = "simd")]
        {
            // Shapes (not just element counts) must match for the flat path;
            // equal lengths alone do not guarantee element correspondence.
            if acc.shape() == grad.shape() {
                if let (Some(acc_slice), Some(grad_slice)) = (acc.as_slice(), grad.as_slice()) {
                    if let Some(result) = try_simd_fma::<F>(grad_slice, self.scale, acc_slice) {
                        let shape = acc.shape().to_vec();
                        let arr = scirs2_core::ndarray::Array::from_shape_vec(
                            scirs2_core::ndarray::IxDyn(&shape),
                            result,
                        )
                        .map_err(|e| {
                            OpError::NdArrayError("SimdScaledGradAccum shape".into(), e)
                        })?;
                        ctx.append_output(arr);
                        return Ok(());
                    }
                }
            }
        }
        // Fallback: scale, then add via ndarray.
        let scaled = grad.mapv(|v| v * self.scale);
        let result = &acc.to_owned() + &scaled;
        ctx.append_output(result);
        Ok(())
    }

    fn grad<'a>(&self, ctx: &mut GradientContext<'a, 'a, F>) {
        // d(out)/d(acc) = 1; d(out)/d(grad) = scale.
        let gy = ctx.output_grad();
        let g = ctx.graph();
        ctx.append_input_grad(0, Some(*gy));
        let scale_tensor = crate::tensor_ops::scalar(self.scale, g);
        ctx.append_input_grad(1, Some(*gy * scale_tensor));
    }
}
/// Adds a 1-D bias row-wise to a 2-D matrix (`[rows, cols] + [cols]`)
/// using per-row SIMD adds; any other shape combination is delegated to
/// ndarray broadcasting.
pub struct SimdBroadcastAdd;

impl<F: Float> Op<F> for SimdBroadcastAdd {
    fn name(&self) -> &'static str {
        "SimdBroadcastAdd"
    }

    fn compute(&self, ctx: &mut ComputeContext<F>) -> Result<(), OpError> {
        let x = ctx.input(0);
        let bias = ctx.input(1);
        #[cfg(feature = "simd")]
        {
            // All shape bookkeeping lives inside the cfg block so builds
            // without the `simd` feature get no unused-variable warnings.
            let x_shape = x.shape().to_vec();
            let bias_shape = bias.shape();
            if x_shape.len() == 2 && bias_shape.len() == 1 && x_shape[1] == bias_shape[0] {
                let rows = x_shape[0];
                let cols = x_shape[1];
                if let (Some(x_slice), Some(bias_slice)) = (x.as_slice(), bias.as_slice()) {
                    // Note: `result_vec` was previously bound `mut` without
                    // ever being mutated.
                    if let Some(result_vec) =
                        try_simd_broadcast_add_2d::<F>(x_slice, bias_slice, rows, cols)
                    {
                        let arr = scirs2_core::ndarray::Array::from_shape_vec(
                            scirs2_core::ndarray::IxDyn(&x_shape),
                            result_vec,
                        )
                        .map_err(|e| OpError::NdArrayError("SimdBroadcastAdd shape".into(), e))?;
                        ctx.append_output(arr);
                        return Ok(());
                    }
                }
            }
        }
        // Fallback: ndarray broadcasting handles all other shape combinations.
        let result = &x.to_owned() + &bias.to_owned();
        ctx.append_output(result);
        Ok(())
    }

    fn grad<'a>(&self, ctx: &mut GradientContext<'a, 'a, F>) {
        // dx = gy (identity); dbias = gy summed over the broadcast (row) axis.
        let gy = ctx.output_grad();
        ctx.append_input_grad(0, Some(*gy));
        let dbias = crate::tensor_ops::reduce_sum(*gy, &[0], false);
        ctx.append_input_grad(1, Some(dbias));
    }
}
/// Multiplies each row of a 2-D matrix by a 1-D scale vector
/// (`[rows, cols] * [cols]`) using per-row SIMD multiplies; any other
/// shape combination is delegated to ndarray broadcasting.
pub struct SimdBroadcastMul;

impl<F: Float> Op<F> for SimdBroadcastMul {
    fn name(&self) -> &'static str {
        "SimdBroadcastMul"
    }

    fn compute(&self, ctx: &mut ComputeContext<F>) -> Result<(), OpError> {
        let x = ctx.input(0);
        let scale = ctx.input(1);
        #[cfg(feature = "simd")]
        {
            // All shape bookkeeping lives inside the cfg block so builds
            // without the `simd` feature get no unused-variable warnings.
            let x_shape = x.shape().to_vec();
            let scale_shape = scale.shape();
            if x_shape.len() == 2 && scale_shape.len() == 1 && x_shape[1] == scale_shape[0] {
                let rows = x_shape[0];
                let cols = x_shape[1];
                if let (Some(x_slice), Some(scale_slice)) = (x.as_slice(), scale.as_slice()) {
                    if let Some(result_vec) =
                        try_simd_broadcast_mul_2d::<F>(x_slice, scale_slice, rows, cols)
                    {
                        let arr = scirs2_core::ndarray::Array::from_shape_vec(
                            scirs2_core::ndarray::IxDyn(&x_shape),
                            result_vec,
                        )
                        .map_err(|e| OpError::NdArrayError("SimdBroadcastMul shape".into(), e))?;
                        ctx.append_output(arr);
                        return Ok(());
                    }
                }
            }
        }
        // Fallback: ndarray broadcasting handles all other shape combinations.
        let result = &x.to_owned() * &scale.to_owned();
        ctx.append_output(result);
        Ok(())
    }

    fn grad<'a>(&self, ctx: &mut GradientContext<'a, 'a, F>) {
        // dx = gy * scale (broadcast); dscale = sum over rows of gy * x.
        let gy = ctx.output_grad();
        let x = ctx.input(0);
        let scale = ctx.input(1);
        ctx.append_input_grad(0, Some(*gy * scale));
        let dscale = crate::tensor_ops::reduce_sum(*gy * x, &[0], false);
        ctx.append_input_grad(1, Some(dscale));
    }
}
/// Rectified linear unit (`max(x, 0)` element-wise) with a SIMD fast
/// path for contiguous `f32`/`f64` data.
pub struct SimdReLU;

impl<F: Float> Op<F> for SimdReLU {
    fn name(&self) -> &'static str {
        "SimdReLU"
    }

    fn compute(&self, ctx: &mut ComputeContext<F>) -> Result<(), OpError> {
        let input = ctx.input(0);
        #[cfg(feature = "simd")]
        {
            if let Some(data) = input.as_slice() {
                if let Some(activated) = try_simd_relu::<F>(data) {
                    let dims = input.shape().to_vec();
                    let arr = scirs2_core::ndarray::Array::from_shape_vec(
                        scirs2_core::ndarray::IxDyn(&dims),
                        activated,
                    )
                    .map_err(|e| OpError::NdArrayError("SimdReLU shape".into(), e))?;
                    ctx.append_output(arr);
                    return Ok(());
                }
            }
        }
        // Scalar fallback: values not strictly greater than zero map to zero.
        let out = input.mapv(|v| if v > F::zero() { v } else { F::zero() });
        ctx.append_output(out);
        Ok(())
    }

    fn grad<'a>(&self, ctx: &mut GradientContext<'a, 'a, F>) {
        // dReLU/dx is 1 where x > 0 and 0 elsewhere; `greater` builds the mask.
        let gy = ctx.output_grad();
        let x = ctx.input(0);
        let g = ctx.graph();
        let zero = crate::tensor_ops::scalar(F::zero(), g);
        let mask = crate::tensor_ops::greater(x, zero);
        ctx.append_input_grad(0, Some(*gy * mask));
    }
}
/// Logistic sigmoid activation with a SIMD fast path for contiguous
/// `f32`/`f64` data.
pub struct SimdSigmoid;

impl<F: Float> Op<F> for SimdSigmoid {
    fn name(&self) -> &'static str {
        "SimdSigmoid"
    }

    fn compute(&self, ctx: &mut ComputeContext<F>) -> Result<(), OpError> {
        let input = ctx.input(0);
        #[cfg(feature = "simd")]
        {
            if let Some(data) = input.as_slice() {
                if let Some(activated) = try_simd_sigmoid::<F>(data) {
                    let dims = input.shape().to_vec();
                    let arr = scirs2_core::ndarray::Array::from_shape_vec(
                        scirs2_core::ndarray::IxDyn(&dims),
                        activated,
                    )
                    .map_err(|e| OpError::NdArrayError("SimdSigmoid shape".into(), e))?;
                    ctx.append_output(arr);
                    return Ok(());
                }
            }
        }
        // Scalar fallback via the identity sigmoid(x) = 0.5 * tanh(0.5*x) + 0.5.
        let half = F::from(0.5).ok_or_else(|| OpError::ConversionError {
            context: "SimdSigmoid half constant".into(),
            from_type: "f64".into(),
            to_type: std::any::type_name::<F>().into(),
        })?;
        let out = input.mapv(move |v| ((v * half).tanh() * half) + half);
        ctx.append_output(out);
        Ok(())
    }

    fn grad<'a>(&self, ctx: &mut GradientContext<'a, 'a, F>) {
        // dy/dx = y * (1 - y), expressed in terms of the op's own output y.
        let gy = ctx.output_grad();
        let y = ctx.output();
        let g = ctx.graph();
        let one = crate::tensor_ops::scalar(F::one(), g);
        ctx.append_input_grad(0, Some(*gy * y * (one - y)));
    }
}
/// Hyperbolic tangent activation with a SIMD fast path for contiguous
/// `f32`/`f64` data.
pub struct SimdTanh;

impl<F: Float> Op<F> for SimdTanh {
    fn name(&self) -> &'static str {
        "SimdTanh"
    }

    fn compute(&self, ctx: &mut ComputeContext<F>) -> Result<(), OpError> {
        let input = ctx.input(0);
        #[cfg(feature = "simd")]
        {
            if let Some(data) = input.as_slice() {
                if let Some(activated) = try_simd_tanh::<F>(data) {
                    let dims = input.shape().to_vec();
                    let arr = scirs2_core::ndarray::Array::from_shape_vec(
                        scirs2_core::ndarray::IxDyn(&dims),
                        activated,
                    )
                    .map_err(|e| OpError::NdArrayError("SimdTanh shape".into(), e))?;
                    ctx.append_output(arr);
                    return Ok(());
                }
            }
        }
        // Scalar fallback.
        let out = input.mapv(|v| v.tanh());
        ctx.append_output(out);
        Ok(())
    }

    fn grad<'a>(&self, ctx: &mut GradientContext<'a, 'a, F>) {
        // dy/dx = 1 - y^2, expressed in terms of the op's own output y.
        let gy = ctx.output_grad();
        let y = ctx.output();
        let g = ctx.graph();
        let one = crate::tensor_ops::scalar(F::one(), g);
        let y_sq = y * y;
        ctx.append_input_grad(0, Some(*gy * (one - y_sq)));
    }
}
/// Dot product of two equal-length 1-D tensors, producing a 0-d output;
/// uses a SIMD reduction kernel when available.
pub struct SimdDotProduct;

impl<F: Float> Op<F> for SimdDotProduct {
    fn name(&self) -> &'static str {
        "SimdDotProduct"
    }

    fn compute(&self, ctx: &mut ComputeContext<F>) -> Result<(), OpError> {
        let a = ctx.input(0);
        let b = ctx.input(1);
        // Validate rank and length up front; both code paths rely on these.
        if a.ndim() != 1 || b.ndim() != 1 {
            return Err(OpError::IncompatibleShape(
                "SimdDotProduct requires 1-D inputs".into(),
            ));
        }
        if a.len() != b.len() {
            return Err(OpError::IncompatibleShape(format!(
                "SimdDotProduct: length mismatch: {} vs {}",
                a.len(),
                b.len()
            )));
        }
        #[cfg(feature = "simd")]
        {
            if let (Some(a_slice), Some(b_slice)) = (a.as_slice(), b.as_slice()) {
                if let Some(dot_val) = try_simd_dot::<F>(a_slice, b_slice) {
                    ctx.append_output(scirs2_core::ndarray::arr0(dot_val).into_dyn());
                    return Ok(());
                }
            }
        }
        // Scalar fallback: accumulate pairwise products left to right.
        let sum = a
            .iter()
            .zip(b.iter())
            .fold(F::zero(), |acc, (&ai, &bi)| acc + ai * bi);
        ctx.append_output(scirs2_core::ndarray::arr0(sum).into_dyn());
        Ok(())
    }

    fn grad<'a>(&self, ctx: &mut GradientContext<'a, 'a, F>) {
        // d(a.b)/da = b and d(a.b)/db = a, each scaled by the upstream grad.
        let gy = ctx.output_grad();
        let a = ctx.input(0);
        let b = ctx.input(1);
        ctx.append_input_grad(0, Some(*gy * b));
        ctx.append_input_grad(1, Some(*gy * a));
    }
}
/// Sums every element of the input into a 0-d output, with a SIMD
/// reduction fast path for contiguous `f32`/`f64` data.
pub struct SimdReductionSum;

impl<F: Float> Op<F> for SimdReductionSum {
    fn name(&self) -> &'static str {
        "SimdReductionSum"
    }

    fn compute(&self, ctx: &mut ComputeContext<F>) -> Result<(), OpError> {
        let input = ctx.input(0);
        #[cfg(feature = "simd")]
        {
            if let Some(data) = input.as_slice() {
                if let Some(total) = try_simd_sum::<F>(data) {
                    ctx.append_output(scirs2_core::ndarray::arr0(total).into_dyn());
                    return Ok(());
                }
            }
        }
        // Scalar fallback: left-to-right accumulation.
        let mut total = F::zero();
        for &v in input.iter() {
            total = total + v;
        }
        ctx.append_output(scirs2_core::ndarray::arr0(total).into_dyn());
        Ok(())
    }

    fn grad<'a>(&self, ctx: &mut GradientContext<'a, 'a, F>) {
        // d(sum x)/dx = 1 for every element: broadcast gy over x's shape.
        let gy = ctx.output_grad();
        let x = ctx.input(0);
        let g = ctx.graph();
        let ones_shape = crate::tensor_ops::shape(x);
        let ones_val = crate::tensor_ops::ones(&ones_shape, g);
        ctx.append_input_grad(0, Some(ones_val * *gy));
    }
}
/// Runs `kind` over `a`/`b` with the SIMD kernels when `F` is exactly
/// `f32` or `f64`; returns `None` for any other float type so the caller
/// can fall back to a scalar path. Callers are expected to have verified
/// that the slices have equal length.
#[cfg(feature = "simd")]
fn try_simd_binary_op<F: Float>(a: &[F], b: &[F], kind: SimdBinaryKind) -> Option<Vec<F>> {
use crate::same_type;
if same_type::<F, f32>() {
// SAFETY: `same_type::<F, f32>()` guarantees F and f32 are the same
// type, so the pointer cast preserves element size, alignment and count.
let a_f32: &[f32] =
unsafe { std::slice::from_raw_parts(a.as_ptr() as *const f32, a.len()) };
let b_f32: &[f32] =
unsafe { std::slice::from_raw_parts(b.as_ptr() as *const f32, b.len()) };
let result = dispatch_binary_f32(a_f32, b_f32, kind);
// SAFETY: F == f32, so rebuilding the Vec through an F pointer reuses
// the same allocation; ManuallyDrop prevents a double free of the buffer.
let result_f: Vec<F> = unsafe {
let mut v = std::mem::ManuallyDrop::new(result);
Vec::from_raw_parts(v.as_mut_ptr() as *mut F, v.len(), v.capacity())
};
Some(result_f)
} else if same_type::<F, f64>() {
// SAFETY: same reasoning as the f32 branch, with F == f64.
let a_f64: &[f64] =
unsafe { std::slice::from_raw_parts(a.as_ptr() as *const f64, a.len()) };
let b_f64: &[f64] =
unsafe { std::slice::from_raw_parts(b.as_ptr() as *const f64, b.len()) };
let result = dispatch_binary_f64(a_f64, b_f64, kind);
// SAFETY: F == f64; same Vec-reinterpretation argument as above.
let result_f: Vec<F> = unsafe {
let mut v = std::mem::ManuallyDrop::new(result);
Vec::from_raw_parts(v.as_mut_ptr() as *mut F, v.len(), v.capacity())
};
Some(result_f)
} else {
// Other float types (e.g. half precision) have no SIMD kernel here.
None
}
}
/// Computes `a * scale + c` element-wise when `F` is exactly `f32` or
/// `f64`; returns `None` otherwise. The f32 branch calls a fused kernel,
/// while the f64 branch multiplies then adds in two separate kernel
/// passes (so its rounding may differ from a true fused multiply-add).
#[cfg(feature = "simd")]
fn try_simd_fma<F: Float>(a: &[F], scale: F, c: &[F]) -> Option<Vec<F>> {
use crate::same_type;
if same_type::<F, f32>() {
// SAFETY: F == f32 (checked above), so the slice reinterpretations
// preserve element size, alignment and count.
let a_f32: &[f32] =
unsafe { std::slice::from_raw_parts(a.as_ptr() as *const f32, a.len()) };
let c_f32: &[f32] =
unsafe { std::slice::from_raw_parts(c.as_ptr() as *const f32, c.len()) };
// SAFETY: F == f32, so reading the scalar through an f32 pointer is sound.
let scale_f32: f32 = unsafe { *(&scale as *const F as *const f32) };
// Broadcast the scalar into a full-length vector because the FMA kernel
// takes three equal-length operands.
let scale_arr = scirs2_core::ndarray::Array1::from_elem(a.len(), scale_f32);
let a_view = scirs2_core::ndarray::ArrayView1::from(a_f32);
let scale_view = scale_arr.view();
let c_view = scirs2_core::ndarray::ArrayView1::from(c_f32);
let result = scirs2_core::simd::simd_fma_f32_ultra(&a_view, &scale_view, &c_view);
let result_vec = result.to_vec();
// SAFETY: F == f32; rebuilding the Vec via an F pointer reuses the same
// allocation, and ManuallyDrop prevents a double free.
let result_f: Vec<F> = unsafe {
let mut v = std::mem::ManuallyDrop::new(result_vec);
Vec::from_raw_parts(v.as_mut_ptr() as *mut F, v.len(), v.capacity())
};
Some(result_f)
} else if same_type::<F, f64>() {
// SAFETY: same reasoning as the f32 branch, with F == f64.
let a_f64: &[f64] =
unsafe { std::slice::from_raw_parts(a.as_ptr() as *const f64, a.len()) };
let c_f64: &[f64] =
unsafe { std::slice::from_raw_parts(c.as_ptr() as *const f64, c.len()) };
// SAFETY: F == f64, so reading the scalar through an f64 pointer is sound.
let scale_f64: f64 = unsafe { *(&scale as *const F as *const f64) };
let a_view = scirs2_core::ndarray::ArrayView1::from(a_f64);
// Despite its name, `scale_arr` holds the element-wise product
// `a * scale`, which is then added to `c` below.
let scale_arr = scirs2_core::simd::simd_scalar_mul_f64(&a_view, scale_f64);
let scale_view = scale_arr.view();
let c_view = scirs2_core::ndarray::ArrayView1::from(c_f64);
let result = scirs2_core::simd::simd_add_f64(&scale_view, &c_view);
let result_vec = result.to_vec();
// SAFETY: F == f64; same Vec-reinterpretation argument as above.
let result_f: Vec<F> = unsafe {
let mut v = std::mem::ManuallyDrop::new(result_vec);
Vec::from_raw_parts(v.as_mut_ptr() as *mut F, v.len(), v.capacity())
};
Some(result_f)
} else {
None
}
}
/// Row-wise broadcast add for a row-major `rows x cols` matrix `x` and a
/// `cols`-length `bias`, via one SIMD add per row. Returns `None` unless
/// `F` is exactly `f32` or `f64`. Panics if `x.len() < rows * cols` or
/// `bias.len() != cols` (callers validate shapes before dispatching).
#[cfg(feature = "simd")]
fn try_simd_broadcast_add_2d<F: Float>(
x: &[F],
bias: &[F],
rows: usize,
cols: usize,
) -> Option<Vec<F>> {
use crate::same_type;
if same_type::<F, f32>() {
// SAFETY: F == f32 (checked above), so the slice reinterpretations
// preserve element size, alignment and count.
let x_f32: &[f32] =
unsafe { std::slice::from_raw_parts(x.as_ptr() as *const f32, x.len()) };
let bias_f32: &[f32] =
unsafe { std::slice::from_raw_parts(bias.as_ptr() as *const f32, bias.len()) };
let bias_view = scirs2_core::ndarray::ArrayView1::from(bias_f32);
let mut result: Vec<f32> = Vec::with_capacity(rows * cols);
// Add the bias to each contiguous row slice in turn.
for row in 0..rows {
let row_start = row * cols;
let row_end = row_start + cols;
let row_slice = &x_f32[row_start..row_end];
let row_view = scirs2_core::ndarray::ArrayView1::from(row_slice);
let row_result = scirs2_core::simd::simd_add_f32(&row_view, &bias_view);
result.extend(row_result.iter().copied());
}
// SAFETY: F == f32; rebuilding the Vec via an F pointer reuses the same
// allocation, and ManuallyDrop prevents a double free.
let result_f: Vec<F> = unsafe {
let mut v = std::mem::ManuallyDrop::new(result);
Vec::from_raw_parts(v.as_mut_ptr() as *mut F, v.len(), v.capacity())
};
Some(result_f)
} else if same_type::<F, f64>() {
// SAFETY: same reasoning as the f32 branch, with F == f64.
let x_f64: &[f64] =
unsafe { std::slice::from_raw_parts(x.as_ptr() as *const f64, x.len()) };
let bias_f64: &[f64] =
unsafe { std::slice::from_raw_parts(bias.as_ptr() as *const f64, bias.len()) };
let bias_view = scirs2_core::ndarray::ArrayView1::from(bias_f64);
let mut result: Vec<f64> = Vec::with_capacity(rows * cols);
for row in 0..rows {
let row_start = row * cols;
let row_end = row_start + cols;
let row_slice = &x_f64[row_start..row_end];
let row_view = scirs2_core::ndarray::ArrayView1::from(row_slice);
let row_result = scirs2_core::simd::simd_add_f64(&row_view, &bias_view);
result.extend(row_result.iter().copied());
}
// SAFETY: F == f64; same Vec-reinterpretation argument as above.
let result_f: Vec<F> = unsafe {
let mut v = std::mem::ManuallyDrop::new(result);
Vec::from_raw_parts(v.as_mut_ptr() as *mut F, v.len(), v.capacity())
};
Some(result_f)
} else {
None
}
}
/// Row-wise broadcast multiply for a row-major `rows x cols` matrix `x`
/// and a `cols`-length `scale`, via one SIMD multiply per row. Returns
/// `None` unless `F` is exactly `f32` or `f64`. Panics if
/// `x.len() < rows * cols` or `scale.len() != cols` (callers validate
/// shapes before dispatching).
#[cfg(feature = "simd")]
fn try_simd_broadcast_mul_2d<F: Float>(
x: &[F],
scale: &[F],
rows: usize,
cols: usize,
) -> Option<Vec<F>> {
use crate::same_type;
if same_type::<F, f32>() {
// SAFETY: F == f32 (checked above), so the slice reinterpretations
// preserve element size, alignment and count.
let x_f32: &[f32] =
unsafe { std::slice::from_raw_parts(x.as_ptr() as *const f32, x.len()) };
let scale_f32: &[f32] =
unsafe { std::slice::from_raw_parts(scale.as_ptr() as *const f32, scale.len()) };
let scale_view = scirs2_core::ndarray::ArrayView1::from(scale_f32);
let mut result: Vec<f32> = Vec::with_capacity(rows * cols);
// Multiply each contiguous row slice by the scale vector in turn.
for row in 0..rows {
let row_start = row * cols;
let row_end = row_start + cols;
let row_slice = &x_f32[row_start..row_end];
let row_view = scirs2_core::ndarray::ArrayView1::from(row_slice);
let row_result = scirs2_core::simd::simd_mul_f32(&row_view, &scale_view);
result.extend(row_result.iter().copied());
}
// SAFETY: F == f32; rebuilding the Vec via an F pointer reuses the same
// allocation, and ManuallyDrop prevents a double free.
let result_f: Vec<F> = unsafe {
let mut v = std::mem::ManuallyDrop::new(result);
Vec::from_raw_parts(v.as_mut_ptr() as *mut F, v.len(), v.capacity())
};
Some(result_f)
} else if same_type::<F, f64>() {
// SAFETY: same reasoning as the f32 branch, with F == f64.
let x_f64: &[f64] =
unsafe { std::slice::from_raw_parts(x.as_ptr() as *const f64, x.len()) };
let scale_f64: &[f64] =
unsafe { std::slice::from_raw_parts(scale.as_ptr() as *const f64, scale.len()) };
let scale_view = scirs2_core::ndarray::ArrayView1::from(scale_f64);
let mut result: Vec<f64> = Vec::with_capacity(rows * cols);
for row in 0..rows {
let row_start = row * cols;
let row_end = row_start + cols;
let row_slice = &x_f64[row_start..row_end];
let row_view = scirs2_core::ndarray::ArrayView1::from(row_slice);
let row_result = scirs2_core::simd::simd_mul_f64(&row_view, &scale_view);
result.extend(row_result.iter().copied());
}
// SAFETY: F == f64; same Vec-reinterpretation argument as above.
let result_f: Vec<F> = unsafe {
let mut v = std::mem::ManuallyDrop::new(result);
Vec::from_raw_parts(v.as_mut_ptr() as *mut F, v.len(), v.capacity())
};
Some(result_f)
} else {
None
}
}
/// Applies the SIMD ReLU kernel to `x` when `F` is exactly `f32` or
/// `f64`; returns `None` otherwise so the caller can fall back.
#[cfg(feature = "simd")]
fn try_simd_relu<F: Float>(x: &[F]) -> Option<Vec<F>> {
use crate::same_type;
if same_type::<F, f32>() {
// SAFETY: F == f32 (checked above); reinterpretation preserves layout.
let x_f32: &[f32] =
unsafe { std::slice::from_raw_parts(x.as_ptr() as *const f32, x.len()) };
let x_view = scirs2_core::ndarray::ArrayView1::from(x_f32);
let result = scirs2_core::simd::simd_relu_f32(&x_view);
let result_vec = result.to_vec();
// SAFETY: F == f32; the Vec is rebuilt over the same allocation and
// ManuallyDrop prevents a double free.
let result_f: Vec<F> = unsafe {
let mut v = std::mem::ManuallyDrop::new(result_vec);
Vec::from_raw_parts(v.as_mut_ptr() as *mut F, v.len(), v.capacity())
};
Some(result_f)
} else if same_type::<F, f64>() {
// SAFETY: same reasoning as the f32 branch, with F == f64.
let x_f64: &[f64] =
unsafe { std::slice::from_raw_parts(x.as_ptr() as *const f64, x.len()) };
let x_view = scirs2_core::ndarray::ArrayView1::from(x_f64);
let result = scirs2_core::simd::simd_relu_f64(&x_view);
let result_vec = result.to_vec();
// SAFETY: F == f64; same Vec-reinterpretation argument as above.
let result_f: Vec<F> = unsafe {
let mut v = std::mem::ManuallyDrop::new(result_vec);
Vec::from_raw_parts(v.as_mut_ptr() as *mut F, v.len(), v.capacity())
};
Some(result_f)
} else {
None
}
}
/// Applies the SIMD sigmoid kernel to `x` when `F` is exactly `f32` or
/// `f64`; returns `None` otherwise so the caller can fall back.
#[cfg(feature = "simd")]
fn try_simd_sigmoid<F: Float>(x: &[F]) -> Option<Vec<F>> {
use crate::same_type;
if same_type::<F, f32>() {
// SAFETY: F == f32 (checked above); reinterpretation preserves layout.
let x_f32: &[f32] =
unsafe { std::slice::from_raw_parts(x.as_ptr() as *const f32, x.len()) };
let x_view = scirs2_core::ndarray::ArrayView1::from(x_f32);
let result = scirs2_core::simd::simd_sigmoid_f32(&x_view);
let result_vec = result.to_vec();
// SAFETY: F == f32; the Vec is rebuilt over the same allocation and
// ManuallyDrop prevents a double free.
let result_f: Vec<F> = unsafe {
let mut v = std::mem::ManuallyDrop::new(result_vec);
Vec::from_raw_parts(v.as_mut_ptr() as *mut F, v.len(), v.capacity())
};
Some(result_f)
} else if same_type::<F, f64>() {
// SAFETY: same reasoning as the f32 branch, with F == f64.
let x_f64: &[f64] =
unsafe { std::slice::from_raw_parts(x.as_ptr() as *const f64, x.len()) };
let x_view = scirs2_core::ndarray::ArrayView1::from(x_f64);
let result = scirs2_core::simd::simd_sigmoid_f64(&x_view);
let result_vec = result.to_vec();
// SAFETY: F == f64; same Vec-reinterpretation argument as above.
let result_f: Vec<F> = unsafe {
let mut v = std::mem::ManuallyDrop::new(result_vec);
Vec::from_raw_parts(v.as_mut_ptr() as *mut F, v.len(), v.capacity())
};
Some(result_f)
} else {
None
}
}
/// Applies the SIMD tanh kernel to `x` when `F` is exactly `f32` or
/// `f64`; returns `None` otherwise so the caller can fall back.
#[cfg(feature = "simd")]
fn try_simd_tanh<F: Float>(x: &[F]) -> Option<Vec<F>> {
use crate::same_type;
if same_type::<F, f32>() {
// SAFETY: F == f32 (checked above); reinterpretation preserves layout.
let x_f32: &[f32] =
unsafe { std::slice::from_raw_parts(x.as_ptr() as *const f32, x.len()) };
let x_view = scirs2_core::ndarray::ArrayView1::from(x_f32);
let result = scirs2_core::simd::simd_tanh_f32(&x_view);
let result_vec = result.to_vec();
// SAFETY: F == f32; the Vec is rebuilt over the same allocation and
// ManuallyDrop prevents a double free.
let result_f: Vec<F> = unsafe {
let mut v = std::mem::ManuallyDrop::new(result_vec);
Vec::from_raw_parts(v.as_mut_ptr() as *mut F, v.len(), v.capacity())
};
Some(result_f)
} else if same_type::<F, f64>() {
// SAFETY: same reasoning as the f32 branch, with F == f64.
let x_f64: &[f64] =
unsafe { std::slice::from_raw_parts(x.as_ptr() as *const f64, x.len()) };
let x_view = scirs2_core::ndarray::ArrayView1::from(x_f64);
let result = scirs2_core::simd::simd_tanh_f64(&x_view);
let result_vec = result.to_vec();
// SAFETY: F == f64; same Vec-reinterpretation argument as above.
let result_f: Vec<F> = unsafe {
let mut v = std::mem::ManuallyDrop::new(result_vec);
Vec::from_raw_parts(v.as_mut_ptr() as *mut F, v.len(), v.capacity())
};
Some(result_f)
} else {
None
}
}
/// Computes the dot product of `a` and `b` with a SIMD reduction when
/// `F` is exactly `f32` or `f64`; returns `None` otherwise. Callers are
/// expected to have verified equal lengths.
#[cfg(feature = "simd")]
fn try_simd_dot<F: Float>(a: &[F], b: &[F]) -> Option<F> {
use crate::same_type;
if same_type::<F, f32>() {
// SAFETY: F == f32 (checked above); reinterpretation preserves layout.
let a_f32: &[f32] =
unsafe { std::slice::from_raw_parts(a.as_ptr() as *const f32, a.len()) };
let b_f32: &[f32] =
unsafe { std::slice::from_raw_parts(b.as_ptr() as *const f32, b.len()) };
let a_view = scirs2_core::ndarray::ArrayView1::from(a_f32);
let b_view = scirs2_core::ndarray::ArrayView1::from(b_f32);
let result_f32 = scirs2_core::simd::simd_dot_f32(&a_view, &b_view);
// SAFETY: F == f32, so reading the scalar back through an F pointer is sound.
let result: F = unsafe { *(&result_f32 as *const f32 as *const F) };
Some(result)
} else if same_type::<F, f64>() {
// SAFETY: same reasoning as the f32 branch, with F == f64.
let a_f64: &[f64] =
unsafe { std::slice::from_raw_parts(a.as_ptr() as *const f64, a.len()) };
let b_f64: &[f64] =
unsafe { std::slice::from_raw_parts(b.as_ptr() as *const f64, b.len()) };
let a_view = scirs2_core::ndarray::ArrayView1::from(a_f64);
let b_view = scirs2_core::ndarray::ArrayView1::from(b_f64);
let result_f64 = scirs2_core::simd::simd_dot_f64(&a_view, &b_view);
// SAFETY: F == f64, so reading the scalar back through an F pointer is sound.
let result: F = unsafe { *(&result_f64 as *const f64 as *const F) };
Some(result)
} else {
None
}
}
/// Sums all elements of `x` with a SIMD reduction when `F` is exactly
/// `f32` or `f64`; returns `None` otherwise so the caller can fall back.
#[cfg(feature = "simd")]
fn try_simd_sum<F: Float>(x: &[F]) -> Option<F> {
use crate::same_type;
if same_type::<F, f32>() {
// SAFETY: F == f32 (checked above); reinterpretation preserves layout.
let x_f32: &[f32] =
unsafe { std::slice::from_raw_parts(x.as_ptr() as *const f32, x.len()) };
let x_view = scirs2_core::ndarray::ArrayView1::from(x_f32);
let result_f32 = scirs2_core::simd::simd_sum_f32(&x_view);
// SAFETY: F == f32, so reading the scalar back through an F pointer is sound.
let result: F = unsafe { *(&result_f32 as *const f32 as *const F) };
Some(result)
} else if same_type::<F, f64>() {
// SAFETY: same reasoning as the f32 branch, with F == f64.
let x_f64: &[f64] =
unsafe { std::slice::from_raw_parts(x.as_ptr() as *const f64, x.len()) };
let x_view = scirs2_core::ndarray::ArrayView1::from(x_f64);
let result_f64 = scirs2_core::simd::simd_sum_f64(&x_view);
// SAFETY: F == f64, so reading the scalar back through an F pointer is sound.
let result: F = unsafe { *(&result_f64 as *const f64 as *const F) };
Some(result)
} else {
None
}
}
/// Graph node for element-wise `a + b` via [`SimdElementwiseAdd`].
pub fn simd_elementwise_add<'g, F: Float>(a: &Tensor<'g, F>, b: &Tensor<'g, F>) -> Tensor<'g, F> {
    Tensor::builder(a.graph())
        .append_input(a, false)
        .append_input(b, false)
        .build(SimdElementwiseAdd)
}

/// Graph node for element-wise `a - b` via [`SimdElementwiseSub`].
pub fn simd_elementwise_sub<'g, F: Float>(a: &Tensor<'g, F>, b: &Tensor<'g, F>) -> Tensor<'g, F> {
    Tensor::builder(a.graph())
        .append_input(a, false)
        .append_input(b, false)
        .build(SimdElementwiseSub)
}

/// Graph node for element-wise `a * b` via [`SimdElementwiseMul`].
pub fn simd_elementwise_mul<'g, F: Float>(a: &Tensor<'g, F>, b: &Tensor<'g, F>) -> Tensor<'g, F> {
    Tensor::builder(a.graph())
        .append_input(a, false)
        .append_input(b, false)
        .build(SimdElementwiseMul)
}

/// Graph node for element-wise `a / b` via [`SimdElementwiseDiv`].
pub fn simd_elementwise_div<'g, F: Float>(a: &Tensor<'g, F>, b: &Tensor<'g, F>) -> Tensor<'g, F> {
    Tensor::builder(a.graph())
        .append_input(a, false)
        .append_input(b, false)
        .build(SimdElementwiseDiv)
}
/// Graph node computing `accumulator + gradient` via [`SimdGradientAccumulate`].
pub fn simd_gradient_accumulate<'g, F: Float>(
    accumulator: &Tensor<'g, F>,
    gradient: &Tensor<'g, F>,
) -> Tensor<'g, F> {
    Tensor::builder(accumulator.graph())
        .append_input(accumulator, false)
        .append_input(gradient, false)
        .build(SimdGradientAccumulate)
}

/// Graph node computing `accumulator + gradient * scale` via
/// [`SimdScaledGradientAccumulate`].
pub fn simd_scaled_gradient_accumulate<'g, F: Float>(
    accumulator: &Tensor<'g, F>,
    gradient: &Tensor<'g, F>,
    scale: F,
) -> Tensor<'g, F> {
    Tensor::builder(accumulator.graph())
        .append_input(accumulator, false)
        .append_input(gradient, false)
        .build(SimdScaledGradientAccumulate { scale })
}
/// Graph node adding a 1-D bias row-wise to `x` via [`SimdBroadcastAdd`].
pub fn simd_broadcast_add<'g, F: Float>(x: &Tensor<'g, F>, bias: &Tensor<'g, F>) -> Tensor<'g, F> {
    Tensor::builder(x.graph())
        .append_input(x, false)
        .append_input(bias, false)
        .build(SimdBroadcastAdd)
}

/// Graph node multiplying `x` row-wise by a 1-D scale via [`SimdBroadcastMul`].
pub fn simd_broadcast_mul<'g, F: Float>(x: &Tensor<'g, F>, scale: &Tensor<'g, F>) -> Tensor<'g, F> {
    Tensor::builder(x.graph())
        .append_input(x, false)
        .append_input(scale, false)
        .build(SimdBroadcastMul)
}
/// Graph node applying ReLU via [`SimdReLU`].
pub fn simd_activation_relu<'g, F: Float>(x: &Tensor<'g, F>) -> Tensor<'g, F> {
    Tensor::builder(x.graph()).append_input(x, false).build(SimdReLU)
}

/// Graph node applying the logistic sigmoid via [`SimdSigmoid`].
pub fn simd_activation_sigmoid<'g, F: Float>(x: &Tensor<'g, F>) -> Tensor<'g, F> {
    Tensor::builder(x.graph()).append_input(x, false).build(SimdSigmoid)
}

/// Graph node applying tanh via [`SimdTanh`].
pub fn simd_activation_tanh<'g, F: Float>(x: &Tensor<'g, F>) -> Tensor<'g, F> {
    Tensor::builder(x.graph()).append_input(x, false).build(SimdTanh)
}
/// Graph node computing the dot product of two 1-D tensors via
/// [`SimdDotProduct`].
pub fn simd_dot_product<'g, F: Float>(a: &Tensor<'g, F>, b: &Tensor<'g, F>) -> Tensor<'g, F> {
    Tensor::builder(a.graph())
        .append_input(a, false)
        .append_input(b, false)
        .build(SimdDotProduct)
}

/// Graph node summing all elements of `x` via [`SimdReductionSum`].
pub fn simd_reduction_sum<'g, F: Float>(x: &Tensor<'g, F>) -> Tensor<'g, F> {
    Tensor::builder(x.graph())
        .append_input(x, false)
        .build(SimdReductionSum)
}
/// Configuration knobs for SIMD dispatch behavior.
/// NOTE(review): nothing in this file reads these fields — confirm where
/// (or whether) they are consumed before relying on them.
#[derive(Debug, Clone)]
pub struct SimdConfig {
// Minimum element count before SIMD kernels are preferred over scalar code.
pub min_simd_length: usize,
// Prefer fused multiply-add kernels when available.
pub prefer_fma: bool,
// Allow runtime selection between SIMD and scalar paths.
pub adaptive_dispatch: bool,
}
impl Default for SimdConfig {
fn default() -> Self {
Self {
min_simd_length: 16,
prefer_fma: true,
adaptive_dispatch: true,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate as ag;
use scirs2_core::ndarray::{array, Array1, ArrayView1};
/// Panics unless `actual` and `expected` have the same length and every
/// pair of corresponding elements differs by strictly less than `epsilon`.
fn assert_approx_eq_f32(actual: &[f32], expected: &[f32], epsilon: f32) {
    assert_eq!(actual.len(), expected.len(), "Length mismatch");
    for (i, (&a, &e)) in actual.iter().zip(expected.iter()).enumerate() {
        let diff = (a - e).abs();
        assert!(
            diff < epsilon,
            "Mismatch at index {}: actual={}, expected={}, diff={}",
            i,
            a,
            e,
            diff
        );
    }
}
/// Panics unless `actual` and `expected` have the same length and every
/// pair of corresponding elements differs by strictly less than `epsilon`.
fn assert_approx_eq_f64(actual: &[f64], expected: &[f64], epsilon: f64) {
    assert_eq!(actual.len(), expected.len(), "Length mismatch");
    for (i, (&a, &e)) in actual.iter().zip(expected.iter()).enumerate() {
        let diff = (a - e).abs();
        assert!(
            diff < epsilon,
            "Mismatch at index {}: actual={}, expected={}, diff={}",
            i,
            a,
            e,
            diff
        );
    }
}
#[test]
fn test_simd_elementwise_add_f32() {
    ag::run::<f32, _, _>(|ctx| {
        let a_arr = array![
            1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
            16.0
        ];
        let b_arr = array![
            0.1f32, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6
        ];
        let expected: Vec<f32> = a_arr
            .iter()
            .zip(b_arr.iter())
            .map(|(&a, &b)| a + b)
            .collect();
        let a = ag::tensor_ops::convert_to_tensor(a_arr.clone(), ctx);
        let b = ag::tensor_ops::convert_to_tensor(b_arr.clone(), ctx);
        let y = simd_elementwise_add(&a, &b);
        // Fail loudly: the previous `if let Ok(...)` pattern made the test
        // silently pass when evaluation failed or the result was not
        // contiguous.
        let result = y.eval(ctx).expect("simd_elementwise_add eval failed");
        let result_slice = result.as_slice().expect("result not contiguous");
        assert_approx_eq_f32(result_slice, &expected, 1e-6);
    });
}
#[test]
fn test_simd_elementwise_sub_f64() {
    ag::run::<f64, _, _>(|ctx| {
        let a_arr = array![10.0f64, 20.0, 30.0, 40.0];
        let b_arr = array![1.0f64, 2.0, 3.0, 4.0];
        let expected: Vec<f64> = a_arr
            .iter()
            .zip(b_arr.iter())
            .map(|(&a, &b)| a - b)
            .collect();
        let a = ag::tensor_ops::convert_to_tensor(a_arr, ctx);
        let b = ag::tensor_ops::convert_to_tensor(b_arr, ctx);
        let y = simd_elementwise_sub(&a, &b);
        // Fail loudly instead of silently passing when eval fails.
        let result = y.eval(ctx).expect("simd_elementwise_sub eval failed");
        let result_slice = result.as_slice().expect("result not contiguous");
        assert_approx_eq_f64(result_slice, &expected, 1e-12);
    });
}
#[test]
fn test_simd_elementwise_mul_f32() {
    ag::run::<f32, _, _>(|ctx| {
        let a_arr = array![1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
        let b_arr = array![2.0f32, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
        let expected: Vec<f32> = a_arr
            .iter()
            .zip(b_arr.iter())
            .map(|(&a, &b)| a * b)
            .collect();
        let a = ag::tensor_ops::convert_to_tensor(a_arr, ctx);
        let b = ag::tensor_ops::convert_to_tensor(b_arr, ctx);
        let y = simd_elementwise_mul(&a, &b);
        // Fail loudly instead of silently passing when eval fails.
        let result = y.eval(ctx).expect("simd_elementwise_mul eval failed");
        let result_slice = result.as_slice().expect("result not contiguous");
        assert_approx_eq_f32(result_slice, &expected, 1e-6);
    });
}
#[test]
fn test_simd_elementwise_div_f64() {
    // Elementwise division on f64 values (divisors chosen non-zero).
    ag::run::<f64, _, _>(|ctx| {
        let a_arr = array![10.0f64, 20.0, 30.0, 40.0];
        let b_arr = array![2.0f64, 4.0, 5.0, 8.0];
        let expected: Vec<f64> = a_arr
            .iter()
            .zip(b_arr.iter())
            .map(|(&a, &b)| a / b)
            .collect();
        let a = ag::tensor_ops::convert_to_tensor(a_arr, ctx);
        let b = ag::tensor_ops::convert_to_tensor(b_arr, ctx);
        let y = simd_elementwise_div(&a, &b);
        // Panic on eval failure rather than silently passing the test.
        let result = y.eval(ctx).expect("eval should succeed");
        let result_slice = result.as_slice().expect("result should be contiguous");
        assert_approx_eq_f64(result_slice, &expected, 1e-12);
    });
}
#[test]
fn test_simd_gradient_accumulate_f32() {
    // acc + grad accumulation: expect element-wise sum.
    ag::run::<f32, _, _>(|ctx| {
        let acc_arr = array![1.0f32, 2.0, 3.0, 4.0];
        let grad_arr = array![0.1f32, 0.2, 0.3, 0.4];
        let expected = vec![1.1, 2.2, 3.3, 4.4];
        let acc = ag::tensor_ops::convert_to_tensor(acc_arr, ctx);
        let grad = ag::tensor_ops::convert_to_tensor(grad_arr, ctx);
        let y = simd_gradient_accumulate(&acc, &grad);
        // Panic on eval failure rather than silently passing the test.
        let result = y.eval(ctx).expect("eval should succeed");
        let result_slice = result.as_slice().expect("result should be contiguous");
        assert_approx_eq_f32(result_slice, &expected, 1e-6);
    });
}
#[test]
fn test_simd_scaled_gradient_accumulate_f32() {
    // acc + scale * grad: 1 + 0.1*10 = 2, etc.
    ag::run::<f32, _, _>(|ctx| {
        let acc_arr = array![1.0f32, 2.0, 3.0, 4.0];
        let grad_arr = array![10.0f32, 20.0, 30.0, 40.0];
        let scale = 0.1f32;
        let expected = vec![2.0, 4.0, 6.0, 8.0];
        let acc = ag::tensor_ops::convert_to_tensor(acc_arr, ctx);
        let grad = ag::tensor_ops::convert_to_tensor(grad_arr, ctx);
        let y = simd_scaled_gradient_accumulate(&acc, &grad, scale);
        // Panic on eval failure rather than silently passing the test.
        let result = y.eval(ctx).expect("eval should succeed");
        let result_slice = result.as_slice().expect("result should be contiguous");
        assert_approx_eq_f32(result_slice, &expected, 1e-5);
    });
}
#[test]
fn test_simd_relu_f32() {
    // ReLU clamps negatives to 0 and passes non-negatives through.
    ag::run::<f32, _, _>(|ctx| {
        let x_arr = array![-3.0f32, -1.0, 0.0, 1.0, 3.0, -0.5, 2.0, -2.0];
        let expected = vec![0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 2.0, 0.0];
        let x = ag::tensor_ops::convert_to_tensor(x_arr, ctx);
        let y = simd_activation_relu(&x);
        // Panic on eval failure rather than silently passing the test.
        let result = y.eval(ctx).expect("eval should succeed");
        let result_slice = result.as_slice().expect("result should be contiguous");
        assert_approx_eq_f32(result_slice, &expected, 1e-6);
    });
}
#[test]
fn test_simd_sigmoid_f32() {
    // Sigmoid output compared against the scalar 1/(1+e^-x) reference.
    ag::run::<f32, _, _>(|ctx| {
        let x_arr = array![0.0f32, 1.0, -1.0, 5.0, -5.0, 0.5, -0.5, 2.0];
        let expected: Vec<f32> = x_arr.iter().map(|&v| 1.0 / (1.0 + (-v).exp())).collect();
        let x = ag::tensor_ops::convert_to_tensor(x_arr, ctx);
        let y = simd_activation_sigmoid(&x);
        // Panic on eval failure rather than silently passing the test.
        let result = y.eval(ctx).expect("eval should succeed");
        let result_slice = result.as_slice().expect("result should be contiguous");
        // Looser epsilon: SIMD sigmoid may use a polynomial approximation.
        assert_approx_eq_f32(result_slice, &expected, 1e-4);
    });
}
#[test]
fn test_simd_tanh_f64() {
    // tanh output compared against the scalar f64::tanh reference.
    ag::run::<f64, _, _>(|ctx| {
        let x_arr = array![0.0f64, 1.0, -1.0, 2.0, -2.0, 0.5];
        let expected: Vec<f64> = x_arr.iter().map(|&v| v.tanh()).collect();
        let x = ag::tensor_ops::convert_to_tensor(x_arr, ctx);
        let y = simd_activation_tanh(&x);
        // Panic on eval failure rather than silently passing the test.
        let result = y.eval(ctx).expect("eval should succeed");
        let result_slice = result.as_slice().expect("result should be contiguous");
        assert_approx_eq_f64(result_slice, &expected, 1e-10);
    });
}
#[test]
fn test_simd_dot_product_f32() {
    // Dot product reduces to a single scalar; compare against the naive sum.
    ag::run::<f32, _, _>(|ctx| {
        let a_arr = array![1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
        let b_arr = array![2.0f32, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
        let expected: f32 = a_arr.iter().zip(b_arr.iter()).map(|(&a, &b)| a * b).sum();
        let a = ag::tensor_ops::convert_to_tensor(a_arr, ctx);
        let b = ag::tensor_ops::convert_to_tensor(b_arr, ctx);
        let y = simd_dot_product(&a, &b);
        // Panic on eval failure rather than silently passing the test, and
        // fail if the result is empty (the old `unwrap_or(0.0)` masked that).
        let result = y.eval(ctx).expect("eval should succeed");
        let val = result
            .iter()
            .next()
            .copied()
            .expect("dot product should yield one value");
        assert!(
            (val - expected).abs() < 1e-3,
            "dot product: got {}, expected {}",
            val,
            expected
        );
    });
}
#[test]
fn test_simd_reduction_sum_f64() {
    // Full reduction to a scalar sum; compare against iterator sum.
    ag::run::<f64, _, _>(|ctx| {
        let x_arr = array![1.0f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
        let expected: f64 = x_arr.iter().sum();
        let x = ag::tensor_ops::convert_to_tensor(x_arr, ctx);
        let y = simd_reduction_sum(&x);
        // Panic on eval failure rather than silently passing the test, and
        // fail if the result is empty (the old `unwrap_or(0.0)` masked that).
        let result = y.eval(ctx).expect("eval should succeed");
        let val = result
            .iter()
            .next()
            .copied()
            .expect("reduction should yield one value");
        assert!(
            (val - expected).abs() < 1e-10,
            "sum: got {}, expected {}",
            val,
            expected
        );
    });
}
#[test]
fn test_simd_empty_array() {
    // Edge case: adding two zero-length arrays yields a zero-length result.
    ag::run::<f32, _, _>(|ctx| {
        let empty = scirs2_core::ndarray::Array1::<f32>::zeros(0);
        let a = ag::tensor_ops::convert_to_tensor(empty.clone(), ctx);
        let b = ag::tensor_ops::convert_to_tensor(empty, ctx);
        let y = simd_elementwise_add(&a, &b);
        // Panic on eval failure rather than silently passing the test.
        let result = y.eval(ctx).expect("eval should succeed");
        assert_eq!(result.len(), 0);
    });
}
#[test]
fn test_simd_single_element() {
    // Edge case: one-element arrays (below any SIMD lane width): 42 * 8 = 336.
    ag::run::<f64, _, _>(|ctx| {
        let a_arr = array![42.0f64];
        let b_arr = array![8.0f64];
        let a = ag::tensor_ops::convert_to_tensor(a_arr, ctx);
        let b = ag::tensor_ops::convert_to_tensor(b_arr, ctx);
        let y = simd_elementwise_mul(&a, &b);
        // Panic on eval failure rather than silently passing the test.
        let result = y.eval(ctx).expect("eval should succeed");
        let slice = result.as_slice().expect("result should be contiguous");
        assert_approx_eq_f64(slice, &[336.0], 1e-12);
    });
}
#[test]
fn test_simd_relu_all_negative() {
    // Edge case: every input negative, so ReLU output is all zeros.
    ag::run::<f32, _, _>(|ctx| {
        let x_arr = array![-1.0f32, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0];
        let expected = vec![0.0f32; 8];
        let x = ag::tensor_ops::convert_to_tensor(x_arr, ctx);
        let y = simd_activation_relu(&x);
        // Panic on eval failure rather than silently passing the test.
        let result = y.eval(ctx).expect("eval should succeed");
        let result_slice = result.as_slice().expect("result should be contiguous");
        assert_approx_eq_f32(result_slice, &expected, 1e-6);
    });
}
#[test]
fn test_simd_sigmoid_extreme_values() {
    // Saturation behavior: sigmoid(-100)≈0, sigmoid(100)≈1, sigmoid(0)=0.5.
    ag::run::<f32, _, _>(|ctx| {
        let x_arr = array![-100.0f32, 100.0, 0.0, -50.0, 50.0, -10.0, 10.0, 0.0];
        let x = ag::tensor_ops::convert_to_tensor(x_arr, ctx);
        let y = simd_activation_sigmoid(&x);
        // Panic on eval failure rather than silently passing the test.
        let result = y.eval(ctx).expect("eval should succeed");
        let slice = result.as_slice().expect("result should be contiguous");
        assert!(
            slice[0] < 1e-6,
            "sigmoid(-100) should be near 0, got {}",
            slice[0]
        );
        assert!(
            (slice[1] - 1.0).abs() < 1e-6,
            "sigmoid(100) should be near 1, got {}",
            slice[1]
        );
        assert!(
            (slice[2] - 0.5).abs() < 1e-4,
            "sigmoid(0) should be 0.5, got {}",
            slice[2]
        );
    });
}
#[test]
fn test_simd_large_array_add() {
    // 1024 elements: exercises the main SIMD loop plus any remainder path.
    ag::run::<f32, _, _>(|ctx| {
        let n = 1024;
        let a_vec: Vec<f32> = (0..n).map(|i| i as f32 * 0.1).collect();
        let b_vec: Vec<f32> = (0..n).map(|i| (n - i) as f32 * 0.01).collect();
        let expected: Vec<f32> = a_vec
            .iter()
            .zip(b_vec.iter())
            .map(|(&a, &b)| a + b)
            .collect();
        let a_arr = Array1::from_vec(a_vec);
        let b_arr = Array1::from_vec(b_vec);
        let a = ag::tensor_ops::convert_to_tensor(a_arr, ctx);
        let b = ag::tensor_ops::convert_to_tensor(b_arr, ctx);
        let y = simd_elementwise_add(&a, &b);
        // Panic on eval failure rather than silently passing the test.
        let result = y.eval(ctx).expect("eval should succeed");
        let result_slice = result.as_slice().expect("result should be contiguous");
        assert_approx_eq_f32(result_slice, &expected, 1e-4);
    });
}
#[test]
fn test_simd_add_gradient() {
    // d(sum(x + y))/dx and /dy are both all-ones vectors.
    ag::run::<f64, _, _>(|ctx| {
        let x = ctx.placeholder("x", &[4]);
        let y = ctx.placeholder("y", &[4]);
        let z = simd_elementwise_add(&x, &y);
        let sum_z = ag::tensor_ops::sum_all(z);
        let grads = ag::tensor_ops::grad(&[sum_z], &[x, y]);
        let x_val = array![1.0f64, 2.0, 3.0, 4.0];
        let y_val = array![5.0f64, 6.0, 7.0, 8.0];
        let results = ctx
            .evaluator()
            .push(&grads[0])
            .push(&grads[1])
            .feed(x, x_val.view().into_dyn())
            .feed(y, y_val.view().into_dyn())
            .run();
        // Panic on missing/failed gradients rather than silently passing
        // (the old `if let Some(Ok(..))` skipped all assertions on error).
        let dx = results[0]
            .as_ref()
            .expect("gradient w.r.t. x should evaluate");
        let dx_slice = dx.as_slice().expect("dx should be contiguous");
        assert_approx_eq_f64(dx_slice, &[1.0, 1.0, 1.0, 1.0], 1e-10);
        let dy = results[1]
            .as_ref()
            .expect("gradient w.r.t. y should evaluate");
        let dy_slice = dy.as_slice().expect("dy should be contiguous");
        assert_approx_eq_f64(dy_slice, &[1.0, 1.0, 1.0, 1.0], 1e-10);
    });
}
#[test]
fn test_simd_mul_gradient() {
    // d(sum(x * y))/dx = y and /dy = x (product-rule gradients).
    ag::run::<f64, _, _>(|ctx| {
        let x = ctx.placeholder("x", &[4]);
        let y = ctx.placeholder("y", &[4]);
        let z = simd_elementwise_mul(&x, &y);
        let sum_z = ag::tensor_ops::sum_all(z);
        let grads = ag::tensor_ops::grad(&[sum_z], &[x, y]);
        let x_val = array![1.0f64, 2.0, 3.0, 4.0];
        let y_val = array![5.0f64, 6.0, 7.0, 8.0];
        let results = ctx
            .evaluator()
            .push(&grads[0])
            .push(&grads[1])
            .feed(x, x_val.view().into_dyn())
            .feed(y, y_val.view().into_dyn())
            .run();
        // Panic on missing/failed gradients rather than silently passing
        // (the old `if let Some(Ok(..))` skipped all assertions on error).
        let dx = results[0]
            .as_ref()
            .expect("gradient w.r.t. x should evaluate");
        let dx_slice = dx.as_slice().expect("dx should be contiguous");
        assert_approx_eq_f64(dx_slice, &[5.0, 6.0, 7.0, 8.0], 1e-10);
        let dy = results[1]
            .as_ref()
            .expect("gradient w.r.t. y should evaluate");
        let dy_slice = dy.as_slice().expect("dy should be contiguous");
        assert_approx_eq_f64(dy_slice, &[1.0, 2.0, 3.0, 4.0], 1e-10);
    });
}
}