use crate::ndarray_ext::{NdArray, NdArrayView};
use crate::op;
use crate::tensor::Tensor;
use crate::ndarray;
use crate::tensor_ops::*;
use crate::Float;
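/// Exponential Linear Unit: `elu(x) = x` for `x > 0`, and `α · (eˣ − 1)` otherwise.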
pub struct Elu<T> {
pub alpha: T,
}
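/// Backward pass of [`Elu`].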
pub struct EluGrad<T> {
pub alpha: T,
}
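/// Identity: returns its input unchanged.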
pub struct Identity;
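/// Rectified Linear Unit: `relu(x) = max(x, 0)`.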
pub struct ReLU;
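/// Logistic sigmoid: `σ(x) = 1 / (1 + e⁻ˣ)`.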
pub struct Sigmoid;
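/// Softplus: `softplus(x) = ln(1 + eˣ)`, a smooth approximation of ReLU.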
pub struct Softplus;
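/// Softmax over `axis` (negative values index from the last axis).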
pub struct Softmax {
pub axis: isize,
}
#[cfg(feature = "blas")]
#[allow(dead_code)]
fn fast_sigmoid_impl<F: Float>(x: &NdArrayView<F>) -> NdArray<F> {
    let half = F::from(0.5).expect("Failed to convert constant to float");
    // Numerically stable sigmoid via the identity σ(x) = 0.5 · (tanh(x/2) + 1).
    x.mapv(move |x_val| half * ((x_val * half).tanh() + F::one()))
}
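/// Numerically stable softmax over `axis`: the per-axis maximum is subtracted
/// before exponentiating, then the result is normalized by the per-axis sum.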
#[inline]
#[allow(dead_code)]
pub fn softmax_impl<T: Float>(x: &NdArrayView<T>, axis: isize) -> NdArray<T> {
    // Normalize a negative axis index to an absolute one.
    let axis = if axis < 0 {
        (x.ndim() as isize + axis) as usize
    } else {
        axis as usize
    };
    // Shape with the reduced axis kept as a length-1 dimension, so the
    // per-axis max and sum broadcast back against `x`.
    let mut reduced_shape = x.shape().to_vec();
    reduced_shape[axis] = 1;
    let max_fn = T::max;
    // Subtract the per-axis maximum before exponentiating for numerical stability.
    let max = &x
        .fold_axis(
            scirs2_core::ndarray::Axis(axis),
            T::min_value(),
            move |&a, &b| max_fn(a, b),
        )
        .into_shape_with_order(scirs2_core::ndarray::IxDyn(&reduced_shape))
        .expect("softmax: failed to reshape the per-axis max");
    let mut tmp = x - max;
    tmp.mapv_inplace(move |a| a.exp());
    let sum = tmp
        .sum_axis(scirs2_core::ndarray::Axis(axis))
        .into_shape_with_order(scirs2_core::ndarray::IxDyn(&reduced_shape))
        .expect("softmax: failed to reshape the per-axis sum");
    tmp /= &sum;
    tmp
}
impl<T: Float> op::Op<T> for Softmax {
fn compute(&self, ctx: &mut crate::op::ComputeContext<T>) -> Result<(), crate::op::OpError> {
let ret = softmax_impl(&ctx.input(0), self.axis);
ctx.append_output(ret);
Ok(())
}
fn grad<'a>(&self, ctx: &mut crate::op::GradientContext<'a, 'a, T>) {
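        // Jacobian-vector product of softmax:
        // gx = (gy − Σ_axis(gy ⊙ y)) ⊙ y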
let y = ctx.output();
let gy = ctx.output_grad();
let sum = reduce_sum(y * gy, &[self.axis], true);
ctx.append_input_grad(0, Some((gy - sum) * y))
}
}
impl<T: Float> op::Op<T> for Softplus {
fn compute(&self, ctx: &mut crate::op::ComputeContext<T>) -> Result<(), crate::op::OpError> {
let ret = ctx.input(0).map(move |a| (a.exp() + T::one()).ln());
ctx.append_output(ret);
Ok(())
}
fn grad<'a>(&self, ctx: &mut crate::op::GradientContext<'a, 'a, T>) {
let gy = ctx.output_grad();
let a = exp(ctx.input(0));
let b = a + scalar(T::one(), ctx.graph());
let gx = gy * (a / b);
ctx.append_input_grad(0, Some(gx))
}
}
impl<T: Float> op::Op<T> for Sigmoid {
fn compute(&self, ctx: &mut crate::op::ComputeContext<T>) -> Result<(), crate::op::OpError> {
let half = T::from(0.5).expect("Operation failed");
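        // σ(x) = 0.5 · (tanh(x/2) + 1): the tanh form is numerically stable
        // for large |x|.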
let ret = ctx
.input(0)
.mapv(move |a| ((a * half).tanh() * half) + half);
ctx.append_output(ret);
Ok(())
}
fn grad<'a>(&self, ctx: &mut crate::op::GradientContext<'a, 'a, T>) {
let gy = ctx.output_grad();
let y = ctx.output();
ctx.append_input_grad(0, Some(gy * (y - square(y))));
}
}
impl<T: Float> op::Op<T> for ReLU {
fn compute(&self, ctx: &mut crate::op::ComputeContext<T>) -> Result<(), crate::op::OpError> {
let ret = ctx.input(0).map(|a| a.max(T::zero()));
ctx.append_output(ret);
Ok(())
}
fn grad<'a>(&self, ctx: &mut crate::op::GradientContext<'a, 'a, T>) {
let s = ctx.graph();
let gy = ctx.output_grad();
let bin = greater(ctx.input(0), scalar(T::zero(), s));
ctx.append_input_grad(0, Some(mul(bin, gy)))
}
}
impl<T: Float> op::Op<T> for Identity {
fn compute(&self, ctx: &mut crate::op::ComputeContext<T>) -> Result<(), crate::op::OpError> {
let ret = ctx.input(0);
ctx.append_output(ret.to_owned());
Ok(())
}
fn grad<'a>(&self, ctx: &mut crate::op::GradientContext<'a, 'a, T>) {
let gy = ctx.output_grad();
ctx.append_input_grad(0, Some(gy.to_owned()))
}
}
impl<T: Float> op::Op<T> for Elu<T> {
fn compute(&self, ctx: &mut crate::op::ComputeContext<T>) -> Result<(), crate::op::OpError> {
let ret = ctx.input(0).mapv(move |a| {
if a > T::zero() {
a
} else {
self.alpha * (a.exp() - T::one())
}
});
ctx.append_output(ret);
Ok(())
}
fn grad<'a>(&self, ctx: &mut crate::op::GradientContext<'a, 'a, T>) {
let gy = &ctx.output_grad();
let gx = Tensor::builder(ctx.graph())
.append_input(ctx.input(0), false)
.append_input(gy, false)
.setshape(&shape(gy))
.build(EluGrad { alpha: self.alpha });
ctx.append_input_grad(0, Some(gx))
}
}
impl<T: Float> op::Op<T> for EluGrad<T> {
fn compute(&self, ctx: &mut crate::op::ComputeContext<T>) -> Result<(), crate::op::OpError> {
let x = &ctx.input(0);
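        // d/dx elu(x) = 1 for x > 0, and α·eˣ (= elu(x) + α) otherwise.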
let a = x.mapv(move |a| {
if a > T::zero() {
T::one()
} else {
self.alpha * (a.exp() - T::one()) + self.alpha
}
});
let ret = a * &ctx.input(1);
ctx.append_output(ret);
Ok(())
}
fn grad<'a>(&self, ctx: &mut crate::op::GradientContext<'a, 'a, T>) {
ctx.append_input_grad(0, None);
ctx.append_input_grad(1, None);
}
}
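/// Swish (SiLU): `swish(x) = x · σ(x)`.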
pub struct Swish;
impl<T: Float> op::Op<T> for Swish {
fn compute(&self, ctx: &mut crate::op::ComputeContext<T>) -> Result<(), crate::op::OpError> {
let x = &ctx.input(0);
let half = T::from(0.5).expect("Operation failed");
let sigmoid_x = x.mapv(move |a| ((a * half).tanh() * half) + half);
let ret = x * &sigmoid_x;
ctx.append_output(ret);
Ok(())
}
fn grad<'a>(&self, ctx: &mut crate::op::GradientContext<'a, 'a, T>) {
let gy = ctx.output_grad();
let x = ctx.input(0);
let sigmoid_x = sigmoid(x);
        let one = scalar(T::one(), ctx.graph());
        // swish'(x) = σ(x) · (1 + x · (1 − σ(x)))
        let grad_factor = sigmoid_x * (one + x * (one - sigmoid_x));
ctx.append_input_grad(0, Some(gy * grad_factor));
}
}
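/// GELU, tanh approximation:
/// `gelu(x) ≈ 0.5 · x · (1 + tanh(√(2/π) · (x + 0.044715 · x³)))`.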
pub struct Gelu;
impl<T: Float> op::Op<T> for Gelu {
fn compute(&self, ctx: &mut crate::op::ComputeContext<T>) -> Result<(), crate::op::OpError> {
let x = &ctx.input(0);
let half = T::from(0.5).expect("Operation failed");
        // √(2/π) and the cubic coefficient of the tanh approximation.
        let sqrt_2_pi = T::from(0.7978845608028654).expect("Failed to convert √(2/π) constant");
        let c = T::from(0.044715).expect("Failed to convert GELU coefficient");
let one = T::one();
let inner = x.mapv(|val| sqrt_2_pi * (val + c * val * val * val));
let tanh_inner = inner.mapv(|a| a.tanh());
let ret = x.mapv(|val| val * half) * &tanh_inner.mapv(|a| one + a);
ctx.append_output(ret);
Ok(())
}
fn grad<'a>(&self, ctx: &mut crate::op::GradientContext<'a, 'a, T>) {
let gy = ctx.output_grad();
let x = ctx.input(0);
let half = scalar(T::from(0.5).expect("Operation failed"), ctx.graph());
let sqrt_2_pi = scalar(
T::from(0.7978845608028654).expect("Operation failed"),
ctx.graph(),
);
let c = scalar(T::from(0.044715).expect("Operation failed"), ctx.graph());
let one = scalar(T::one(), ctx.graph());
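        // With u = √(2/π) · (x + 0.044715·x³):
        // gelu'(x) ≈ 0.5·(1 + tanh(u)) + 0.5·x·sech²(u)·√(2/π)·(1 + 3·0.044715·x²)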
let x_squared = square(x);
let x_cubed = x * x_squared;
let inner = sqrt_2_pi * (x + c * x_cubed);
let tanh_inner = tanh(inner);
let sech_squared = one - square(tanh_inner);
let grad = half
* (one
+ tanh_inner
+ x * sqrt_2_pi
* sech_squared
* (one
+ scalar(T::from(3.0).expect("Operation failed"), ctx.graph())
* c
* x_squared));
ctx.append_input_grad(0, Some(gy * grad));
}
}
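/// Mish: `mish(x) = x · tanh(softplus(x))`.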
pub struct Mish;
impl<T: Float> op::Op<T> for Mish {
fn compute(&self, ctx: &mut crate::op::ComputeContext<T>) -> Result<(), crate::op::OpError> {
let x = &ctx.input(0);
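        // softplus(x) ≈ x for x > 20; the cutoff avoids overflow in exp(x).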
let softplus_x = x.mapv(move |a| {
if a > T::from(20.0).expect("Operation failed") {
a
} else {
(a.exp() + T::one()).ln()
}
});
let tanh_softplus = softplus_x.mapv(|a| a.tanh());
let ret = x * &tanh_softplus;
ctx.append_output(ret);
Ok(())
}
fn grad<'a>(&self, ctx: &mut crate::op::GradientContext<'a, 'a, T>) {
let gy = ctx.output_grad();
let x = ctx.input(0);
let exp_x = exp(x);
let one = scalar(T::one(), ctx.graph());
let softplus_x = ln(one + exp_x);
let tanh_softplus = tanh(softplus_x);
let sigmoid_x = exp_x / (one + exp_x);
let sech_squared = one - square(tanh_softplus);
let grad = tanh_softplus + x * sech_squared * sigmoid_x;
ctx.append_input_grad(0, Some(gy * grad));
}
}
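/// Parametric ReLU: `prelu(x) = x` for `x > 0`, and `α · x` otherwise.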
pub struct PReLU<T> {
pub alpha: T,
}
impl<T: Float> op::Op<T> for PReLU<T> {
fn compute(&self, ctx: &mut crate::op::ComputeContext<T>) -> Result<(), crate::op::OpError> {
let x = ctx.input(0);
let ret = x.mapv(|val| {
if val > T::zero() {
val
} else {
self.alpha * val
}
});
ctx.append_output(ret);
Ok(())
}
fn grad<'a>(&self, ctx: &mut crate::op::GradientContext<'a, 'a, T>) {
let gy = ctx.output_grad();
let x = ctx.input(0);
let g = ctx.graph();
let grad_x = Tensor::builder(g)
.append_input(x, false)
.append_input(gy, false)
.setshape(&shape(gy))
.build(PReLUGrad { alpha: self.alpha });
ctx.append_input_grad(0, Some(grad_x));
}
}
pub struct PReLUGrad<T> {
pub alpha: T,
}
impl<T: Float> op::Op<T> for PReLUGrad<T> {
fn compute(&self, ctx: &mut crate::op::ComputeContext<T>) -> Result<(), crate::op::OpError> {
let x = ctx.input(0);
let gy = ctx.input(1);
let ret = x.mapv(|val| {
if val > T::zero() {
T::one()
} else {
self.alpha
}
}) * gy;
ctx.append_output(ret);
Ok(())
}
    fn grad<'a>(&self, ctx: &mut crate::op::GradientContext<'a, 'a, T>) {
        // Second-order gradients are not propagated through this op.
        ctx.append_input_grad(0, None);
        ctx.append_input_grad(1, None);
    }
}
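/// ELU whose `α` is carried as an op parameter (see [`Elu`]).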
pub struct LearnableELU<T> {
pub alpha: T,
}
impl<T: Float> op::Op<T> for LearnableELU<T> {
fn compute(&self, ctx: &mut crate::op::ComputeContext<T>) -> Result<(), crate::op::OpError> {
let x = ctx.input(0);
let ret = x.mapv(|val| {
if val > T::zero() {
val
} else {
self.alpha * (val.exp() - T::one())
}
});
ctx.append_output(ret);
Ok(())
}
fn grad<'a>(&self, ctx: &mut crate::op::GradientContext<'a, 'a, T>) {
let gy = ctx.output_grad();
let x = ctx.input(0);
let g = ctx.graph();
let grad_x = Tensor::builder(g)
.append_input(x, false)
.append_input(gy, false)
.setshape(&shape(gy))
.build(LearnableELUGrad { alpha: self.alpha });
ctx.append_input_grad(0, Some(grad_x));
}
}
pub struct LearnableELUGrad<T> {
pub alpha: T,
}
impl<T: Float> op::Op<T> for LearnableELUGrad<T> {
fn compute(&self, ctx: &mut crate::op::ComputeContext<T>) -> Result<(), crate::op::OpError> {
let x = ctx.input(0);
let gy = ctx.input(1);
let ret = x.mapv(|val| {
if val > T::zero() {
T::one()
} else {
self.alpha * val.exp()
}
}) * gy;
ctx.append_output(ret);
Ok(())
}
    fn grad<'a>(&self, ctx: &mut crate::op::GradientContext<'a, 'a, T>) {
        // Second-order gradients are not propagated through this op.
        ctx.append_input_grad(0, None);
        ctx.append_input_grad(1, None);
    }
}
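/// Swish with a slope parameter: `swish_β(x) = x · σ(β · x)`.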
pub struct LearnableSwish<T> {
pub beta: T,
}
impl<T: Float> op::Op<T> for LearnableSwish<T> {
fn compute(&self, ctx: &mut crate::op::ComputeContext<T>) -> Result<(), crate::op::OpError> {
let x = ctx.input(0);
let half = T::from(0.5).expect("Operation failed");
let beta_x = x.mapv(|val| self.beta * val);
let sigmoid_beta_x = beta_x.mapv(|val| ((val * half).tanh() * half) + half);
        let ret = &x * &sigmoid_beta_x;
ctx.append_output(ret);
Ok(())
}
fn grad<'a>(&self, ctx: &mut crate::op::GradientContext<'a, 'a, T>) {
let gy = ctx.output_grad();
let x = ctx.input(0);
let g = ctx.graph();
let grad_x = Tensor::builder(g)
.append_input(x, false)
.append_input(gy, false)
.setshape(&shape(gy))
.build(LearnableSwishGrad { beta: self.beta });
ctx.append_input_grad(0, Some(grad_x));
}
}
pub struct LearnableSwishGrad<T> {
pub beta: T,
}
impl<T: Float> op::Op<T> for LearnableSwishGrad<T> {
fn compute(&self, ctx: &mut crate::op::ComputeContext<T>) -> Result<(), crate::op::OpError> {
let x = ctx.input(0);
let gy = ctx.input(1);
let half = T::from(0.5).expect("Operation failed");
        let one = T::one();
        // swish_β'(x) = σ(βx) + βx · σ(βx) · (1 − σ(βx)), with σ computed via
        // the numerically stable identity σ(z) = 0.5 · (tanh(z/2) + 1).
        let derivative = x.mapv(|x_val| {
            let s = ((self.beta * x_val * half).tanh() * half) + half;
            s + self.beta * x_val * s * (one - s)
        });
        let ret = derivative * gy;
ctx.append_output(ret);
Ok(())
}
    fn grad<'a>(&self, ctx: &mut crate::op::GradientContext<'a, 'a, T>) {
        // Second-order gradients are not propagated through this op.
        ctx.append_input_grad(0, None);
        ctx.append_input_grad(1, None);
    }
}
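/// Parametric blend of basis activations:
/// `f(x) = a·x + b·tanh(c·x) + d·σ(e·x)`.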
pub struct AdaptiveActivation<T> {
    pub a: T,
    pub b: T,
    pub c: T,
    pub d: T,
    pub e: T,
}
impl<T: Float> op::Op<T> for AdaptiveActivation<T> {
fn compute(&self, ctx: &mut crate::op::ComputeContext<T>) -> Result<(), crate::op::OpError> {
let x = ctx.input(0);
let half = T::from(0.5).expect("Operation failed");
let linear_part = x.mapv(|val| self.a * val);
let tanh_part = x.mapv(|val| self.b * (self.c * val).tanh());
let sigmoid_part = x.mapv(|val| {
let sigmoid_val = ((self.e * val * half).tanh() * half) + half;
self.d * sigmoid_val
});
let ret = linear_part + &tanh_part + &sigmoid_part;
ctx.append_output(ret);
Ok(())
}
fn grad<'a>(&self, ctx: &mut crate::op::GradientContext<'a, 'a, T>) {
let gy = ctx.output_grad();
let x = ctx.input(0);
let g = ctx.graph();
let grad_x = Tensor::builder(g)
.append_input(x, false)
.append_input(gy, false)
.setshape(&shape(gy))
.build(AdaptiveActivationGrad {
a: self.a,
b: self.b,
c: self.c,
d: self.d,
e: self.e,
});
ctx.append_input_grad(0, Some(grad_x));
}
}
pub struct AdaptiveActivationGrad<T> {
pub a: T,
pub b: T,
pub c: T,
pub d: T,
pub e: T,
}
impl<T: Float> op::Op<T> for AdaptiveActivationGrad<T> {
fn compute(&self, ctx: &mut crate::op::ComputeContext<T>) -> Result<(), crate::op::OpError> {
let x = ctx.input(0);
let gy = ctx.input(1);
let half = T::from(0.5).expect("Operation failed");
let one = T::one();
let linear_grad = x.mapv(|_| self.a);
let tanh_grad = x.mapv(|val| {
let tanh_val = (self.c * val).tanh();
self.b * self.c * (one - tanh_val * tanh_val)
});
let sigmoid_grad = x.mapv(|val| {
let sigmoid_val = ((self.e * val * half).tanh() * half) + half;
self.d * self.e * sigmoid_val * (one - sigmoid_val)
});
let total_grad = linear_grad + &tanh_grad + &sigmoid_grad;
let ret = total_grad * gy;
ctx.append_output(ret);
Ok(())
}
    fn grad<'a>(&self, ctx: &mut crate::op::GradientContext<'a, 'a, T>) {
        // Second-order gradients are not propagated through this op.
        ctx.append_input_grad(0, None);
        ctx.append_input_grad(1, None);
    }
}
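// A minimal sanity-test sketch for the raw `softmax_impl` helper, assuming
// `NdArrayView<T>` is ndarray's dynamic-dimension view, that the crate's
// `ndarray` re-export exposes `arr2` and `Axis`, and that `f64` implements
// this crate's `Float` trait. The graph-level ops above are exercised through
// the public tensor-ops API instead.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn softmax_rows_sum_to_one() {
        let x = crate::ndarray::arr2(&[[1.0f64, 2.0, 3.0], [0.0, 0.0, 0.0]]).into_dyn();
        let y = softmax_impl(&x.view(), -1);
        // Each row of the softmax output should sum to 1.
        for row in y.axis_iter(crate::ndarray::Axis(0)) {
            assert!((row.sum() - 1.0).abs() < 1e-12);
        }
    }
}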