extern crate ndarray;
use ndarray_ext::NdArray;
use op;
use ops;
use std::f32;
use tensor::Tensor;
/// Op marker for softmax cross entropy between logits and dense targets.
///
/// Carries no state; the behaviour lives in its `op::Op` implementation in this file.
pub struct SoftmaxCrossEntropy;
/// Op marker for softmax cross entropy where the targets are per-row class indices.
///
/// Carries no state; the behaviour lives in its `op::Op` implementation in this file.
pub struct SparseSoftmaxCrossEntropy;
/// Op marker for the gradient of `SparseSoftmaxCrossEntropy` with respect to its logits.
///
/// Built by `SparseSoftmaxCrossEntropy::grad` with inputs `(log_x, t, gy)`.
pub struct SparseSoftmaxCrossEntropyGrad;
/// Op marker for element-wise sigmoid cross entropy between logits and targets.
///
/// Carries no state; the behaviour lives in its `op::Op` implementation in this file.
pub struct SigmoidCrossEntropy;
/// Op that subtracts the log-sum-exp of its input, taken along `axis`, from the input.
pub struct LogSoftmax {
/// Axis handed to `logsumexp_forward` when normalising the input.
pub axis: isize,
}
impl op::Op for LogSoftmax {
    fn name(&self) -> &str {
        "LogSoftmax"
    }

    /// Returns `x - logsumexp(x)` where the log-sum-exp is taken along
    /// `self.axis` with the reduced dimension kept.
    fn compute(&self, ctx: ::runtime::OpComputeContext) -> op::ComputeResult {
        let xs = ctx.grab_inputs();
        let x = xs[0];
        let log_norm = ops::math_ops::logsumexp_forward(x, self.axis, true);
        vec![Ok(x - &log_norm)]
    }

    /// Gradient of log-softmax: `gy - softmax(x) * sum(gy)`.
    ///
    /// `output` is the log-softmax itself, so `exp(output)` recovers the softmax.
    fn grad(&self, gy: &Tensor, _: &[&Tensor], output: &Tensor) -> Vec<Option<Tensor>> {
        let sm = ops::exp(output);
        // The reduction has to run over the same axis the forward pass
        // normalised along; a fixed axis is only correct when `self.axis == 1`.
        let sum = ops::reduce_sum(gy, &[self.axis], true);
        let ref mul = sm * sum;
        vec![Some(gy - mul)]
    }
}
impl op::Op for SigmoidCrossEntropy {
    fn name(&self) -> &str {
        "SigmoidCrossEntropy"
    }

    /// Computes the element-wise loss `max(x, 0) - x * t + log(1 + exp(-|x|))`
    /// for logits `x` (input 0) and targets `t` (input 1).
    ///
    /// # Panics
    ///
    /// Panics if `x` and `t` do not have the same shape.
    fn compute(&self, ctx: ::runtime::OpComputeContext) -> op::ComputeResult {
        let xs = ctx.grab_inputs();
        let x = xs[0];
        let t = xs[1];

        assert_eq!(x.shape(), t.shape(), "x.shape must match t.shape");

        // `log(1 + exp(-|x|)) + max(0, x)`: exponentiating `-|x|` keeps the
        // argument of `exp` non-positive.
        let mut loss =
            x.map(|a| ((-a.abs()).exp() + 1.).log(f32::consts::E) + f32::max(0., *a));
        loss -= &(t * x);
        vec![Ok(loss)]
    }

    /// Returns the gradients with respect to `x` and `t`.
    ///
    /// * `d loss / d x = sigmoid(x) - t`
    /// * `d loss / d t = -x` (the loss is linear in `t` through the `-x * t` term)
    fn grad(&self, gy: &Tensor, inputs: &[&Tensor], _: &Tensor) -> Vec<Option<Tensor>> {
        let x = inputs[0];
        let t = inputs[1];

        let gx1 = {
            // NOTE(review): `exp(x) / (exp(x) + 1)` presumably becomes inf/inf
            // for very large logits; confirm whether a stable sigmoid is needed.
            let ref exp = ops::exp(x);
            ((exp / (exp + 1)) - t) * gy
        };

        // Differentiating the forward expression by `t` leaves only `-x`.
        let gx2 = ops::neg(&(gy * x));

        vec![Some(gx1), Some(gx2)]
    }
}
impl op::Op for SparseSoftmaxCrossEntropy {
    fn name(&self) -> &str {
        "SparseSoftmaxCrossEntropy"
    }

    /// Computes the negative log-softmax of `x` (input 0) at the class index
    /// given per row by `t` (input 1).
    ///
    /// Produces two outputs: the loss reshaped to `[rows, 1]`, and the
    /// log-softmax of `x`, which `grad` reads back via `nth_tensor(output, 1)`.
    ///
    /// # Panics
    ///
    /// Panics if `x` is not 2-ranked, if `t` is neither 1-ranked nor 2-ranked
    /// with a trailing dimension of 1, if `t` does not hold exactly one label
    /// per row of `x`, or if a label indexes outside a row.
    fn compute(&self, ctx: ::runtime::OpComputeContext) -> op::ComputeResult {
        let xs = ctx.grab_inputs();
        let (x, t) = (xs[0], xs[1]);

        // Validate before touching the data so a bad shape is reported with
        // these messages rather than by a failure further down.
        assert_eq!(x.ndim(), 2, "Bad first argument's shape");
        {
            let t_shape = t.shape();
            match t_shape.len() {
                1 => {}
                2 => assert_eq!(t_shape[1], 1, "Bad second argument's shape"),
                _ => panic!("Bad second argument's shape"),
            }
        }
        let batch_size = x.shape()[0];
        // Each row consumes exactly one label below; a mismatch would either
        // run out of labels or silently ignore the surplus.
        assert_eq!(t.len(), batch_size, "Bad second argument's shape");

        let log_x: NdArray = x - &ops::math_ops::logsumexp_forward(x, 1, true);

        // Labels are consumed in the order `map_axis` visits the rows.
        let mut labels = t.iter();
        let ret = log_x
            .map_axis(ndarray::Axis(1), move |row| {
                let label = *labels
                    .next()
                    .expect("label count was checked against the number of rows");
                -row[label as usize]
            })
            .into_shape(ndarray::IxDyn(&[batch_size, 1]))
            .unwrap();

        vec![Ok(ret), Ok(log_x)]
    }

    /// Returns the gradients for `x` and `t`.
    ///
    /// The gradient for `x` is delegated to `SparseSoftmaxCrossEntropyGrad`,
    /// which receives `(log_x, t, gy)`.
    fn grad(&self, gy: &Tensor, inputs: &[&Tensor], output: &Tensor) -> Vec<Option<Tensor>> {
        let t = inputs[1];
        // Second output of `compute`: the log-softmax of the logits.
        let ref log_x = ops::nth_tensor(output, 1);

        let gx1 = Tensor::builder()
            .set_inputs(vec![log_x, t, gy])
            .build(SparseSoftmaxCrossEntropyGrad);

        // NOTE(review): `t` holds class indices, so this value (which has the
        // shape of `x`) is presumably never consumed; confirm before relying on it.
        let gx2 = {
            let ref x = ops::exp(log_x);
            let sum = ops::reduce_sum(&(x * log_x), &[1], true);
            x * gy * (sum - log_x)
        };

        vec![Some(gx1), Some(gx2)]
    }
}
impl op::Op for SparseSoftmaxCrossEntropyGrad {
    fn name(&self) -> &str {
        "SparseSoftmaxCrossEntropyGrad"
    }

    /// Computes `(softmax(x) - one_hot(t)) * gy` from the inputs
    /// `(log_x, t, gy)`.
    ///
    /// The softmax is recovered as `exp(log_x)`; the one-hot subtraction is
    /// done by decrementing the labelled entry of each row.
    fn compute(&self, ctx: ::runtime::OpComputeContext) -> op::ComputeResult {
        let xs = ctx.grab_inputs();
        let log_x = xs[0];
        let t = xs[1];
        let gy = xs[2];

        let mut x = log_x.map(|a| a.exp());
        // Rows along axis 0 are paired with the labels in iteration order.
        for (mut row, &t_) in x.axis_iter_mut(ndarray::Axis(0)).zip(t) {
            row[t_ as usize] -= 1.;
        }
        x *= gy;

        vec![Ok(x)]
    }

    /// This op is not differentiated further.
    ///
    /// One entry is returned per input (`log_x`, `t`, `gy`), matching the
    /// convention used by the other ops in this file.
    fn grad(&self, _: &Tensor, _: &[&Tensor], _: &Tensor) -> Vec<Option<Tensor>> {
        vec![None, None, None]
    }
}
impl op::Op for SoftmaxCrossEntropy {
    fn name(&self) -> &str {
        "SoftmaxCrossEntropy"
    }

    /// Computes `-sum(t * log_softmax(x))` along axis 1 for logits `x`
    /// (input 0) and targets `t` (input 1).
    ///
    /// Produces two outputs: the loss, and the log-softmax of `x`, which
    /// `grad` reads back via `nth_tensor(output, 1)`.
    ///
    /// # Panics
    ///
    /// Panics if either the log-softmax of `x` or `t` is not 2-ranked.
    fn compute(&self, ctx: ::runtime::OpComputeContext) -> op::ComputeResult {
        let xs = ctx.grab_inputs();
        let x = xs[0];
        let log_x: NdArray = x - &ops::math_ops::logsumexp_forward(x, 1, true);
        let t = xs[1];

        assert_eq!(log_x.ndim(), 2, "x must be 2-ranked tensor");
        assert_eq!(t.ndim(), 2, "t must be 2-ranked tensor");

        let loss = (t * &log_x).sum_axis(ndarray::Axis(1)) * -1.;
        vec![Ok(loss), Ok(log_x)]
    }

    /// Returns the gradients with respect to `x` and `t`.
    ///
    /// * `d loss / d x = (softmax(x) - t) * gy`
    /// * `d loss / d t = -log_softmax(x) * gy`, because the loss is linear in `t`
    fn grad(&self, gy: &Tensor, inputs: &[&Tensor], output: &Tensor) -> Vec<Option<Tensor>> {
        // Second output of `compute`: the log-softmax of the logits.
        let ref log_x = ops::nth_tensor(output, 1);
        let ref x = ops::exp(log_x);
        let t = inputs[1];

        let gx1 = (x - t) * gy;

        // Differentiating `-sum(t * log_x)` by `t` leaves `-log_x`; `gy`
        // combines with it the same way it does for `gx1`.
        let gx2 = ops::neg(&(gy * log_x));

        vec![Some(gx1), Some(gx2)]
    }
}