1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
use super::utils::move_tape_and_add_backward_op;
use crate::prelude::*;
/// Overwrites elements of `t` with `value` wherever `mask` holds exactly `value`.
///
/// The comparison is an exact `f32` equality test performed on the elements of `mask`
/// (not on `t`). Elements whose mask entry differs from `value` are passed through
/// unchanged.
///
/// # Arguments
///
/// * `t` - the input tensor; it is consumed, and its tape is moved onto the returned tensor.
/// * `mask` - a tape-less tensor of the same type whose entries select what gets replaced.
/// * `value` - both the sentinel searched for in `mask` and the fill value written out.
///
/// # Gradients
///
/// Before the backward op is registered, `t`'s own buffer is overwritten with `0.0` at
/// replaced positions and `1.0` everywhere else. The backward op hands that buffer to
/// `Device::addmul` together with the result's gradient (presumably accumulating
/// `indicator * result_grad` into `t`'s gradient; confirm against `addmul`), so replaced
/// positions contribute no gradient.
pub fn value_mask<T: Tensor<Dtype = f32>>(mut t: T, mask: &T::NoTape, value: T::Dtype) -> T {
    let mut masked = T::NoTape::zeros();

    // Forward pass: pick `value` where the mask matches, otherwise copy the input through.
    // This must run before `t`'s buffer is repurposed below.
    T::Device::foreach_mrr(masked.mut_data(), t.data(), mask.data(), &mut |out, inp, m| {
        if *m == value {
            *out = value;
        } else {
            *out = *inp;
        }
    });

    // Reuse `t`'s storage for the local derivative: 1 where the input was kept, 0 where
    // it was replaced by the constant.
    T::Device::foreach_mr(t.mut_data(), mask.data(), &mut |keep, m| {
        *keep = if *m != value { 1.0 } else { 0.0 };
    });

    move_tape_and_add_backward_op(t, masked, move |indicator, output, grads| {
        let (input_grad, output_grad) = grads.mut_and_ref(&indicator, &output);
        T::Device::addmul(input_grad, indicator.data(), output_grad);
    })
}
// Generates an inherent `value_mask` method for one tensor struct.
//
// Invoked as `tensor_impl!(TypeName, [DIM, ...])`, where the bracketed list names the
// const-generic dimensions of the tensor type (empty for a 0-d tensor). The generated
// method simply forwards to the free function `value_mask`.
macro_rules! tensor_impl {
    ($Tensor:ident, [$($Dim:tt),*]) => {
        impl<$(const $Dim: usize,)* H> $Tensor<$($Dim,)* H>
        where
            H: Tape,
        {
            /// Calls [value_mask] on self.
            pub fn value_mask(self, mask: &$Tensor<$($Dim,)* NoneTape>, value: f32) -> Self {
                value_mask(self, mask, value)
            }
        }
    };
}
// Instantiate the `value_mask` method for each tensor rank, from 0-d through 4-d.
// The bracketed identifiers are the const-generic dimension names passed to the macro.
tensor_impl!(Tensor0D, []);
tensor_impl!(Tensor1D, [M]);
tensor_impl!(Tensor2D, [M, N]);
tensor_impl!(Tensor3D, [M, N, O]);
tensor_impl!(Tensor4D, [M, N, O, P]);
#[cfg(test)]
mod tests {
    use super::*;

    /// Sentinel used in these tests both as the mask marker and as the fill value.
    const FILL: f32 = -1e10;

    /// A 0-d tensor whose mask matches is replaced outright and receives zero gradient.
    #[test]
    fn test_mask_0d() {
        let input = Tensor0D::new(1.0);
        let mask = Tensor0D::new(FILL);

        let masked = input.trace().value_mask(&mask, FILL);
        assert_eq!(masked.data(), &FILL);

        let grads = masked.mean().backward();
        assert_eq!(grads.ref_gradient(&input), &0.0);
    }

    /// Only the unmasked middle element survives and carries gradient through `exp().mean()`.
    #[test]
    fn test_mask_1d() {
        let input: Tensor1D<3> = Tensor1D::new([1.0, 2.0, 3.0]);
        let mask: Tensor1D<3> = Tensor1D::new([FILL, 0.0, FILL]);

        let masked = input.trace().value_mask(&mask, FILL);
        assert_eq!(masked.data(), &[FILL, 2.0, FILL]);

        let grads = masked.exp().mean().backward();
        assert_eq!(grads.ref_gradient(&input), &[0.0, 2.463019, 0.0]);
    }

    /// Mask entries that merely resemble the sentinel (e.g. `-1e9`) must not trigger a fill.
    #[test]
    fn test_mask_2d() {
        let input: Tensor2D<2, 3> = Tensor2D::new([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]);
        let mask: Tensor2D<2, 3> = Tensor2D::new([[FILL, 0.0, FILL], [1.0, FILL, -1e9]]);

        let masked = input.trace().value_mask(&mask, FILL);
        assert_eq!(masked.data(), &[[FILL, 2.0, FILL], [4.0, FILL, 6.0]]);

        let grads = masked.mean().backward();
        // Each of the six elements contributes 1/6 to the mean; masked ones contribute 0.
        let sixth: f32 = 1.0 / 6.0;
        assert_eq!(
            grads.ref_gradient(&input),
            &[[0.0, sixth, 0.0], [sixth, 0.0, sixth]]
        );
    }
}