1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
use crate::{AdditionalOps, BaseOps, ClipOp, FnsOps, Matrix, SumOps, SumOverOps};
use custos::{number::Float, Device, Shape};
/// Interface for values that can be scored against a `targets` matrix.
///
/// NOTE(review): no implementation of this trait is visible in this part of
/// the file; the matrix-level functionality below goes through [`CCEOp`]
/// instead. Confirm whether this trait is still used.
pub trait CCE<T> {
/// Returns a `(T, Matrix<T>)` pair computed from `self` and `targets`.
///
/// Presumably the pair is `(loss, gradient)`, mirroring `CCEOp::cce` —
/// TODO confirm against implementors.
fn cce(&self, targets: &Matrix<T>) -> (T, Matrix<T>);
}
// NOTE(review): this inherent impl block is empty — it adds no items to
// `Matrix`. It looks like a leftover and is a candidate for removal.
impl<'a, T, D> Matrix<'a, T, D> where D: Device {}
/// Device-level cross-entropy operations on [`Matrix`] values.
///
/// Implementors provide `cce_loss` and `cce_grad`; `cce` is a provided method
/// built on top of those two.
///
/// Type parameters:
/// * `T` – element type of the matrices and of the returned loss.
/// * `S` – shape of the input matrices (defaults to `()`).
/// * `D` – device of the input matrices (defaults to `Self`).
pub trait CCEOp<T, S: Shape = (), D = Self>: Device
where
    D: Device,
{
    /// Computes both the loss and the gradient for `preds` against `targets`.
    ///
    /// Returns `(loss, gradient)`. The loss is evaluated first via
    /// `cce_loss`, then the gradient via `cce_grad`, each with the same
    /// arguments.
    #[inline]
    fn cce<'a>(
        &self,
        preds: &Matrix<'a, T, D, S>,
        targets: &Matrix<'a, T, D, S>,
    ) -> (T, Matrix<'a, T, Self, S>) {
        let loss = self.cce_loss(preds, targets);
        let gradient = self.cce_grad(preds, targets);
        (loss, gradient)
    }

    /// Computes the scalar loss of `preds` with respect to `targets`.
    fn cce_loss(&self, preds: &Matrix<T, D, S>, targets: &Matrix<T, D, S>) -> T;

    /// Computes the gradient matrix of the loss for `preds` and `targets`.
    ///
    /// The result lives on `Self` and has the same shape `S` as the inputs.
    fn cce_grad<'a>(
        &self,
        preds: &Matrix<'a, T, D, S>,
        targets: &Matrix<'a, T, D, S>,
    ) -> Matrix<'a, T, Self, S>;
}
/// Cross-entropy convenience methods on [`Matrix`].
///
/// Each method treats `self` as the predictions and forwards to the
/// corresponding [`CCEOp`] method of the matrix's device.
impl<'a, T, S: Shape, D: CCEOp<T, S>> Matrix<'a, T, D, S> {
    /// Returns `(loss, gradient)` for `self` (predictions) against `targets`.
    #[inline]
    pub fn cce(&self, targets: &Matrix<'a, T, D, S>) -> (T, Matrix<'a, T, D, S>) {
        let device = self.device();
        device.cce(self, targets)
    }

    /// Returns only the scalar loss for `self` (predictions) against `targets`.
    #[inline]
    pub fn cce_loss(&self, targets: &Matrix<T, D, S>) -> T {
        let device = self.device();
        device.cce_loss(self, targets)
    }

    /// Returns only the gradient for `self` (predictions) against `targets`.
    #[inline]
    pub fn cce_grad(&self, targets: &Matrix<'a, T, D, S>) -> Matrix<'a, T, D, S> {
        let device = self.device();
        device.cce_grad(self, targets)
    }
}
/// Blanket [`CCEOp`] implementation for every device that offers the matrix
/// operations listed in the `where` clause.
impl<T, D, IS: Shape> CCEOp<T, IS> for D
where
    T: Float,
    D: FnsOps<T>
        + ClipOp<T, IS>
        + BaseOps<T, IS>
        + SumOps<T>
        + SumOverOps<T, IS>
        + AdditionalOps<T, IS>
        + FnsOps<T, IS>,
{
    /// Computes the loss as `mean(-ln(sum_cols(clip(preds) * targets)))`.
    ///
    /// `preds` is clipped to `[1e-7, 1 - 1e-7]` before use, which keeps the
    /// logarithm away from an exact zero prediction.
    fn cce_loss(&self, preds: &Matrix<T, D, IS>, targets: &Matrix<T, D, IS>) -> T {
        let lower = T::as_generic(1E-7);
        let upper = T::as_generic(1. - 1E-7);
        let clipped = preds.clip(lower, upper);

        // Element-wise product with the targets, then reduced by `sum_cols`
        // (presumably one confidence value per row — TODO confirm).
        let weighted = &clipped * targets;
        let confidences = weighted.sum_cols::<()>();

        let neg_log = confidences.ln().neg();
        neg_log.mean()
    }

    /// Computes the gradient as `-(targets / preds) / preds.rows()`.
    ///
    /// NOTE(review): unlike `cce_loss`, `preds` is not clipped here, so a
    /// prediction entry of exactly zero is divided by as-is.
    fn cce_grad<'a>(
        &self,
        preds: &Matrix<'a, T, D, IS>,
        targets: &Matrix<'a, T, D, IS>,
    ) -> Matrix<'a, T, Self, IS> {
        let row_count = T::from_usize(preds.rows());
        let ratio = targets / preds;
        ratio.neg() / row_count
    }
}