//! A collection of gradient-descent optimizers.
//!
//! Each submodule provides one optimizer type; the concrete types are
//! re-exported here for convenience, and the shared interface is the
//! [`Optimizer`] trait defined below.
pub mod adagrad;
pub mod adam;
pub mod momentum_sgd;
pub mod sgd;

use crate::evaluation::Feeder;
use crate::tensor::Tensor;
use crate::variable::VariableNamespace;
use crate::{Context, Float};

pub use adagrad::AdaGrad;
pub use adam::Adam;
pub use momentum_sgd::MomentumSGD;
pub use sgd::SGD;
/// Computes the gradients of `losses` with respect to every variable
/// registered in `namespace`.
///
/// Returns `(variables, gradients)`, where the two vectors are index-aligned:
/// `gradients[i]` is the gradient for `variables[i]`. The pair can be passed
/// straight to [`Optimizer::update`] or [`Optimizer::get_update_op`].
pub fn grad_helper<'g, A, F: Float>(
    losses: &[A],
    namespace: &'g VariableNamespace<F>,
) -> (Vec<Tensor<'g, F>>, Vec<Tensor<'g, F>>)
where
    A: AsRef<Tensor<'g, F>> + Copy,
{
    use crate::tensor_ops as T;
    // All losses are expected to live in the same graph; take it from the first one.
    let g = losses[0].as_ref().graph;
    // Collect the variable tensors registered under `namespace` (names are ignored).
    let variables: Vec<Tensor<F>> = g
        .var_tensors_by_name(namespace)
        .map(|(_name, tensor)| tensor)
        .collect();
    // Symbolic gradients of the losses with respect to those variables.
    let grads = T::grad(losses, &variables);
    (variables, grads)
}
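
// Usage sketch for `grad_helper` (illustrative only): `env`, `ctx`, `loss`, and
// `ns` below are assumed to be set up by the calling application (a variable
// environment's run callback, a scalar loss tensor, and the namespace holding
// the trainable variables); none of these names are defined in this module.
//
//     env.run(|ctx| {
//         let loss = /* build a scalar loss tensor on ctx */;
//         let ns = /* the VariableNamespace holding the trainable variables */;
//         let (vars, grads) = grad_helper(&[loss], &ns);
//         // `vars[i]` and `grads[i]` are paired and ready to hand to an `Optimizer`.
//     });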
/// Common interface for gradient-descent optimizers such as [`SGD`],
/// [`MomentumSGD`], [`Adam`], and [`AdaGrad`].
pub trait Optimizer<F: Float> {
    /// Builds the tensors that, when evaluated, apply this optimizer's update rule
    /// to `variables` using the corresponding `grads`.
    fn compute_updates<'g, A, B>(
        &self,
        variables: &[A],
        grads: &[B],
        g: &'g Context<F>,
    ) -> Vec<Tensor<'g, F>>
    where
        A: AsRef<Tensor<'g, F>> + Copy,
        B: AsRef<Tensor<'g, F>> + Copy;

    /// Applies the updates immediately by evaluating the update tensors,
    /// feeding any placeholder values through `feeder`.
    fn update<'g, A, B>(&self, variables: &[A], grads: &[B], g: &'g Context<F>, feeder: Feeder<F>)
    where
        A: AsRef<Tensor<'g, F>> + Copy,
        B: AsRef<Tensor<'g, F>> + Copy,
    {
        let mut evaluator = g.evaluator();
        evaluator.set_feeder(feeder);
        let update_ops = self.compute_updates(variables, grads, g);
        // Evaluate every update op; panic if any of them fails.
        evaluator.extend(&update_ops).run().into_iter().for_each(|r| {
            r.unwrap();
        });
    }

    /// Returns a single tensor that performs all the updates when evaluated,
    /// which is convenient for bundling the whole optimizer step into one node.
    fn get_update_op<'g, A, B>(
        &self,
        variables: &[A],
        grads: &[B],
        g: &'g Context<F>,
    ) -> Tensor<'g, F>
    where
        A: AsRef<Tensor<'g, F>> + Copy,
        B: AsRef<Tensor<'g, F>> + Copy,
    {
        // Summing the per-variable update tensors with add_n means evaluating the
        // resulting tensor triggers every individual update.
        crate::tensor_ops::add_n(&self.compute_updates(variables, grads, g))
    }
}
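
// Training-loop sketch (illustrative only): `opt` stands for any type implementing
// `Optimizer` (e.g. one of the re-exports above), and `env`, `loss`, `ns`, and
// `feeder` are assumed to be provided by the caller; none of these names are part
// of this module.
//
//     env.run(|ctx| {
//         let (vars, grads) = grad_helper(&[loss], &ns);
//
//         // Option 1: apply the updates right away, feeding placeholder values.
//         opt.update(&vars, &grads, ctx, feeder);
//
//         // Option 2: build a single update tensor and evaluate it later,
//         // e.g. alongside other tensors in one evaluator pass.
//         let step = opt.get_update_op(&vars, &grads, ctx);
//         ctx.evaluator().extend(&[step]).run();
//     });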