1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
use super::mapper::ModuleTensorUpdater;
use super::visitor::{GradientsLoader, GradientsRegister};
use super::GradientsParams;
use crate::module::{ADModule, LoadingError, Module, ParamId, State, StateNamed};
use crate::tensor::backend::{ADBackend, Backend};
use crate::tensor::{Data, Tensor};
/// Optimization strategy applied to a module's parameters.
///
/// Implementors only have to define how a single tensor is updated from its
/// gradient (`update_tensor`); the remaining methods are provided defaults
/// that apply that rule across a whole module and (de)serialize any
/// per-parameter optimizer state (e.g. momentum buffers — implementors that
/// keep state override `register_param_state` / `load_param_state`).
pub trait Optimizer: Send + Sync {
    /// Automatic-differentiation backend this optimizer operates on.
    type Backend: ADBackend;

    /// Update one parameter `tensor` (identified by `id`) using its gradient
    /// `grad`, returning the new tensor value.
    fn update_tensor<const D: usize>(
        &mut self,
        id: &ParamId,
        tensor: Tensor<Self::Backend, D>,
        grad: Tensor<<Self::Backend as ADBackend>::InnerBackend, D>,
    ) -> Tensor<Self::Backend, D>;

    /// Apply [`Optimizer::update_tensor`] to every parameter of `module`
    /// using the gradients collected in `grads`, returning the updated module.
    fn update_module<M>(&mut self, module: M, grads: GradientsParams) -> M
    where
        M: ADModule<ADBackend = Self::Backend>,
        Self: Sized,
    {
        module.map(&mut ModuleTensorUpdater::new(self, grads))
    }

    /// Register the optimizer state for parameter `id` into `_state`.
    ///
    /// No-op by default; stateful optimizers override this.
    fn register_param_state<const D: usize>(
        &self,
        _id: &ParamId,
        _state: &mut StateNamed<<Self::Backend as Backend>::FloatElem>,
    ) {
    }

    /// Load the optimizer state for parameter `id` from `_state` onto
    /// `_device`.
    ///
    /// No-op by default; stateful optimizers override this.
    fn load_param_state<const D: usize>(
        &mut self,
        _id: &ParamId,
        _state: &StateNamed<<Self::Backend as Backend>::FloatElem>,
        _device: &<Self::Backend as Backend>::Device,
    ) {
    }

    /// Collect the optimizer state for every parameter of `module` into a
    /// named state tree, visiting each parameter in turn.
    fn state<M: Module<Backend = Self::Backend>>(
        &self,
        module: &M,
    ) -> State<<Self::Backend as Backend>::FloatElem>
    where
        Self: Sized,
    {
        let mut named = StateNamed::new();
        let mut register = GradientsRegister::new(self, &mut named);
        module.visit(&mut register);
        State::StateNamed(named)
    }

    /// Restore the optimizer state for every parameter of `module` from
    /// `state`.
    ///
    /// # Errors
    ///
    /// Returns a [`LoadingError`] when `state` is not the
    /// [`State::StateNamed`] variant, since ids and data cannot be fetched
    /// from any other wrapper.
    fn load<M: Module<Backend = Self::Backend>>(
        &mut self,
        module: &M,
        state: &State<<Self::Backend as Backend>::FloatElem>,
    ) -> Result<(), LoadingError>
    where
        Self: Sized,
    {
        let named = if let State::StateNamed(named) = state {
            named
        } else {
            return Err(LoadingError::new(
                "Can't load state wrapper to fetch id and data".to_string(),
            ));
        };
        let mut loader = GradientsLoader::new(self, named);
        module.visit(&mut loader);
        Ok(())
    }
}
/// Serialize the gradient for parameter `id` — when one is present in
/// `grads` — and store it in `state` under the key produced by `id_to_key`.
///
/// Parameters without a recorded gradient are skipped silently.
pub(super) fn register_state_gradients<const D: usize, B: ADBackend, F: Fn(&ParamId) -> String>(
    id: &ParamId,
    state: &mut StateNamed<B::FloatElem>,
    grads: &GradientsParams,
    id_to_key: F,
) {
    // Early-return when no gradient was recorded for this parameter.
    let grad = match grads.get::<B::InnerBackend, D>(id) {
        Some(grad) => grad,
        None => return,
    };
    let key = id_to_key(id);
    state.register_state(key.as_str(), State::Data(grad.into_data().serialize()));
}
/// Rebuild the gradient tensor for parameter `id` from serialized data in
/// `state` (looked up via the key produced by `id_to_key`) on `device`, and
/// register it into `grads`.
///
/// Entries that are missing, or not the `State::Data` variant, are skipped
/// silently.
pub(super) fn load_state_gradients<const D: usize, B: ADBackend, F: Fn(&ParamId) -> String>(
    id: &ParamId,
    state: &StateNamed<B::FloatElem>,
    grads: &mut GradientsParams,
    id_to_key: F,
    device: &B::Device,
) {
    let key = id_to_key(id);
    if let Some(State::Data(data)) = state.get(key.as_str()) {
        let restored = Tensor::<B::InnerBackend, D>::from_data_device(Data::from(data), device);
        grads.register::<B::InnerBackend, D>(id.clone(), restored);
    }
}