use super::mapper::ModuleTensorUpdater;
use super::visitor::{GradientsLoader, GradientsRegister};
use super::GradientsParams;

use crate::module::{ADModule, LoadingError, Module, ParamId, State, StateNamed};
use crate::tensor::backend::{ADBackend, Backend};
use crate::tensor::{Data, Tensor};

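/// Optimization strategy that updates the parameters of a module from computed
/// gradients.
///
/// Implementors only need to provide [`update_tensor`](Optimizer::update_tensor);
/// the provided methods walk a module's parameters to apply updates and to save
/// or load any per-parameter optimizer state.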
pub trait Optimizer: Send + Sync {
    type Backend: ADBackend;

    /// Update the tensor parameter using the given gradients.
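    ///
    /// This is called once per parameter tensor when
    /// [`update_module`](Optimizer::update_module) maps over the module, so
    /// implementations typically look up per-parameter state (such as momentum)
    /// by `id`.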
    fn update_tensor<const D: usize>(
        &mut self,
        id: &ParamId,
        tensor: Tensor<Self::Backend, D>,
        grad: Tensor<<Self::Backend as ADBackend>::InnerBackend, D>,
    ) -> Tensor<Self::Backend, D>;

    /// Update the parameters of the given module using the given gradients.
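    ///
    /// # Example
    ///
    /// A minimal sketch; `optimizer`, `model`, and `grads` are illustrative
    /// bindings, not items defined in this module:
    ///
    /// ```ignore
    /// // `model` is an `ADModule` and `grads` holds the gradients computed
    /// // for its parameters, e.g. from a backward pass.
    /// let model = optimizer.update_module(model, grads);
    /// ```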
    fn update_module<M>(&mut self, module: M, grads: GradientsParams) -> M
    where
        M: ADModule<ADBackend = Self::Backend>,
        Self: Sized,
    {
        let mut mapper = ModuleTensorUpdater::new(self, grads);
        module.map(&mut mapper)
    }

    /// Register the optimizer state for a given parameter.
    ///
    /// # Notes
    ///
    /// This should only be called by generated code.
    fn register_param_state<const D: usize>(
        &self,
        _id: &ParamId,
        _state: &mut StateNamed<<Self::Backend as Backend>::FloatElem>,
    ) {
        // By default there is no state to register
    }

    /// Load the optimizer state for a given parameter.
    ///
    /// # Notes
    ///
    /// This should only be called by generated code.
    fn load_param_state<const D: usize>(
        &mut self,
        _id: &ParamId,
        _state: &StateNamed<<Self::Backend as Backend>::FloatElem>,
        _device: &<Self::Backend as Backend>::Device,
    ) {
        // By default there is no state to load
    }

    /// Get the optimizer state for a given module.
    fn state<M: Module<Backend = Self::Backend>>(
        &self,
        module: &M,
    ) -> State<<Self::Backend as Backend>::FloatElem>
    where
        Self: Sized,
    {
        let mut state_named = StateNamed::new();
        let mut visitor = GradientsRegister::new(self, &mut state_named);

        module.visit(&mut visitor);
        State::StateNamed(state_named)
    }

    /// Load the optimizer state for a given module.
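    ///
    /// The state must be the [`State::StateNamed`] variant, as produced by
    /// [`state`](Optimizer::state); any other variant results in a
    /// [`LoadingError`].
    ///
    /// # Example
    ///
    /// A minimal sketch; `optimizer`, `model`, and `state` are illustrative
    /// bindings:
    ///
    /// ```ignore
    /// // Restore per-parameter optimizer state previously captured with
    /// // `Optimizer::state`.
    /// optimizer.load(&model, &state)?;
    /// ```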
    fn load<M: Module<Backend = Self::Backend>>(
        &mut self,
        module: &M,
        state: &State<<Self::Backend as Backend>::FloatElem>,
    ) -> Result<(), LoadingError>
    where
        Self: Sized,
    {
        let state_named = match state {
            State::StateNamed(state) => state,
            _ => {
                return Err(LoadingError::new(
                    "Can't load state wrapper to fetch id and data".to_string(),
                ))
            }
        };

        let mut visitor = GradientsLoader::new(self, state_named);
        module.visit(&mut visitor);

        Ok(())
    }
}

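/// Serialize the gradient (if any) registered for `id` and store it in the
/// named state under the key produced by `id_to_key`.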
pub(super) fn register_state_gradients<const D: usize, B: ADBackend, F: Fn(&ParamId) -> String>(
    id: &ParamId,
    state: &mut StateNamed<B::FloatElem>,
    grads: &GradientsParams,
    id_to_key: F,
) {
    if let Some(grad) = grads.get::<B::InnerBackend, D>(id) {
        let data = State::Data(grad.into_data().serialize());
        state.register_state(id_to_key(id).as_str(), data);
    }
}

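/// Deserialize the state entry (if any) stored under the key produced by
/// `id_to_key` into a tensor on `device`, and register it in `grads` for `id`.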
pub(super) fn load_state_gradients<const D: usize, B: ADBackend, F: Fn(&ParamId) -> String>(
    id: &ParamId,
    state: &StateNamed<B::FloatElem>,
    grads: &mut GradientsParams,
    id_to_key: F,
    device: &B::Device,
) {
    if let Some(State::Data(data)) = state.get(id_to_key(id).as_str()) {
        let tensor = Tensor::<B::InnerBackend, D>::from_data_device(Data::from(data), device);
        grads.register::<B::InnerBackend, D>(id.clone(), tensor);
    }
}