1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
/*!
Optimisers for use with the [candle](https://github.com/huggingface/candle) framework for lightweight machine learning.
Apart from LBFGS, these all implement the [`candle_nn::optim::Optimizer`] trait from candle-nn
# Example
Training an MNIST model using the Adam optimiser
```
# use candle_core::{Result, Tensor};
# use candle_core::{DType, D};
# use candle_nn::{loss, ops, VarBuilder, VarMap, optim::Optimizer};
# use candle_optimisers::{
# adam::{Adam, ParamsAdam}
# };
#
# pub trait Model: Sized {
# fn new(vs: VarBuilder) -> Result<Self>;
# fn forward(&self, xs: &Tensor) -> Result<Tensor>;
# }
#
# pub fn training_loop<M: Model>(
# m: candle_datasets::vision::Dataset,
# varmap: &VarMap,
# model: M,
# ) -> anyhow::Result<()> {
# // check to see if a CUDA device is available
# let dev = candle_core::Device::cuda_if_available(0)?;
# // get the input from the dataset and put on device
# let train_images = m.train_images.to_device(&dev)?;
# // get the training labels on the device
# let train_labels = m.train_labels.to_dtype(DType::U32)?.to_device(&dev)?;
#
#
# // load the test images
# let test_images = m.test_images.to_device(&dev)?;
# // load the test labels
# let test_labels = m.test_labels.to_dtype(DType::U32)?.to_device(&dev)?;
#
// create the Adam optimiser
// set the learning rate to 0.004 and use the default parameters for everything else
let params = ParamsAdam {
lr: 0.004,
..Default::default()
};
// create the optimiser by passing in the variable to be optimised and the parameters
let mut optimiser = Adam::new(varmap.all_vars(), params)?;
// loop for model optimisation
for epoch in 0..100 {
// run the model forwards
// get log probabilities of results
let logits = model.forward(&train_images)?;
// softmax the log probabilities
let log_sm = ops::log_softmax(&logits, D::Minus1)?;
// get the loss
let loss = loss::nll(&log_sm, &train_labels)?;
// step the tensors by backpropagating the loss
optimiser.backward_step(&loss)?;
# // get the log probabilities of the test images
# let test_logits = model.forward(&test_images)?;
# // get the sum of the correct predictions
# let sum_ok = test_logits
# .argmax(D::Minus1)?
# .eq(&test_labels)?
# .to_dtype(DType::F32)?
# .sum_all()?
# .to_scalar::<f32>()?;
# // get the accuracy on the test set
# #[allow(clippy::cast_precision_loss)]
# let test_accuracy = sum_ok / test_labels.dims1()? as f32;
# println!(
# "{:4} train loss: {:8.5} test acc: {:5.2}%",
# epoch + 1,
# loss.to_scalar::<f32>()?,
# 100. * test_accuracy
# );
}
Ok(())
# }
```
*/
use Debug;
use Result as CResult;
use Tensor;
use Var;
/// Trait for optimisers to expose their parameters
/// Trait for Models: this is needed for optimisers that require the ability to calculate the loss
/// such as LBFGS
/// trait for optimisers like LBFGS that need the ability to calculate the loss
/// and its gradient
/// Outcomes of an optimiser step for methods such as LBFGS
/// Method of weight decay to use
/// Type of momentum to use