// Submodule declarations. The snake_case module names below are assumptions
// inferred from the re-exported type names; the original paths were lost.
/// AdaGrad (Adaptive Gradient Algorithm) optimizer
pub mod adagrad;
/// Adam (Adaptive Moment Estimation) optimizer
pub mod adam;
/// Input validation functions for optimizers
pub mod validation;
/// RMSprop (Root Mean Square Propagation) optimizer
pub mod rmsprop;
/// SGD (Stochastic Gradient Descent) optimizer
pub mod sgd;

pub use adagrad::AdaGrad;
pub use adam::Adam;
pub use rmsprop::RMSprop;
pub use sgd::SGD;

/// Optimizer cache for dense layers.
///
/// Stores optional per-optimizer state between iterations for dense layers.
///
/// # Fields
///
/// - `adam_states` - Optional cache storage for Adam optimizer states (momentum and velocity terms)
/// - `rmsprop_cache` - Optional cache storage for RMSprop optimizer running averages
/// - `ada_grad_cache` - Optional cache storage for AdaGrad optimizer accumulated squared gradients
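// Minimal sketch of the struct described above. The name `DenseOptimizerCache`,
// the `Default` derive, and the state types `AdamStates`, `RMSpropCache`, and
// `AdaGradCache` are assumptions; only the field names come from the doc
// comment. Each field is `None` until its optimizer is first used.
#[derive(Default)]
pub struct DenseOptimizerCache {
    pub adam_states: Option<AdamStates>,
    pub rmsprop_cache: Option<RMSpropCache>,
    pub ada_grad_cache: Option<AdaGradCache>,
}
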
/// Optimizer cache for 1D convolutional layers.
///
/// Stores optional per-optimizer state between iterations for Conv1D layers.
///
/// # Fields
///
/// - `adam_states` - Optional cache storage for Adam optimizer states, including
///   first and second moment estimates for both the weights and biases used in
///   1D convolution
/// - `rmsprop_cache` - Optional cache storage for RMSprop optimizer state,
///   including exponentially decaying averages of squared gradients for weights
///   and biases
/// - `ada_grad_cache` - Optional cache storage for AdaGrad optimizer accumulated squared gradients
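// Minimal sketch under the same assumptions as `DenseOptimizerCache` above;
// the name `Conv1DOptimizerCache` is assumed. The state types would hold
// per-weight and per-bias tensors shaped for 1D convolution kernels.
#[derive(Default)]
pub struct Conv1DOptimizerCache {
    pub adam_states: Option<AdamStates>,
    pub rmsprop_cache: Option<RMSpropCache>,
    pub ada_grad_cache: Option<AdaGradCache>,
}
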
/// Optimizer cache for 2D convolutional layers.
///
/// Stores optional per-optimizer state between iterations for Conv2D layers.
///
/// # Fields
///
/// - `adam_states` - Optional cache storage for Adam optimizer states (momentum and velocity terms) for feature extraction layers
/// - `rmsprop_cache` - Optional cache storage for RMSprop optimizer running averages for feature extraction layers
/// - `ada_grad_cache` - Optional cache storage for AdaGrad optimizer accumulated squared gradients
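// Minimal sketch under the same assumptions as above; the name
// `Conv2DOptimizerCache` is assumed. Same three optional slots, shaped for
// 2D convolution kernels.
#[derive(Default)]
pub struct Conv2DOptimizerCache {
    pub adam_states: Option<AdamStates>,
    pub rmsprop_cache: Option<RMSpropCache>,
    pub ada_grad_cache: Option<AdaGradCache>,
}
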
/// Optimizer cache for 3D convolutional layers.
///
/// Stores optional per-optimizer state between iterations for Conv3D layers.
///
/// # Fields
///
/// - `adam_states` - Optional cache for Adam optimizer state, including first
///   and second moment estimates for both weights and biases
/// - `rmsprop_cache` - Optional cache for RMSprop optimizer state, including
///   exponentially decaying averages of squared gradients for weights and biases
/// - `ada_grad_cache` - Optional cache storage for AdaGrad optimizer accumulated squared gradients
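// Minimal sketch under the same assumptions as above; the name
// `Conv3DOptimizerCache` is assumed.
#[derive(Default)]
pub struct Conv3DOptimizerCache {
    pub adam_states: Option<AdamStates>,
    pub rmsprop_cache: Option<RMSpropCache>,
    pub ada_grad_cache: Option<AdaGradCache>,
}
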
/// Optimizer cache for normalization layers.
///
/// Stores optional per-optimizer state between iterations for normalization layers.
///
/// # Fields
///
/// - `adam_states` - Optional cache for Adam optimizer state, including first
///   and second moment estimates for the gamma and beta parameters
/// - `rmsprop_cache` - Optional cache for RMSprop optimizer state, including
///   exponentially decaying averages of squared gradients for the gamma and
///   beta parameters
/// - `ada_grad_cache` - Optional cache storage for AdaGrad optimizer accumulated squared gradients for gamma and beta parameters
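// Minimal sketch under the same assumptions as above; the name
// `NormalizationOptimizerCache` is assumed. Here the cached state covers the
// gamma (scale) and beta (shift) parameters of the normalization layer.
#[derive(Default)]
pub struct NormalizationOptimizerCache {
    pub adam_states: Option<AdamStates>,
    pub rmsprop_cache: Option<RMSpropCache>,
    pub ada_grad_cache: Option<AdaGradCache>,
}

// Usage sketch: every field starts as `None`, so a layer only allocates state
// for the optimizer actually in use. A first update step might look like the
// following (`AdamStates::zeros_like` is a hypothetical constructor, not a
// confirmed API; `Option::get_or_insert_with` is standard library):
//
//     let mut cache = NormalizationOptimizerCache::default();
//     let states = cache
//         .adam_states
//         .get_or_insert_with(|| AdamStates::zeros_like(&gamma, &beta));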