advanced_algorithms/optimization/
gradient_descent.rs1use crate::Result;
25
/// Learning-rate schedule used by the optimizers in this module.
#[derive(Debug, Clone)]
pub enum LearningRate {
    /// Fixed step size for every iteration.
    Constant(f64),
    /// Harmonic decay: `initial / (1 + decay * t)`.
    Decreasing { initial: f64, decay: f64 },
    /// Geometric decay: `initial * decay^t`.
    Exponential { initial: f64, decay: f64 },
    /// AdaGrad-style schedule. The per-coordinate scaling (using `epsilon`)
    /// is applied by the optimizer itself, so this schedule just yields
    /// `initial` for every iteration.
    Adaptive { initial: f64, epsilon: f64 },
}

impl LearningRate {
    /// Step size to use at the given (0-based) iteration.
    fn get_rate(&self, iteration: usize) -> f64 {
        let t = iteration as f64;
        match *self {
            Self::Constant(rate) => rate,
            Self::Decreasing { initial, decay } => initial / (1.0 + decay * t),
            Self::Exponential { initial, decay } => initial * decay.powi(iteration as i32),
            Self::Adaptive { initial, .. } => initial,
        }
    }
}
53
/// Batch gradient descent with optional momentum and a pluggable
/// learning-rate schedule (see [`LearningRate`]).
///
/// Construct via `GradientDescent::new()` and the `with_*` builder methods.
pub struct GradientDescent {
    learning_rate: LearningRate,
    max_iterations: usize,
    tolerance: f64, // convergence threshold on the gradient L2 norm
    momentum: f64,  // 0.0 disables momentum
    verbose: bool,  // when true, minimize() records a (x, f(x)) history
}
62
63impl Default for GradientDescent {
64 fn default() -> Self {
65 GradientDescent {
66 learning_rate: LearningRate::Constant(0.01),
67 max_iterations: 1000,
68 tolerance: 1e-6,
69 momentum: 0.0,
70 verbose: false,
71 }
72 }
73}
74
75impl GradientDescent {
76 pub fn new() -> Self {
78 Self::default()
79 }
80
81 pub fn with_learning_rate(mut self, lr: LearningRate) -> Self {
83 self.learning_rate = lr;
84 self
85 }
86
87 pub fn with_max_iterations(mut self, max_iter: usize) -> Self {
89 self.max_iterations = max_iter;
90 self
91 }
92
93 pub fn with_tolerance(mut self, tol: f64) -> Self {
95 self.tolerance = tol;
96 self
97 }
98
99 pub fn with_momentum(mut self, momentum: f64) -> Self {
101 self.momentum = momentum;
102 self
103 }
104
105 pub fn verbose(mut self) -> Self {
107 self.verbose = true;
108 self
109 }
110
111 pub fn minimize<F, G>(
123 &self,
124 f: F,
125 grad_f: G,
126 initial: &[f64],
127 ) -> Result<OptimizationResult>
128 where
129 F: Fn(&[f64]) -> f64,
130 G: Fn(&[f64]) -> Vec<f64>,
131 {
132 let mut x = initial.to_vec();
133 let mut velocity = vec![0.0; x.len()];
134 let mut accumulated_squared_gradients = vec![0.0; x.len()];
135
136 let mut history = Vec::new();
137
138 for iteration in 0..self.max_iterations {
139 let current_value = f(&x);
140 let gradient = grad_f(&x);
141
142 if self.verbose {
143 history.push((x.clone(), current_value));
144 }
145
146 let grad_norm: f64 = gradient.iter().map(|g| g * g).sum::<f64>().sqrt();
148
149 if grad_norm < self.tolerance {
150 return Ok(OptimizationResult {
151 parameters: x,
152 value: current_value,
153 iterations: iteration,
154 gradient_norm: grad_norm,
155 converged: true,
156 history,
157 });
158 }
159
160 let lr = self.learning_rate.get_rate(iteration);
162
163 match &self.learning_rate {
164 LearningRate::Adaptive { epsilon, .. } => {
165 for i in 0..x.len() {
167 accumulated_squared_gradients[i] += gradient[i] * gradient[i];
168 let adjusted_lr = lr / (accumulated_squared_gradients[i].sqrt() + epsilon);
169
170 velocity[i] = self.momentum * velocity[i] - adjusted_lr * gradient[i];
171 x[i] += velocity[i];
172 }
173 }
174 _ => {
175 for i in 0..x.len() {
177 velocity[i] = self.momentum * velocity[i] - lr * gradient[i];
178 x[i] += velocity[i];
179 }
180 }
181 }
182 }
183
184 let final_value = f(&x);
185 let final_gradient = grad_f(&x);
186 let grad_norm: f64 = final_gradient.iter().map(|g| g * g).sum::<f64>().sqrt();
187
188 Ok(OptimizationResult {
189 parameters: x,
190 value: final_value,
191 iterations: self.max_iterations,
192 gradient_norm: grad_norm,
193 converged: grad_norm < self.tolerance,
194 history,
195 })
196 }
197}
198
/// Outcome of an optimization run.
#[derive(Debug, Clone)]
pub struct OptimizationResult {
    /// Final parameter vector.
    pub parameters: Vec<f64>,
    /// Objective value at `parameters`. Optimizers that never evaluate
    /// the objective (e.g. `StochasticGD::minimize`) report 0.0 here.
    pub value: f64,
    /// Number of iterations (or mini-batch updates) performed.
    pub iterations: usize,
    /// L2 norm used by the convergence test at termination.
    pub gradient_norm: f64,
    /// Whether that norm dropped below the optimizer's tolerance.
    pub converged: bool,
    /// Per-iteration `(parameters, value)` snapshots; empty unless the
    /// optimizer was configured to record them (`verbose`).
    pub history: Vec<(Vec<f64>, f64)>,
}
215
/// Mini-batch stochastic gradient descent with momentum and a
/// decreasing learning-rate schedule.
pub struct StochasticGD {
    learning_rate: LearningRate,
    max_epochs: usize,
    batch_size: usize, // samples per mini-batch
    tolerance: f64,    // convergence threshold on the velocity L2 norm
    momentum: f64,
}
224
225impl StochasticGD {
226 pub fn new(batch_size: usize) -> Self {
228 StochasticGD {
229 learning_rate: LearningRate::Decreasing {
230 initial: 0.01,
231 decay: 0.001,
232 },
233 max_epochs: 100,
234 batch_size,
235 tolerance: 1e-6,
236 momentum: 0.9,
237 }
238 }
239
240 pub fn minimize<G>(
248 &self,
249 grad_f: G,
250 initial: &[f64],
251 n_samples: usize,
252 ) -> Result<OptimizationResult>
253 where
254 G: Fn(&[f64], &[usize]) -> Vec<f64> + Sync,
255 {
256 let mut x = initial.to_vec();
257 let mut velocity = vec![0.0; x.len()];
258
259 let mut total_iterations = 0;
260
261 for _epoch in 0..self.max_epochs {
262 let n_batches = n_samples.div_ceil(self.batch_size);
264
265 for batch_idx in 0..n_batches {
266 let start = batch_idx * self.batch_size;
267 let end = (start + self.batch_size).min(n_samples);
268 let batch_indices: Vec<usize> = (start..end).collect();
269
270 let gradient = grad_f(&x, &batch_indices);
271
272 let lr = self.learning_rate.get_rate(total_iterations);
273
274 for i in 0..x.len() {
275 velocity[i] = self.momentum * velocity[i] - lr * gradient[i];
276 x[i] += velocity[i];
277 }
278
279 total_iterations += 1;
280 }
281
282 let grad_norm: f64 = velocity.iter().map(|v| v * v).sum::<f64>().sqrt();
284
285 if grad_norm < self.tolerance {
286 return Ok(OptimizationResult {
287 parameters: x,
288 value: 0.0,
289 iterations: total_iterations,
290 gradient_norm: grad_norm,
291 converged: true,
292 history: Vec::new(),
293 });
294 }
295 }
296
297 Ok(OptimizationResult {
298 parameters: x,
299 value: 0.0,
300 iterations: total_iterations,
301 gradient_norm: 0.0,
302 converged: false,
303 history: Vec::new(),
304 })
305 }
306}
307
/// Central-difference approximation of the gradient of `f` at `x`.
///
/// Each partial derivative is estimated as
/// `(f(x + eps*e_i) - f(x - eps*e_i)) / (2*eps)`, which has O(eps^2)
/// truncation error. `f` is called exactly `2 * x.len()` times.
pub fn numerical_gradient<F>(f: F, x: &[f64], epsilon: f64) -> Vec<f64>
where
    F: Fn(&[f64]) -> f64,
{
    (0..x.len())
        .map(|i| {
            // Perturb only coordinate i in each direction.
            let mut forward = x.to_vec();
            let mut backward = x.to_vec();
            forward[i] += epsilon;
            backward[i] -= epsilon;
            (f(&forward) - f(&backward)) / (2.0 * epsilon)
        })
        .collect()
}
328
#[cfg(test)]
mod tests {
    use super::*;

    // Convex 1-D quadratic (x - 3)^2: plain GD with a constant step
    // should converge to the unique minimum at x = 3.
    #[test]
    fn test_quadratic() {
        let f = |x: &[f64]| (x[0] - 3.0) * (x[0] - 3.0);
        let grad_f = |x: &[f64]| vec![2.0 * (x[0] - 3.0)];

        let gd = GradientDescent::new()
            .with_learning_rate(LearningRate::Constant(0.1))
            .with_max_iterations(1000);

        let result = gd.minimize(f, grad_f, &[0.0]).unwrap();

        assert!(result.converged);
        assert!((result.parameters[0] - 3.0).abs() < 0.01);
    }

    // Rosenbrock "banana" function: an ill-conditioned curved valley
    // where momentum helps plain GD make progress. We only require the
    // objective to get close to its minimum value (0 at (1, 1)), not
    // full convergence of the gradient norm.
    #[test]
    fn test_rosenbrock() {
        let f = |x: &[f64]| {
            let a = 1.0 - x[0];
            let b = x[1] - x[0] * x[0];
            a * a + 100.0 * b * b
        };

        let grad_f = |x: &[f64]| {
            vec![
                -2.0 * (1.0 - x[0]) - 400.0 * x[0] * (x[1] - x[0] * x[0]),
                200.0 * (x[1] - x[0] * x[0]),
            ]
        };

        let gd = GradientDescent::new()
            .with_learning_rate(LearningRate::Constant(0.001))
            .with_momentum(0.9)
            .with_max_iterations(10000);

        let result = gd.minimize(f, grad_f, &[0.0, 0.0]).unwrap();

        assert!(result.value < 0.1);
    }

    // Central differences on a separable quadratic should match the
    // analytical gradient to within the scheme's truncation error.
    #[test]
    fn test_numerical_gradient() {
        let f = |x: &[f64]| x[0] * x[0] + 2.0 * x[1] * x[1];

        let num_grad = numerical_gradient(f, &[1.0, 2.0], 1e-5);
        let analytical_grad = vec![2.0 * 1.0, 4.0 * 2.0];

        for (n, a) in num_grad.iter().zip(analytical_grad.iter()) {
            assert!((n - a).abs() < 1e-4);
        }
    }
}