//! Gradient descent optimization.
use crate::{
algebra::linear::Vector,
optimization::{Optim, OptimResult},
};
extern crate rand;
use crate::algebra::abstr::Real;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use std::clone::Clone;
/// Gradient method
///
/// It is assumed that $f \colon D \in \mathbb{R}^n \to \mathbb{R}$
/// The idea is, that in every iteration a step is made in direction of
/// the anti gradient.
///
/// ```math
/// x_{k + 1} := x_{k} - \alpha_{k} \nabla f(x_{k})
/// ```
/// in order that $ f(x_{k + 1}) < f(x_{k}) $.
///
/// input: Function $ f: \mathbb{R}^n \to \mathbb{R} $, and initial
/// approximation $ x_{0} \in \mathbb{R}^{n} $
///
/// output: $ x_{k} $
///
/// 1. Initialization: choose $ \sigma \in (0, 1) $ and set $ k := 0 $
/// 2. Calculate the anti-gradient $ d_{k} := -\nabla f(x_{k}) $ and set
///    $ \alpha_{k} := 1 $
/// 3. While $ f(x_{k} + \alpha_{k} d_{k}) > f(x_k) - \sigma \alpha_{k}
///    \lVert d_{k} \rVert_{2}^{2} $, halve the step size:
///    $ \alpha_{k} := \alpha_{k} / 2 $
/// 4. $ x_{k + 1} := x_{k} + \alpha_{k} d_{k} $
/// 5. $ k := k + 1 $, go to 2.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Clone, Copy, Debug)]
pub struct Gradient<T> {
    /// Armijo parameter $ \sigma \in (0, 1] $; scales the required
    /// decrease in the backtracking line-search acceptance condition.
    sigma: T,
    /// The number of iterations to run.
    iters: usize,
}
impl<T> Gradient<T>
where
    T: Real,
{
    /// Construct an instance of the gradient descent algorithm.
    ///
    /// # Parameters
    ///
    /// * `sigma`: learning rate with $ 0 < \sigma \leq 1 $
    /// * `iters`: number of iterations to run, must be greater than zero
    ///
    /// # Panics
    ///
    /// Panics if `sigma` is outside $ (0, 1] $ or `iters` is zero.
    ///
    /// # Examples
    ///
    /// ```
    /// use mathru::optimization::Gradient;
    ///
    /// let gd = Gradient::new(0.3, 10000);
    /// ```
    pub fn new(sigma: T, iters: usize) -> Gradient<T> {
        assert!(
            sigma > T::zero() && sigma <= T::one(),
            "sigma must be in the interval (0, 1]"
        );
        assert!(iters > 0, "iters must be greater than zero");
        Gradient { sigma, iters }
    }
}
impl<T> Gradient<T>
where
    T: Real,
{
    /// Minimize `func` by gradient descent, starting from `x_0`.
    ///
    /// Runs `self.iters` iterations. In each one, a step is taken in the
    /// direction of the anti-gradient; the step size `alpha_k` is found by a
    /// backtracking line search satisfying the Armijo–Goldstein condition.
    ///
    /// # Parameters
    ///
    /// * `func`: objective function providing `eval` and `jacobian`
    /// * `x_0`: initial approximation
    ///
    /// # Return
    ///
    /// The final iterate $ x_k $ wrapped in an `OptimResult`.
    pub fn minimize<F>(&self, func: &F, x_0: &Vector<T>) -> OptimResult<Vector<T>>
    where
        F: Optim<T>,
    {
        let mut x_k: Vector<T> = x_0.clone();

        for _k in 0..self.iters {
            // Descent direction: d_k = -∇f(x_k).
            let anti_grad: Vector<T> = -func.jacobian(&x_k).get_row(0).transpose();

            // f(x_k) is invariant during the line search below; evaluate it
            // once per outer iteration instead of once per halving step.
            let f_x_k: T = func.eval(&x_k)[0];

            // Backtracking line search (Armijo–Goldstein condition):
            // halve alpha_k until
            //   f(x_k + alpha_k d_k) <= f(x_k) - sigma * alpha_k * ||d_k||^2
            let mut alpha_k: T = T::one();
            loop {
                let step: Vector<T> = &anti_grad * &alpha_k;
                let candidate: Vector<T> = &x_k + &step;
                let f_candidate: T = func.eval(&candidate)[0];
                // step ⋅ d_k == alpha_k * ||d_k||^2 (always >= 0)
                let required_decrease: T = self.sigma * step.dotp(&anti_grad);

                if f_candidate <= f_x_k - required_decrease {
                    break;
                }
                alpha_k /= T::from_f64(2.0);
            }

            // Make the step: x_{k+1} = x_k + alpha_k d_k.
            x_k += anti_grad * alpha_k;
        }

        OptimResult::new(x_k)
    }
}