//! Gradient descent optimiser built on the `RustQuant_autodiff` automatic
//! differentiation graph. Each iteration evaluates the objective on a fresh
//! tape, accumulates the gradient, and takes the fixed step
//! `x_{k+1} = x_k - learning_rate * grad f(x_k)` until the gradient norm falls
//! below the tolerance or the iteration budget is exhausted.

use std::time::{Duration, Instant};

use RustQuant_autodiff::{variable::Variable, Accumulate, Gradient, Graph};
/// Gradient descent optimiser.
#[derive(Default, Debug, Clone)]
pub struct GradientDescent {
    /// Step size (learning rate) used for each update.
    pub learning_rate: f64,
    /// Maximum number of iterations before the search stops.
    pub max_iterations: usize,
    /// Convergence tolerance on the gradient norm.
    /// Defaults to `f64::EPSILON.sqrt()` when `None`.
    pub tolerance: Option<f64>,
}
/// Result of a gradient descent run.
#[allow(clippy::module_name_repetitions)]
pub struct GradientDescentResult {
    /// Iterate at which the search stopped.
    pub minimizer: Vec<f64>,
    /// Objective value at the final iterate.
    pub minimum: f64,
    /// Number of iterations performed.
    pub iterations: usize,
    /// Wall-clock time taken by the optimisation.
    pub elapsed: Duration,
}
impl GradientDescent {
    /// Creates a new gradient descent optimiser.
    ///
    /// # Panics
    ///
    /// Panics if a tolerance is supplied and is not strictly positive.
    #[must_use]
    pub fn new(learning_rate: f64, max_iterations: usize, tolerance: Option<f64>) -> Self {
        if let Some(tol) = tolerance {
            assert!(tol > 0.0, "Tolerance must be strictly positive.");
        }

        Self {
            learning_rate,
            max_iterations,
            tolerance,
        }
    }
    /// Returns `true` when the Euclidean norm of the gradient is below `tol`.
    #[inline]
    fn is_stationary(gradient: &[f64], tol: f64) -> bool {
        Self::norm(gradient) < tol
    }

    /// Euclidean (L2) norm of a slice.
    #[inline]
    fn norm(x: &[f64]) -> f64 {
        x.iter().map(|&xi| xi * xi).sum::<f64>().sqrt()
    }
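    /// Minimises `f` starting from `x0` using fixed-step gradient descent,
    /// differentiating `f` with the autodiff graph on each iteration.
    ///
    /// # Example
    ///
    /// A minimal usage sketch; the `objective` helper is illustrative, and the
    /// example is marked `ignore` because the import path for
    /// [`GradientDescent`] depends on where this module sits in the crate.
    ///
    /// ```ignore
    /// use RustQuant_autodiff::variable::Variable;
    ///
    /// // Objective: f(x) = x^2, minimised at x = 0.
    /// fn objective<'v>(x: &[Variable<'v>]) -> Variable<'v> {
    ///     x[0] * x[0]
    /// }
    ///
    /// let gd = GradientDescent::new(0.1, 1_000, Some(1e-6));
    /// let result = gd.optimize(objective, &[10.0], false);
    ///
    /// assert!(result.minimizer[0].abs() < 1e-3);
    /// ```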
    #[allow(clippy::assign_op_pattern)]
    pub fn optimize<F>(&self, f: F, x0: &[f64], verbose: bool) -> GradientDescentResult
    where
        F: for<'v> Fn(&[Variable<'v>]) -> Variable<'v>,
    {
        let start = Instant::now();
        let tolerance = self.tolerance.unwrap_or(f64::EPSILON.sqrt());

        let mut result = GradientDescentResult {
            minimum: 0.0,
            minimizer: x0.to_vec(),
            iterations: 0,
            elapsed: start.elapsed(),
        };
        for k in 0..self.max_iterations {
            // Each iteration gets a fresh tape so the graph does not grow unboundedly.
            let graph = Graph::new();

            result.iterations = k + 1;

            // Evaluate the objective and accumulate its gradient at the current iterate.
            let location = graph.vars(&result.minimizer);
            let function = f(&location);
            let gradient = function.accumulate().wrt(&location);

            // Record the objective at the current iterate so that, on convergence,
            // `minimum` corresponds to the reported `minimizer`. The value is taken
            // from the already-evaluated `function` instead of re-building it on the tape.
            result.minimum = function.value;

            if Self::is_stationary(&gradient, tolerance) {
                break;
            }

            // Fixed-step update: x <- x - learning_rate * grad f(x).
            result
                .minimizer
                .iter_mut()
                .zip(&gradient)
                .for_each(|(xi, gi)| *xi = *xi - self.learning_rate * gi);

            if verbose {
                println!(
                    "Iter: {:?}, Norm: {}, Func: {:.4?}, X: {:.4?}",
                    k + 1,
                    Self::norm(&gradient),
                    function.value,
                    location.iter().map(|x| x.value).collect::<Vec<f64>>()
                );
            }
        }

        result.elapsed = start.elapsed();
        result
    }
}
#[cfg(test)]
mod test_gradient_descent {
    use super::*;
    use RustQuant_autodiff::overload::Powf;
    use RustQuant_autodiff::variable::Variable;

    #[test]
    fn test_gradient_descent_new() {
        let gd = GradientDescent::new(0.1, 1000, Some(0.0001));

        assert_eq!(gd.learning_rate, 0.1);
        assert_eq!(gd.max_iterations, 1000);
        assert_eq!(gd.tolerance, Some(0.0001));
    }

    #[test]
    fn test_is_stationary() {
        assert!(GradientDescent::is_stationary(&[0.00001, 0.00001], 0.0001));
        assert!(!GradientDescent::is_stationary(&[0.01, 0.01], 0.0001));
    }

    #[test]
    fn test_norm() {
        assert_eq!(GradientDescent::norm(&[3.0, 4.0]), 5.0);
    }
    #[test]
    fn test_optimize_x_squared() {
        // f(x) = x^2 has its global minimum f(0) = 0.
        fn f<'v>(x: &[Variable<'v>]) -> Variable<'v> {
            x[0] * x[0]
        }

        let gd = GradientDescent::new(0.1, 1000, Some(0.000_001));
        let result = gd.optimize(f, &[10.0], false);

        println!("Minimum: {:?}", result.minimum);
        println!("Minimizer: {:?}", result.minimizer);
        println!("Iterations: {:?}", result.iterations);
        println!("Elapsed: {:?}", result.elapsed);
    }
    #[test]
    fn test_optimize_booth() {
        // Booth function: global minimum f(1, 3) = 0.
        fn f<'v>(variables: &[Variable<'v>]) -> Variable<'v> {
            let x = variables[0];
            let y = variables[1];

            (x + 2. * y - 7.).powf(2.0) + (2. * x + y - 5.).powf(2.0)
        }

        let gd = GradientDescent::new(0.1, 1000, Some(0.000_001));
        let result = gd.optimize(f, &[5.0, 5.0], false);

        println!("Minimum: {:?}", result.minimum);
        println!("Minimizer: {:?}", result.minimizer);
        println!("Iterations: {:?}", result.iterations);
    }
    #[test]
    fn test_optimize_rosenbrock() {
        // Rosenbrock function (a = 1, b = 100): global minimum f(1, 1) = 0.
        fn f<'v>(variables: &[Variable<'v>]) -> Variable<'v> {
            let x = variables[0];
            let y = variables[1];

            (1. - x).powf(2.0) + 100. * (y - x.powf(2.0)).powf(2.0)
        }

        let gd = GradientDescent::new(0.001, 10000, Some(0.000_001));
        let result = gd.optimize(f, &[0.0, 5.0], false);

        println!("Minimum: {:?}", result.minimum);
        println!("Minimizer: {:?}", result.minimizer);
        println!("Iterations: {:?}", result.iterations);
    }
}