use num_traits::Float;
use std::collections::VecDeque;
use std::fmt::Debug;
use crate::math::optimization::{ObjectiveFunction, OptimizationConfig, OptimizationResult};
pub fn minimize<T, F>(
f: &F,
initial_point: &[T],
config: &OptimizationConfig<T>,
) -> OptimizationResult<T>
where
T: Float + Debug,
F: ObjectiveFunction<T>,
{
const M: usize = 10; let n = initial_point.len();
let mut current_point = initial_point.to_vec();
let mut iterations = 0;
let mut converged = false;
let mut s_list: VecDeque<Vec<T>> = VecDeque::with_capacity(M);
let mut y_list: VecDeque<Vec<T>> = VecDeque::with_capacity(M);
let mut rho_list: VecDeque<T> = VecDeque::with_capacity(M);
let mut gradient = match f.gradient(¤t_point) {
Some(g) => g,
None => {
return OptimizationResult {
optimal_point: current_point.clone(),
optimal_value: f.evaluate(¤t_point),
iterations: 0,
converged: false,
};
}
};
while iterations < config.max_iterations {
let gradient_norm = gradient
.iter()
.fold(T::zero(), |acc, &x| acc + x * x)
.sqrt();
if gradient_norm < config.tolerance {
converged = true;
break;
}
let mut q = gradient.clone();
let mut alpha_list = Vec::with_capacity(s_list.len());
for i in (0..s_list.len()).rev() {
let alpha = rho_list[i]
* s_list[i]
.iter()
.zip(q.iter())
.fold(T::zero(), |acc, (&s, &q)| acc + s * q);
alpha_list.push(alpha);
for (q_j, y_j) in q.iter_mut().zip(y_list[i].iter()) {
*q_j = *q_j - alpha * *y_j;
}
}
let mut r = if !s_list.is_empty() {
let i = s_list.len() - 1;
let yy = y_list[i].iter().fold(T::zero(), |acc, &y| acc + y * y);
let ys = y_list[i]
.iter()
.zip(s_list[i].iter())
.fold(T::zero(), |acc, (&y, &s)| acc + y * s);
q.iter_mut().for_each(|r_j| *r_j = *r_j * (ys / yy));
q
} else {
q.iter_mut()
.for_each(|r_j| *r_j = *r_j * config.learning_rate);
q
};
for i in 0..s_list.len() {
let beta = rho_list[i]
* y_list[i]
.iter()
.zip(r.iter())
.fold(T::zero(), |acc, (&y, &r)| acc + y * r);
let alpha = alpha_list[s_list.len() - 1 - i];
for (r_j, s_j) in r.iter_mut().zip(s_list[i].iter()) {
*r_j = *r_j + (alpha - beta) * *s_j;
}
}
let direction: Vec<T> = r.iter().map(|&x| -x).collect();
let mut alpha = T::one();
let mut new_point = vec![T::zero(); n];
let current_value = f.evaluate(¤t_point);
for _ in 0..20 {
for i in 0..n {
new_point[i] = current_point[i] + alpha * direction[i];
}
let new_value = f.evaluate(&new_point);
if new_value < current_value {
break;
}
alpha = alpha * T::from(0.5).unwrap();
}
let new_gradient = match f.gradient(&new_point) {
Some(g) => g,
None => break,
};
let s = new_point
.iter()
.zip(current_point.iter())
.map(|(&x_new, &x_old)| x_new - x_old)
.collect::<Vec<T>>();
let y = new_gradient
.iter()
.zip(gradient.iter())
.map(|(&g_new, &g_old)| g_new - g_old)
.collect::<Vec<T>>();
let ys = y
.iter()
.zip(s.iter())
.fold(T::zero(), |acc, (&y_i, &s_i)| acc + y_i * s_i);
let rho = T::one() / ys;
if s_list.len() == M {
s_list.pop_front();
y_list.pop_front();
rho_list.pop_front();
}
s_list.push_back(s);
y_list.push_back(y);
rho_list.push_back(rho);
current_point = new_point;
gradient = new_gradient;
iterations += 1;
}
OptimizationResult {
optimal_point: current_point.clone(),
optimal_value: f.evaluate(¤t_point),
iterations,
converged,
}
}
#[cfg(test)]
mod tests {
use super::*;
struct Quadratic;
impl ObjectiveFunction<f64> for Quadratic {
fn evaluate(&self, point: &[f64]) -> f64 {
point.iter().map(|x| x * x).sum()
}
fn gradient(&self, point: &[f64]) -> Option<Vec<f64>> {
Some(point.iter().map(|x| 2.0 * x).collect())
}
}
#[test]
fn test_lbfgs_quadratic() {
let f = Quadratic;
let initial_point = vec![1.0, 1.0];
let config = OptimizationConfig {
max_iterations: 100,
tolerance: 1e-6,
learning_rate: 1.0,
};
let result = minimize(&f, &initial_point, &config);
assert!(result.converged);
assert!(result.optimal_value < 1e-10);
for x in result.optimal_point {
assert!(x.abs() < 1e-5);
}
}
struct QuadraticWithMinimum;
impl ObjectiveFunction<f64> for QuadraticWithMinimum {
fn evaluate(&self, point: &[f64]) -> f64 {
let x = point[0];
(x - 2.0).powi(2)
}
fn gradient(&self, point: &[f64]) -> Option<Vec<f64>> {
let x = point[0];
Some(vec![2.0 * (x - 2.0)])
}
}
#[test]
fn test_lbfgs_quadratic_with_minimum() {
let f = QuadraticWithMinimum;
let initial_point = vec![0.0];
let config = OptimizationConfig {
max_iterations: 100,
tolerance: 1e-6,
learning_rate: 1.0,
};
let result = minimize(&f, &initial_point, &config);
assert!(result.converged);
assert!((result.optimal_point[0] - 2.0).abs() < 1e-5);
assert!(result.optimal_value < 1e-10);
}
struct Rosenbrock;
impl ObjectiveFunction<f64> for Rosenbrock {
fn evaluate(&self, point: &[f64]) -> f64 {
let x = point[0];
let y = point[1];
(x - 1.0).powi(2) + 100.0 * (y - x.powi(2)).powi(2)
}
fn gradient(&self, point: &[f64]) -> Option<Vec<f64>> {
let x = point[0];
let y = point[1];
Some(vec![
2.0 * (x - 1.0) - 400.0 * x * (y - x.powi(2)),
200.0 * (y - x.powi(2)),
])
}
}
#[test]
fn test_lbfgs_rosenbrock() {
let f = Rosenbrock;
let initial_point = vec![0.0, 0.0];
let config = OptimizationConfig {
max_iterations: 1000,
tolerance: 1e-6,
learning_rate: 0.01,
};
let result = minimize(&f, &initial_point, &config);
assert!(result.converged);
assert!((result.optimal_point[0] - 1.0).abs() < 1e-3);
assert!((result.optimal_point[1] - 1.0).abs() < 1e-3);
}
struct HighDimensionalQuadratic;
impl ObjectiveFunction<f64> for HighDimensionalQuadratic {
fn evaluate(&self, point: &[f64]) -> f64 {
point
.iter()
.enumerate()
.map(|(i, &x)| (i + 1) as f64 * x * x)
.sum()
}
fn gradient(&self, point: &[f64]) -> Option<Vec<f64>> {
Some(
point
.iter()
.enumerate()
.map(|(i, &x)| 2.0 * (i + 1) as f64 * x)
.collect(),
)
}
}
#[test]
fn test_lbfgs_high_dimensional() {
let f = HighDimensionalQuadratic;
let initial_point = vec![1.0; 100];
let config = OptimizationConfig {
max_iterations: 1000,
tolerance: 1e-6,
learning_rate: 1.0,
};
let result = minimize(&f, &initial_point, &config);
assert!(result.converged);
assert!(result.optimal_value < 1e-6);
for x in result.optimal_point {
assert!(x.abs() < 1e-3);
}
}
}