use num_traits::Float;
use std::fmt::Debug;
use crate::math::optimization::{ObjectiveFunction, OptimizationConfig, OptimizationResult};
/// Minimizes `f` with the BFGS quasi-Newton method, using a backtracking
/// line search that enforces the strong Wolfe conditions.
///
/// Starting from `initial_point`, the routine maintains an approximation of
/// the inverse Hessian (initialized to the identity) and updates it each
/// iteration with the BFGS rank-two formula. Convergence is declared when the
/// gradient norm falls below `config.tolerance * max(1, |f(x)|)`.
///
/// Returns an `OptimizationResult` holding the best point found, its
/// objective value, the iteration count, and the convergence flag. If
/// `f.gradient` returns `None` at the start, the initial point is returned
/// with `converged = false`; if it fails mid-run, the search stops at the
/// best point so far.
pub fn minimize<T>(
    f: &impl ObjectiveFunction<T>,
    initial_point: &[T],
    config: &OptimizationConfig<T>,
) -> OptimizationResult<T>
where
    T: Float + Debug,
{
    let n = initial_point.len();
    let mut current_point = initial_point.to_vec();
    let mut iterations = 0;
    let mut converged = false;

    // Inverse-Hessian approximation, initialized to the identity matrix.
    let mut h_inv: Vec<Vec<T>> = (0..n)
        .map(|i| {
            (0..n)
                .map(|j| if i == j { T::one() } else { T::zero() })
                .collect()
        })
        .collect();

    let mut gradient = match f.gradient(&current_point) {
        Some(g) => g,
        None => {
            // No gradient available at the starting point: cannot iterate.
            return OptimizationResult {
                optimal_point: current_point.clone(),
                optimal_value: f.evaluate(&current_point),
                iterations: 0,
                converged: false,
            };
        }
    };

    // Wolfe line-search constants: c1 for sufficient decrease (Armijo),
    // c2 for the curvature condition. `half` is hoisted out of the
    // backtracking loop.
    let c1 = T::from(1e-4).unwrap();
    let c2 = T::from(0.9).unwrap();
    let half = T::from(0.5).unwrap();
    let max_line_search = 20;

    while iterations < config.max_iterations {
        // Evaluate the objective once per iteration: it serves both the
        // relative convergence test and the line-search reference value.
        let current_value = f.evaluate(&current_point);

        let gradient_norm = gradient
            .iter()
            .fold(T::zero(), |acc, &g| acc + g * g)
            .sqrt();
        // Relative tolerance: scale by max(1, |f(x)|) so large objective
        // values do not demand an unreasonably tight absolute gradient norm.
        let scale = T::one().max(current_value.abs());
        if gradient_norm < config.tolerance * scale {
            converged = true;
            break;
        }

        // Search direction: d = -H_inv * gradient.
        let direction: Vec<T> = h_inv
            .iter()
            .map(|row| {
                row.iter()
                    .zip(gradient.iter())
                    .fold(T::zero(), |acc, (&h, &g)| acc - h * g)
            })
            .collect();

        let directional_derivative = gradient
            .iter()
            .zip(direction.iter())
            .fold(T::zero(), |acc, (&g, &d)| acc + g * d);

        // Backtracking line search: halve alpha until both the sufficient
        // decrease and the (strong) curvature conditions hold.
        let mut alpha = T::one();
        let mut new_point = vec![T::zero(); n];
        let mut found_step = false;
        for _ in 0..max_line_search {
            for i in 0..n {
                new_point[i] = current_point[i] + alpha * direction[i];
            }
            let new_value = f.evaluate(&new_point);
            if new_value <= current_value + c1 * alpha * directional_derivative {
                if let Some(new_grad) = f.gradient(&new_point) {
                    let new_directional_derivative = new_grad
                        .iter()
                        .zip(direction.iter())
                        .fold(T::zero(), |acc, (&g, &d)| acc + g * d);
                    if new_directional_derivative.abs() <= c2 * directional_derivative.abs() {
                        found_step = true;
                        break;
                    }
                }
            }
            alpha = alpha * half;
        }
        if !found_step {
            // Fall back to a tiny fixed step so the iteration never stalls.
            alpha = T::from(1e-4).unwrap();
            for i in 0..n {
                new_point[i] = current_point[i] + alpha * direction[i];
            }
        }

        // BFGS update quantities: s = x_new - x_old, y = g_new - g_old.
        let s: Vec<T> = new_point
            .iter()
            .zip(current_point.iter())
            .map(|(&x_new, &x_old)| x_new - x_old)
            .collect();
        let new_gradient = match f.gradient(&new_point) {
            Some(g) => g,
            // Gradient unavailable: stop and report the best point so far.
            None => break,
        };
        let y: Vec<T> = new_gradient
            .iter()
            .zip(gradient.iter())
            .map(|(&g_new, &g_old)| g_new - g_old)
            .collect();
        let ys = y
            .iter()
            .zip(s.iter())
            .fold(T::zero(), |acc, (&y_i, &s_i)| acc + y_i * s_i);
        // Guard against division by a near-zero curvature term y^T s.
        let rho = if ys.abs() > T::from(1e-10).unwrap() {
            T::one() / ys
        } else {
            T::one() / T::from(1e-10).unwrap()
        };

        // BFGS inverse-Hessian update:
        //   H' = (I - rho s y^T) H (I - rho y s^T) + rho s s^T
        // computed as two matrix passes plus a final rank-one term.
        let mut temp_matrix = vec![vec![T::zero(); n]; n];
        for i in 0..n {
            for j in 0..n {
                let sy_term = s[i]
                    * y.iter()
                        .enumerate()
                        .fold(T::zero(), |acc, (k, &y_k)| acc + y_k * h_inv[k][j]);
                temp_matrix[i][j] = h_inv[i][j] - rho * sy_term;
            }
        }
        let mut new_h_inv = vec![vec![T::zero(); n]; n];
        for i in 0..n {
            let row_sum = y
                .iter()
                .enumerate()
                .fold(T::zero(), |acc, (k, &y_k)| acc + y_k * temp_matrix[i][k]);
            new_h_inv[i] = s
                .iter()
                .enumerate()
                .map(|(j, &s_j)| temp_matrix[i][j] - rho * row_sum * s_j)
                .collect();
        }
        for i in 0..n {
            for j in 0..n {
                new_h_inv[i][j] = new_h_inv[i][j] + rho * s[i] * s[j];
            }
        }

        h_inv = new_h_inv;
        current_point = new_point;
        gradient = new_gradient;
        iterations += 1;
    }

    // Compute the final value first so `current_point` can be moved
    // into the result without a clone.
    let optimal_value = f.evaluate(&current_point);
    OptimizationResult {
        optimal_point: current_point,
        optimal_value,
        iterations,
        converged,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// f(x) = Σ x_i², minimized at the origin with optimal value 0.
    struct Quadratic;

    impl ObjectiveFunction<f64> for Quadratic {
        fn evaluate(&self, point: &[f64]) -> f64 {
            point.iter().map(|x| x * x).sum()
        }

        fn gradient(&self, point: &[f64]) -> Option<Vec<f64>> {
            Some(point.iter().map(|x| 2.0 * x).collect())
        }
    }

    #[test]
    fn test_bfgs_quadratic() {
        let objective = Quadratic;
        let start = vec![1.0, 1.0];
        let config = OptimizationConfig {
            max_iterations: 100,
            tolerance: 1e-6,
            learning_rate: 1.0,
        };

        let outcome = minimize(&objective, &start, &config);

        assert!(outcome.converged);
        assert!(outcome.optimal_value < 1e-10);
        // Every coordinate should land near the origin.
        assert!(outcome.optimal_point.iter().all(|x| x.abs() < 1e-5));
    }

    /// f(x) = (x - 2)², a 1-D quadratic with its minimum at x = 2.
    struct QuadraticWithMinimum;

    impl ObjectiveFunction<f64> for QuadraticWithMinimum {
        fn evaluate(&self, point: &[f64]) -> f64 {
            (point[0] - 2.0).powi(2)
        }

        fn gradient(&self, point: &[f64]) -> Option<Vec<f64>> {
            Some(vec![2.0 * (point[0] - 2.0)])
        }
    }

    #[test]
    fn test_bfgs_quadratic_with_minimum() {
        let objective = QuadraticWithMinimum;
        let start = vec![0.0];
        let config = OptimizationConfig {
            max_iterations: 100,
            tolerance: 1e-6,
            learning_rate: 1.0,
        };

        let outcome = minimize(&objective, &start, &config);

        assert!(outcome.converged);
        assert!((outcome.optimal_point[0] - 2.0).abs() < 1e-5);
        assert!(outcome.optimal_value < 1e-10);
    }

    /// The classic Rosenbrock banana function: a narrow curved valley with
    /// its global minimum at (1, 1), a standard stress test for line-search
    /// optimizers.
    struct Rosenbrock;

    impl ObjectiveFunction<f64> for Rosenbrock {
        fn evaluate(&self, point: &[f64]) -> f64 {
            let (x, y) = (point[0], point[1]);
            (x - 1.0).powi(2) + 100.0 * (y - x.powi(2)).powi(2)
        }

        fn gradient(&self, point: &[f64]) -> Option<Vec<f64>> {
            let (x, y) = (point[0], point[1]);
            let dx = 2.0 * (x - 1.0) - 400.0 * x * (y - x.powi(2));
            let dy = 200.0 * (y - x.powi(2));
            Some(vec![dx, dy])
        }
    }

    #[test]
    fn test_bfgs_rosenbrock() {
        let objective = Rosenbrock;
        let start = vec![0.0, 0.0];
        let config = OptimizationConfig {
            max_iterations: 2000,
            tolerance: 1e-5,
            learning_rate: 1.0,
        };

        let outcome = minimize(&objective, &start, &config);

        assert!(outcome.converged);
        assert!((outcome.optimal_point[0] - 1.0).abs() < 1e-3);
        assert!((outcome.optimal_point[1] - 1.0).abs() < 1e-3);
    }
}