use crate::error::{NumRs2Error, Result};
use num_traits::Float;
use super::{compute_norm, dot_product, OptimizeConfig, OptimizeResult};
pub fn bfgs<T, F, G>(
f: F,
grad: G,
x0: &[T],
config: Option<OptimizeConfig<T>>,
) -> Result<OptimizeResult<T>>
where
T: Float + std::fmt::Debug + std::iter::Sum,
F: Fn(&[T]) -> T,
G: Fn(&[T]) -> Vec<T>,
{
let cfg = config.unwrap_or_default();
let n = x0.len();
let mut x = x0.to_vec();
let mut f_val = f(&x);
let mut g = grad(&x);
let mut nfev = 1;
let mut njev = 1;
let mut h_inv = vec![vec![T::zero(); n]; n];
for i in 0..n {
h_inv[i][i] = T::one();
}
let g_norm = compute_norm(&g);
if g_norm < cfg.gtol {
return Ok(OptimizeResult {
x,
fun: f_val,
grad: g,
nit: 0,
nfev,
njev,
success: true,
message: "Optimization terminated successfully (initial point is optimal)".to_string(),
});
}
for k in 0..cfg.max_iter {
let mut p = vec![T::zero(); n];
for i in 0..n {
for j in 0..n {
p[i] = p[i] - h_inv[i][j] * g[j];
}
}
let (alpha, f_new, nfev_ls) = wolfe_line_search(&f, &grad, &x, &p, f_val, &g, &cfg)?;
nfev += nfev_ls;
njev += nfev_ls;
let x_new: Vec<T> = x
.iter()
.zip(p.iter())
.map(|(&xi, &pi)| xi + alpha * pi)
.collect();
let g_new = grad(&x_new);
njev += 1;
let dx_norm = compute_norm(
&x_new
.iter()
.zip(x.iter())
.map(|(&xi_new, &xi)| xi_new - xi)
.collect::<Vec<_>>(),
);
let df = (f_new - f_val).abs();
let g_new_norm = compute_norm(&g_new);
if g_new_norm < cfg.gtol {
return Ok(OptimizeResult {
x: x_new,
fun: f_new,
grad: g_new,
nit: k + 1,
nfev,
njev,
success: true,
message: "Optimization terminated successfully (gradient norm converged)"
.to_string(),
});
}
if dx_norm < cfg.xtol {
return Ok(OptimizeResult {
x: x_new,
fun: f_new,
grad: g_new,
nit: k + 1,
nfev,
njev,
success: true,
message: "Optimization terminated successfully (parameter change converged)"
.to_string(),
});
}
if df < cfg.ftol {
return Ok(OptimizeResult {
x: x_new,
fun: f_new,
grad: g_new,
nit: k + 1,
nfev,
njev,
success: true,
message: "Optimization terminated successfully (function value converged)"
.to_string(),
});
}
let s: Vec<T> = x_new
.iter()
.zip(x.iter())
.map(|(&xi_new, &xi)| xi_new - xi)
.collect();
let y: Vec<T> = g_new
.iter()
.zip(g.iter())
.map(|(&gi_new, &gi)| gi_new - gi)
.collect();
let ys: T = y.iter().zip(s.iter()).map(|(&yi, &si)| yi * si).sum();
if ys > T::from(1e-14).expect("1e-14 should be representable in Float") {
let mut hy = vec![T::zero(); n];
for i in 0..n {
for j in 0..n {
hy[i] = hy[i] + h_inv[i][j] * y[j];
}
}
let yhy: T = y.iter().zip(hy.iter()).map(|(&yi, &hyi)| yi * hyi).sum();
for i in 0..n {
for j in 0..n {
let term1 = (T::one() + yhy / ys) * s[i] * s[j] / ys;
let term2 = (s[i] * hy[j] + hy[i] * s[j]) / ys;
h_inv[i][j] = h_inv[i][j] + term1 - term2;
}
}
}
x = x_new;
f_val = f_new;
g = g_new;
}
Ok(OptimizeResult {
x,
fun: f_val,
grad: g,
nit: cfg.max_iter,
nfev,
njev,
success: false,
message: "Maximum iterations reached".to_string(),
})
}
pub fn wolfe_line_search<T, F, G>(
f: &F,
grad: &G,
x: &[T],
p: &[T],
f0: T,
g0: &[T],
config: &OptimizeConfig<T>,
) -> Result<(T, T, usize)>
where
T: Float + std::iter::Sum,
F: Fn(&[T]) -> T,
G: Fn(&[T]) -> Vec<T>,
{
let mut alpha = T::one();
let mut nfev = 0;
let dg: T = g0.iter().zip(p.iter()).map(|(&gi, &pi)| gi * pi).sum();
for _ in 0..config.ls_max_iter {
let x_new: Vec<T> = x
.iter()
.zip(p.iter())
.map(|(&xi, &pi)| xi + alpha * pi)
.collect();
let f_new = f(&x_new);
nfev += 1;
if f_new <= f0 + config.c1 * alpha * dg {
let g_new = grad(&x_new);
let dg_new: T = g_new.iter().zip(p.iter()).map(|(&gi, &pi)| gi * pi).sum();
if dg_new.abs() <= config.c2 * dg.abs() {
return Ok((alpha, f_new, nfev + 1)); }
}
alpha = alpha * T::from(0.5).expect("0.5 should be representable in Float");
if alpha < T::from(1e-10).expect("1e-10 should be representable in Float") {
break;
}
}
let alpha_min = T::from(1e-8).expect("1e-8 should be representable in Float");
let x_new: Vec<T> = x
.iter()
.zip(p.iter())
.map(|(&xi, &pi)| xi + alpha_min * pi)
.collect();
let f_new = f(&x_new);
Ok((alpha_min, f_new, nfev + 1))
}
pub fn lbfgs<T, F, G>(
f: F,
grad: G,
x0: &[T],
m: usize, config: Option<OptimizeConfig<T>>,
) -> Result<OptimizeResult<T>>
where
T: Float + std::fmt::Debug + std::iter::Sum,
F: Fn(&[T]) -> T,
G: Fn(&[T]) -> Vec<T>,
{
let cfg = config.unwrap_or_default();
let n = x0.len();
if m == 0 {
return Err(NumRs2Error::ValueError(
"L-BFGS memory parameter m must be > 0".to_string(),
));
}
let mut x = x0.to_vec();
let mut f_val = f(&x);
let mut g = grad(&x);
let mut nfev = 1;
let mut njev = 1;
let mut s_history: Vec<Vec<T>> = Vec::with_capacity(m);
let mut y_history: Vec<Vec<T>> = Vec::with_capacity(m);
let mut rho_history: Vec<T> = Vec::with_capacity(m);
let g_norm = compute_norm(&g);
if g_norm < cfg.gtol {
return Ok(OptimizeResult {
x,
fun: f_val,
grad: g,
nit: 0,
nfev,
njev,
success: true,
message: "Optimization terminated successfully (initial point is optimal)".to_string(),
});
}
for k in 0..cfg.max_iter {
let p = lbfgs_two_loop_recursion(&g, &s_history, &y_history, &rho_history);
let (alpha, f_new, nfev_ls) = wolfe_line_search(&f, &grad, &x, &p, f_val, &g, &cfg)?;
nfev += nfev_ls;
njev += nfev_ls;
let x_new: Vec<T> = x
.iter()
.zip(p.iter())
.map(|(&xi, &pi)| xi + alpha * pi)
.collect();
let g_new = grad(&x_new);
njev += 1;
let s: Vec<T> = x_new
.iter()
.zip(x.iter())
.map(|(&xi_new, &xi)| xi_new - xi)
.collect();
let y: Vec<T> = g_new
.iter()
.zip(g.iter())
.map(|(&gi_new, &gi)| gi_new - gi)
.collect();
let ys: T = y.iter().zip(s.iter()).map(|(&yi, &si)| yi * si).sum();
if ys > T::from(1e-14).expect("1e-14 should be representable in Float") {
let rho = T::one() / ys;
if s_history.len() >= m {
s_history.remove(0);
y_history.remove(0);
rho_history.remove(0);
}
s_history.push(s);
y_history.push(y);
rho_history.push(rho);
}
let g_new_norm = compute_norm(&g_new);
let dx_norm = compute_norm(
&x_new
.iter()
.zip(x.iter())
.map(|(&xi_new, &xi)| xi_new - xi)
.collect::<Vec<_>>(),
);
let df = (f_new - f_val).abs();
if g_new_norm < cfg.gtol {
return Ok(OptimizeResult {
x: x_new,
fun: f_new,
grad: g_new,
nit: k + 1,
nfev,
njev,
success: true,
message: "Optimization terminated successfully (gradient converged)".to_string(),
});
}
if dx_norm < cfg.xtol {
return Ok(OptimizeResult {
x: x_new,
fun: f_new,
grad: g_new,
nit: k + 1,
nfev,
njev,
success: true,
message: "Optimization terminated successfully (parameter converged)".to_string(),
});
}
if df < cfg.ftol {
return Ok(OptimizeResult {
x: x_new,
fun: f_new,
grad: g_new,
nit: k + 1,
nfev,
njev,
success: true,
message: "Optimization terminated successfully (function value converged)"
.to_string(),
});
}
x = x_new;
f_val = f_new;
g = g_new;
}
Ok(OptimizeResult {
x,
fun: f_val,
grad: g,
nit: cfg.max_iter,
nfev,
njev,
success: false,
message: "Maximum iterations reached".to_string(),
})
}
fn lbfgs_two_loop_recursion<T: Float + std::iter::Sum>(
g: &[T],
s_history: &[Vec<T>],
y_history: &[Vec<T>],
rho_history: &[T],
) -> Vec<T> {
let n = g.len();
let m = s_history.len();
if m == 0 {
return g.iter().map(|&gi| -gi).collect();
}
let mut q = g.to_vec();
let mut alpha = vec![T::zero(); m];
for i in (0..m).rev() {
alpha[i] = rho_history[i] * dot_product(&s_history[i], &q);
for j in 0..n {
q[j] = q[j] - alpha[i] * y_history[i][j];
}
}
let last_idx = m - 1;
let sy: T = dot_product(&s_history[last_idx], &y_history[last_idx]);
let yy: T = dot_product(&y_history[last_idx], &y_history[last_idx]);
let gamma = if yy > T::from(1e-14).expect("1e-14 should be representable in Float") {
sy / yy
} else {
T::one()
};
let mut r: Vec<T> = q.iter().map(|&qi| gamma * qi).collect();
for i in 0..m {
let beta = rho_history[i] * dot_product(&y_history[i], &r);
for j in 0..n {
r[j] = r[j] + (alpha[i] - beta) * s_history[i][j];
}
}
r.iter().map(|&ri| -ri).collect()
}
pub fn check_gradient<T, F, G>(f: &F, grad: &G, x: &[T], tol: T) -> bool
where
T: Float + std::iter::Sum,
F: Fn(&[T]) -> T,
G: Fn(&[T]) -> Vec<T>,
{
let n = x.len();
let eps = T::from(1e-8).expect("1e-8 should be representable in Float");
let g_analytical = grad(x);
for i in 0..n {
let mut x_plus = x.to_vec();
let mut x_minus = x.to_vec();
x_plus[i] = x_plus[i] + eps;
x_minus[i] = x_minus[i] - eps;
let f_plus = f(&x_plus);
let f_minus = f(&x_minus);
let g_numerical = (f_plus - f_minus)
/ (T::from(2.0).expect("2.0 should be representable in Float") * eps);
let relative_error = if g_analytical[i].abs()
> T::from(1e-10).expect("1e-10 should be representable in Float")
{
((g_analytical[i] - g_numerical) / g_analytical[i]).abs()
} else {
(g_analytical[i] - g_numerical).abs()
};
if relative_error > tol {
return false;
}
}
true
}