use scivex_core::{Float, Tensor};
use crate::error::Result;
use super::{MinimizeOptions, MinimizeResult};
#[cfg_attr(
feature = "serde-support",
derive(serde::Serialize, serde::Deserialize)
)]
#[derive(Debug, Clone)]
pub struct Bounds<T> {
pub lower: Option<T>,
pub upper: Option<T>,
}
impl<T> Bounds<T> {
pub fn none() -> Self {
Self {
lower: None,
upper: None,
}
}
pub fn lower(lb: T) -> Self {
Self {
lower: Some(lb),
upper: None,
}
}
pub fn upper(ub: T) -> Self {
Self {
lower: None,
upper: Some(ub),
}
}
pub fn both(lb: T, ub: T) -> Self {
Self {
lower: Some(lb),
upper: Some(ub),
}
}
}
fn project<T: Float>(x: &mut [T], bounds: &[Bounds<T>]) {
for (xi, b) in x.iter_mut().zip(bounds.iter()) {
#[allow(clippy::collapsible_if)]
if let Some(lb) = b.lower {
if *xi < lb {
*xi = lb;
}
}
#[allow(clippy::collapsible_if)]
if let Some(ub) = b.upper {
if *xi > ub {
*xi = ub;
}
}
}
}
pub fn lbfgsb<T, F, G>(
f: F,
grad: G,
x0: &Tensor<T>,
bounds: &[Bounds<T>],
options: &MinimizeOptions<T>,
) -> Result<MinimizeResult<T>>
where
T: Float,
F: Fn(&Tensor<T>) -> T,
G: Fn(&Tensor<T>) -> Tensor<T>,
{
let n = x0.numel();
let mut state = LbfgsbState::new(&f, &grad, x0, bounds, n);
for iter in 0..options.max_iter {
let pg_norm = projected_gradient_norm(&state.x, &state.g, bounds);
if pg_norm < options.gtol {
return Ok(state.into_result(iter, true));
}
let d = lbfgs_direction(&state.g, &state.s_hist, &state.y_hist, &state.rho_hist, n);
let p: Vec<T> = d.iter().map(|&v| -v).collect();
let (alpha, fx_new, ls_evals) = projected_line_search(
&f,
&LineSearchInput {
x: &state.x,
p: &p,
fx: state.fx,
g: &state.g,
bounds,
n,
},
);
state.f_evals += ls_evals;
let (step_norm, f_change) = state.step(&grad, &p, alpha, fx_new, bounds, n);
if step_norm < options.xtol
|| (f_change < options.ftol && f_change < options.ftol * state.fx.abs().max(T::one()))
{
return Ok(state.into_result(iter + 1, true));
}
}
Ok(state.into_result(options.max_iter, false))
}
struct LbfgsbState<T: Float> {
x: Vec<T>,
x_tensor: Tensor<T>,
fx: T,
g: Vec<T>,
g_tensor: Tensor<T>,
f_evals: usize,
g_evals: usize,
s_hist: Vec<Vec<T>>,
y_hist: Vec<Vec<T>>,
rho_hist: Vec<T>,
}
impl<T: Float> LbfgsbState<T> {
fn new<F, G>(f: &F, grad: &G, x0: &Tensor<T>, bounds: &[Bounds<T>], n: usize) -> Self
where
F: Fn(&Tensor<T>) -> T,
G: Fn(&Tensor<T>) -> Tensor<T>,
{
let mut x = x0.as_slice().to_vec();
project(&mut x, bounds);
let x_tensor = Tensor::from_vec(x.clone(), vec![n]).expect("projected x0 length matches n");
let fx = f(&x_tensor);
let g_tensor = grad(&x_tensor);
let g = g_tensor.as_slice().to_vec();
Self {
x,
x_tensor,
fx,
g,
g_tensor,
f_evals: 1,
g_evals: 1,
s_hist: Vec::with_capacity(10),
y_hist: Vec::with_capacity(10),
rho_hist: Vec::with_capacity(10),
}
}
fn step<G: Fn(&Tensor<T>) -> Tensor<T>>(
&mut self,
grad: &G,
p: &[T],
alpha: T,
fx_new: T,
bounds: &[Bounds<T>],
n: usize,
) -> (T, T) {
let mut x_new = vec![T::zero(); n];
for j in 0..n {
x_new[j] = self.x[j] + alpha * p[j];
}
project(&mut x_new, bounds);
let x_new_tensor =
Tensor::from_vec(x_new.clone(), vec![n]).expect("new x length matches n");
let g_new_tensor = grad(&x_new_tensor);
self.g_evals += 1;
let g_new = g_new_tensor.as_slice().to_vec();
let mut sy = T::zero();
let mut s_k = vec![T::zero(); n];
let mut y_k = vec![T::zero(); n];
for j in 0..n {
s_k[j] = x_new[j] - self.x[j];
y_k[j] = g_new[j] - self.g[j];
sy += s_k[j] * y_k[j];
}
let step_norm: T = s_k.iter().map(|&v| v * v).sum::<T>().sqrt();
let f_change = (self.fx - fx_new).abs();
if sy > T::epsilon() {
if self.s_hist.len() == 10 {
self.s_hist.remove(0);
self.y_hist.remove(0);
self.rho_hist.remove(0);
}
self.s_hist.push(s_k);
self.y_hist.push(y_k);
self.rho_hist.push(T::one() / sy);
}
self.x = x_new;
self.x_tensor = x_new_tensor;
self.fx = fx_new;
self.g = g_new;
self.g_tensor = g_new_tensor;
(step_norm, f_change)
}
fn into_result(self, iterations: usize, converged: bool) -> MinimizeResult<T> {
MinimizeResult {
x: self.x_tensor,
f_val: self.fx,
grad: Some(self.g_tensor),
iterations,
f_evals: self.f_evals,
g_evals: self.g_evals,
converged,
}
}
}
fn projected_gradient_norm<T: Float>(x: &[T], g: &[T], bounds: &[Bounds<T>]) -> T {
let mut norm_sq = T::zero();
for i in 0..x.len() {
let gi = g[i];
let at_lower = bounds[i]
.lower
.is_some_and(|lb| x[i] <= lb && gi > T::zero());
let at_upper = bounds[i]
.upper
.is_some_and(|ub| x[i] >= ub && gi < T::zero());
if !at_lower && !at_upper {
norm_sq += gi * gi;
}
}
norm_sq.sqrt()
}
fn lbfgs_direction<T: Float>(
g: &[T],
s_history: &[Vec<T>],
y_history: &[Vec<T>],
rho_history: &[T],
n: usize,
) -> Vec<T> {
let k = s_history.len();
if k == 0 {
return g.to_vec();
}
let mut q = g.to_vec();
let mut alphas = vec![T::zero(); k];
for i in (0..k).rev() {
let mut si_q = T::zero();
for j in 0..n {
si_q += s_history[i][j] * q[j];
}
alphas[i] = rho_history[i] * si_q;
for j in 0..n {
q[j] -= alphas[i] * y_history[i][j];
}
}
let last = k - 1;
let mut sy = T::zero();
let mut yy = T::zero();
for j in 0..n {
sy += s_history[last][j] * y_history[last][j];
yy += y_history[last][j] * y_history[last][j];
}
let gamma = if yy > T::epsilon() { sy / yy } else { T::one() };
let mut r: Vec<T> = q.iter().map(|&v| gamma * v).collect();
for i in 0..k {
let mut yi_r = T::zero();
for j in 0..n {
yi_r += y_history[i][j] * r[j];
}
let beta = rho_history[i] * yi_r;
for j in 0..n {
r[j] += (alphas[i] - beta) * s_history[i][j];
}
}
r
}
struct LineSearchInput<'a, T> {
x: &'a [T],
p: &'a [T],
fx: T,
g: &'a [T],
bounds: &'a [Bounds<T>],
n: usize,
}
fn projected_line_search<T, F>(f: &F, input: &LineSearchInput<'_, T>) -> (T, T, usize)
where
T: Float,
F: Fn(&Tensor<T>) -> T,
{
let x = input.x;
let p = input.p;
let fx = input.fx;
let g = input.g;
let bounds = input.bounds;
let n = input.n;
let c1 = T::from_f64(1e-4);
let shrink = T::from_f64(0.5);
let dg: T = g.iter().zip(p.iter()).map(|(&gi, &pi)| gi * pi).sum();
let mut alpha = T::one();
let mut evals = 0usize;
let max_ls = 20usize;
for _ in 0..max_ls {
let mut x_trial = vec![T::zero(); n];
for j in 0..n {
x_trial[j] = x[j] + alpha * p[j];
}
project(&mut x_trial, bounds);
let t = Tensor::from_vec(x_trial, vec![n]).expect("trial point length matches n");
let f_trial = f(&t);
evals += 1;
if f_trial <= fx + c1 * alpha * dg {
return (alpha, f_trial, evals);
}
alpha *= shrink;
}
let mut x_trial = vec![T::zero(); n];
for j in 0..n {
x_trial[j] = x[j] + alpha * p[j];
}
project(&mut x_trial, bounds);
let t = Tensor::from_vec(x_trial, vec![n]).expect("trial point length matches n");
let f_trial = f(&t);
evals += 1;
(alpha, f_trial, evals)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_lbfgsb_unconstrained_quadratic() {
let f = |x: &Tensor<f64>| {
let s = x.as_slice();
s[0] * s[0] + s[1] * s[1]
};
let grad = |x: &Tensor<f64>| {
let s = x.as_slice();
Tensor::from_vec(vec![2.0 * s[0], 2.0 * s[1]], vec![2]).unwrap()
};
let x0 = Tensor::from_vec(vec![5.0, 5.0], vec![2]).unwrap();
let bounds = vec![Bounds::none(), Bounds::none()];
let result = lbfgsb(f, grad, &x0, &bounds, &MinimizeOptions::default()).unwrap();
assert!(result.converged);
let s = result.x.as_slice();
assert!(s[0].abs() < 1e-6, "x = {}", s[0]);
assert!(s[1].abs() < 1e-6, "y = {}", s[1]);
}
#[test]
fn test_lbfgsb_active_bounds() {
let f = |x: &Tensor<f64>| {
let s = x.as_slice();
(s[0] - 3.0).powi(2) + (s[1] - 3.0).powi(2)
};
let grad = |x: &Tensor<f64>| {
let s = x.as_slice();
Tensor::from_vec(vec![2.0 * (s[0] - 3.0), 2.0 * (s[1] - 3.0)], vec![2]).unwrap()
};
let x0 = Tensor::from_vec(vec![1.0, 2.0], vec![2]).unwrap();
let bounds = vec![Bounds::lower(1.0), Bounds::lower(2.0)];
let result = lbfgsb(f, grad, &x0, &bounds, &MinimizeOptions::default()).unwrap();
assert!(result.converged);
let s = result.x.as_slice();
assert!((s[0] - 3.0).abs() < 1e-4, "x = {}", s[0]);
assert!((s[1] - 3.0).abs() < 1e-4, "y = {}", s[1]);
}
#[test]
fn test_lbfgsb_active_bound_at_solution() {
let f = |x: &Tensor<f64>| {
let s = x.as_slice();
(s[0] + 1.0).powi(2)
};
let grad = |x: &Tensor<f64>| {
let s = x.as_slice();
Tensor::from_vec(vec![2.0 * (s[0] + 1.0)], vec![1]).unwrap()
};
let x0 = Tensor::from_vec(vec![5.0], vec![1]).unwrap();
let bounds = vec![Bounds::lower(0.0)];
let result = lbfgsb(f, grad, &x0, &bounds, &MinimizeOptions::default()).unwrap();
assert!(result.converged);
let s = result.x.as_slice();
assert!(s[0].abs() < 1e-6, "x = {}, expected 0.0", s[0]);
}
#[test]
fn test_lbfgsb_all_bounded() {
let f = |x: &Tensor<f64>| {
let s = x.as_slice();
s[0] * s[0] + s[1] * s[1]
};
let grad = |x: &Tensor<f64>| {
let s = x.as_slice();
Tensor::from_vec(vec![2.0 * s[0], 2.0 * s[1]], vec![2]).unwrap()
};
let x0 = Tensor::from_vec(vec![1.5, 1.5], vec![2]).unwrap();
let bounds = vec![Bounds::both(1.0, 2.0), Bounds::both(1.0, 2.0)];
let result = lbfgsb(f, grad, &x0, &bounds, &MinimizeOptions::default()).unwrap();
assert!(result.converged);
let s = result.x.as_slice();
assert!((s[0] - 1.0).abs() < 1e-4, "x = {}", s[0]);
assert!((s[1] - 1.0).abs() < 1e-4, "y = {}", s[1]);
}
}