use crate::DType;
use numr::error::Result;
use numr::ops::{ScalarOps, TensorOps};
use numr::runtime::{Runtime, RuntimeClient};
use numr::tensor::Tensor;
use crate::optimize::error::{OptimizeError, OptimizeResult};
use crate::optimize::minimize::MinimizeOptions;
use super::helpers::{TensorMinimizeResult, backtracking_line_search_tensor};
use super::utils::{SINGULAR_THRESHOLD, finite_difference_gradient, tensor_dot, tensor_norm};
#[derive(Debug, Clone)]
pub struct LbfgsOptions {
pub base: MinimizeOptions,
pub m: usize,
}
impl Default for LbfgsOptions {
fn default() -> Self {
Self {
base: MinimizeOptions::default(),
m: 10,
}
}
}
pub fn lbfgs_impl<R, C, F>(
client: &C,
f: F,
x0: &Tensor<R>,
options: &LbfgsOptions,
) -> OptimizeResult<TensorMinimizeResult<R>>
where
R: Runtime<DType = DType>,
C: TensorOps<R> + ScalarOps<R> + RuntimeClient<R>,
F: Fn(&Tensor<R>) -> Result<f64>,
{
let n = x0.shape()[0];
if n == 0 {
return Err(OptimizeError::InvalidInput {
context: "lbfgs: empty initial guess".to_string(),
});
}
if options.m == 0 {
return Err(OptimizeError::InvalidInput {
context: "lbfgs: history size m must be > 0".to_string(),
});
}
let mut x = x0.clone();
let mut fx = f(&x).map_err(|e| OptimizeError::NumericalError {
message: format!("lbfgs: initial evaluation - {}", e),
})?;
let mut nfev = 1;
let mut grad =
finite_difference_gradient(client, &f, &x, fx, options.base.eps).map_err(|e| {
OptimizeError::NumericalError {
message: format!("lbfgs: gradient - {}", e),
}
})?;
nfev += n;
let mut s_history: Vec<Tensor<R>> = Vec::with_capacity(options.m);
let mut y_history: Vec<Tensor<R>> = Vec::with_capacity(options.m);
let mut rho_history: Vec<f64> = Vec::with_capacity(options.m);
for iter in 0..options.base.max_iter {
let grad_norm = tensor_norm(client, &grad).map_err(|e| OptimizeError::NumericalError {
message: format!("lbfgs: grad norm - {}", e),
})?;
if grad_norm < options.base.g_tol {
return Ok(TensorMinimizeResult {
x,
fun: fx,
iterations: iter + 1,
nfev,
converged: true,
});
}
let p = two_loop_recursion(client, &grad, &s_history, &y_history, &rho_history)?;
let (x_new, fx_new, evals) =
backtracking_line_search_tensor(client, &f, &x, &p, fx, &grad)?;
nfev += evals;
let s = client
.sub(&x_new, &x)
.map_err(|e| OptimizeError::NumericalError {
message: format!("lbfgs: s = x_new - x - {}", e),
})?;
let s_norm = tensor_norm(client, &s).map_err(|e| OptimizeError::NumericalError {
message: format!("lbfgs: s norm - {}", e),
})?;
if s_norm < options.base.x_tol || (fx - fx_new).abs() < options.base.f_tol {
return Ok(TensorMinimizeResult {
x: x_new,
fun: fx_new,
iterations: iter + 1,
nfev,
converged: true,
});
}
let grad_new = finite_difference_gradient(client, &f, &x_new, fx_new, options.base.eps)
.map_err(|e| OptimizeError::NumericalError {
message: format!("lbfgs: new gradient - {}", e),
})?;
nfev += n;
let y = client
.sub(&grad_new, &grad)
.map_err(|e| OptimizeError::NumericalError {
message: format!("lbfgs: y = grad_new - grad - {}", e),
})?;
let ys = tensor_dot(client, &y, &s)?;
if ys.abs() > SINGULAR_THRESHOLD {
let rho = 1.0 / ys;
if s_history.len() >= options.m {
s_history.remove(0);
y_history.remove(0);
rho_history.remove(0);
}
s_history.push(s);
y_history.push(y);
rho_history.push(rho);
}
x = x_new;
fx = fx_new;
grad = grad_new;
}
Ok(TensorMinimizeResult {
x,
fun: fx,
iterations: options.base.max_iter,
nfev,
converged: false,
})
}
fn two_loop_recursion<R, C>(
client: &C,
grad: &Tensor<R>,
s_history: &[Tensor<R>],
y_history: &[Tensor<R>],
rho_history: &[f64],
) -> OptimizeResult<Tensor<R>>
where
R: Runtime<DType = DType>,
C: TensorOps<R> + ScalarOps<R> + RuntimeClient<R>,
{
if s_history.is_empty() {
return client
.mul_scalar(grad, -1.0)
.map_err(|e| OptimizeError::NumericalError {
message: format!("two_loop: negate grad - {}", e),
});
}
let m = s_history.len();
let mut q = grad.clone();
let mut alpha: Vec<f64> = vec![0.0; m];
for i in (0..m).rev() {
let s_q = tensor_dot(client, &s_history[i], &q)?;
alpha[i] = rho_history[i] * s_q;
let alpha_y = client.mul_scalar(&y_history[i], alpha[i]).map_err(|e| {
OptimizeError::NumericalError {
message: format!("two_loop: alpha * y - {}", e),
}
})?;
q = client
.sub(&q, &alpha_y)
.map_err(|e| OptimizeError::NumericalError {
message: format!("two_loop: q - alpha*y - {}", e),
})?;
}
let last_idx = m - 1;
let s_y = tensor_dot(client, &s_history[last_idx], &y_history[last_idx])?;
let y_y = tensor_dot(client, &y_history[last_idx], &y_history[last_idx])?;
let gamma = if y_y.abs() > SINGULAR_THRESHOLD {
s_y / y_y
} else {
1.0
};
let mut r = client
.mul_scalar(&q, gamma)
.map_err(|e| OptimizeError::NumericalError {
message: format!("two_loop: H_0 @ q - {}", e),
})?;
for i in 0..m {
let y_r = tensor_dot(client, &y_history[i], &r)?;
let beta = rho_history[i] * y_r;
let coeff = alpha[i] - beta;
let coeff_s =
client
.mul_scalar(&s_history[i], coeff)
.map_err(|e| OptimizeError::NumericalError {
message: format!("two_loop: coeff * s - {}", e),
})?;
r = client
.add(&r, &coeff_s)
.map_err(|e| OptimizeError::NumericalError {
message: format!("two_loop: r + coeff*s - {}", e),
})?;
}
client
.mul_scalar(&r, -1.0)
.map_err(|e| OptimizeError::NumericalError {
message: format!("two_loop: negate result - {}", e),
})
}