use crate::linalg::cholesky::cholesky_in_place;
use crate::linalg::lu::{lu_in_place, lu_solve};
use crate::matrix::vector::Vector;
use crate::traits::FloatScalar;
use crate::Matrix;
use super::{LeastSquaresResult, OptimError};
/// Tuning knobs for the Levenberg–Marquardt solver ([`least_squares_lm`]).
#[derive(Debug, Clone, Copy)]
pub struct LmSettings<T> {
    /// Converged when the gradient norm ||J^T r|| drops below this.
    pub grad_tol: T,
    /// Converged when an accepted step satisfies ||delta|| < x_tol * (1 + ||x||).
    pub x_tol: T,
    /// Converged when an accepted cost decrease is below f_tol * (1 + |cost|).
    pub f_tol: T,
    /// Maximum number of outer iterations before `OptimError::MaxIterations`.
    pub max_iter: usize,
    /// Initial damping parameter mu (added to the diagonal of J^T J).
    pub mu_init: T,
    /// Factor applied to mu after a rejected step (expected > 1).
    pub mu_increase: T,
    /// Factor applied to mu after an accepted step (expected < 1).
    pub mu_decrease: T,
    /// Lower clamp for mu after decreases.
    pub mu_min: T,
    /// Upper clamp for mu after increases.
    pub mu_max: T,
}
/// Defaults sized for `f64` precision (tolerances well above machine epsilon ~2.2e-16).
impl Default for LmSettings<f64> {
    fn default() -> Self {
        Self {
            grad_tol: 1e-8,
            x_tol: 1e-12,
            f_tol: 1e-12,
            max_iter: 100,
            mu_init: 1e-3,
            mu_increase: 10.0,
            mu_decrease: 0.1,
            mu_min: 1e-10,
            mu_max: 1e10,
        }
    }
}
/// Defaults sized for `f32` precision: looser convergence tolerances than the
/// `f64` defaults, but the same damping schedule.
impl Default for LmSettings<f32> {
    fn default() -> Self {
        Self {
            grad_tol: 1e-4,
            x_tol: 1e-6,
            f_tol: 1e-6,
            max_iter: 100,
            mu_init: 1e-3,
            mu_increase: 10.0,
            mu_decrease: 0.1,
            mu_min: 1e-10,
            mu_max: 1e10,
        }
    }
}
/// Minimizes `0.5 * ||r(x)||^2` with the Levenberg–Marquardt method
/// (damped Gauss–Newton with a multiplicative mu schedule).
///
/// * `residual` — maps parameters to the length-M residual vector.
/// * `jacobian` — returns the M x N Jacobian of `residual` at the same point.
/// * `x0` — starting guess.
/// * `settings` — tolerances and damping controls; see [`LmSettings`].
///
/// Returns the solution with final cost, gradient norm, and evaluation
/// counters. Errors with `OptimError::MaxIterations` if no convergence test
/// fires within `settings.max_iter` iterations, or `OptimError::Singular` if
/// the damped normal equations cannot be solved.
///
/// Fixes relative to the previous revision:
/// * `predicted` now carries the 1/2 factor of the quadratic model,
///   `L(0) - L(delta) = 0.5 * delta^T (mu*delta - g)`, matching the 1/2 in
///   `cost`; without it the significance test against `eps * cost` was off
///   by a factor of two.
/// * The linearization (J, J^T J, g) is cached and refreshed only after an
///   accepted step, so a rejected step no longer pays a redundant Jacobian
///   evaluation at the unchanged x. `j_evals` now counts true evaluations.
pub fn least_squares_lm<T: FloatScalar, const M: usize, const N: usize>(
    mut residual: impl FnMut(&Vector<T, N>) -> Vector<T, M>,
    mut jacobian: impl FnMut(&Vector<T, N>) -> Matrix<T, M, N>,
    x0: &Vector<T, N>,
    settings: &LmSettings<T>,
) -> Result<LeastSquaresResult<T, N>, OptimError> {
    // Preserve the original max_iter == 0 behavior: fail without ever
    // evaluating the Jacobian.
    if settings.max_iter == 0 {
        return Err(OptimError::MaxIterations);
    }
    let mut x = *x0;
    let mut r = residual(&x);
    let mut r_evals = 1usize;
    let half = T::one() / (T::one() + T::one());
    let mut cost = r.dot(&r) * half;
    let mut mu = settings.mu_init;
    // Linearization cache — valid for the current x, refreshed only after an
    // accepted step.
    let mut j = jacobian(&x);
    let mut j_evals = 1usize;
    let mut jtj = j.transpose() * j;
    let mut g = j.transpose() * r;
    for iter in 0..settings.max_iter {
        let g_norm = g.norm();
        if g_norm < settings.grad_tol {
            return Ok(LeastSquaresResult {
                x,
                cost,
                grad_norm: g_norm,
                iterations: iter,
                r_evals,
                j_evals,
            });
        }
        // Solve the damped normal equations (J^T J + mu I) delta = -g.
        let neg_g = -g;
        let delta = solve_damped(&jtj, mu, &neg_g)?;
        let x_new = x + delta;
        let r_new = residual(&x_new);
        r_evals += 1;
        let cost_new = r_new.dot(&r_new) * half;
        // Predicted decrease of the local quadratic model:
        // L(0) - L(delta) = 0.5 * delta^T (mu*delta - g)  (positive when the
        // step is a descent direction of the model).
        let predicted = delta.dot(&(delta * mu - g)) * half;
        let eps = T::epsilon();
        // Require the predicted decrease to be both positive and numerically
        // significant relative to the current cost.
        if predicted > T::zero() && predicted.abs() >= eps * cost.abs() {
            let actual = cost - cost_new;
            // gain ratio actual/predicted > 0 with predicted > 0 is
            // equivalent to actual > 0: accept any real decrease.
            if actual > T::zero() {
                x = x_new;
                r = r_new;
                cost = cost_new;
                mu = (mu * settings.mu_decrease).max(settings.mu_min);
                // Step-size / cost-decrease convergence. grad_norm reported
                // here is from the pre-step linearization, as before.
                let step_small = delta.norm() < settings.x_tol * (T::one() + x.norm());
                let cost_flat = actual.abs() < settings.f_tol * (T::one() + cost.abs());
                if step_small || cost_flat {
                    return Ok(LeastSquaresResult {
                        x,
                        cost,
                        grad_norm: g_norm,
                        iterations: iter + 1,
                        r_evals,
                        j_evals,
                    });
                }
                // Relinearize at the accepted point.
                j = jacobian(&x);
                j_evals += 1;
                jtj = j.transpose() * j;
                g = j.transpose() * r;
            } else {
                // Step increased the cost: damp harder and retry from x.
                mu = (mu * settings.mu_increase).min(settings.mu_max);
            }
        } else {
            // Model predicts no meaningful decrease: damp harder.
            mu = (mu * settings.mu_increase).min(settings.mu_max);
        }
    }
    Err(OptimError::MaxIterations)
}
/// Solves the damped system `(a + mu*I) x = b`.
///
/// Tries a Cholesky factorization first (valid when the damped matrix is
/// symmetric positive definite, the common case for J^T J + mu I) and falls
/// back to pivoted LU; returns `OptimError::Singular` if both fail.
fn solve_damped<T: FloatScalar, const N: usize>(
    a: &Matrix<T, N, N>,
    mu: T,
    b: &Vector<T, N>,
) -> Result<Vector<T, N>, OptimError> {
    // Form a + mu*I without touching the caller's matrix.
    let mut system = *a;
    for k in 0..N {
        system[(k, k)] = system[(k, k)] + mu;
    }
    // Flatten the right-hand side once; both solver paths take plain arrays.
    let mut rhs = [T::zero(); N];
    for (k, slot) in rhs.iter_mut().enumerate() {
        *slot = b[k];
    }
    // Fast path: Cholesky factor in place, then two triangular solves.
    let mut factor = system;
    if cholesky_in_place(&mut factor).is_ok() {
        let mut fwd = [T::zero(); N];
        crate::linalg::cholesky::forward_substitute(&factor, &rhs, &mut fwd);
        let mut sol = [T::zero(); N];
        crate::linalg::cholesky::back_substitute_lt(&factor, &fwd, &mut sol);
        return Ok(Vector::from_array(sol));
    }
    // Fallback: pivoted LU on the same damped matrix.
    let mut lu = system;
    let mut perm = [0usize; N];
    lu_in_place(&mut lu, &mut perm).map_err(|_| OptimError::Singular)?;
    let mut sol = [T::zero(); N];
    lu_solve(&lu, &perm, &rhs, &mut sol);
    Ok(Vector::from_array(sol))
}