use crate::primitives::{Matrix, Vector};
use super::line_search::{BacktrackingLineSearch, LineSearch};
use super::{ConvergenceStatus, OptimizationResult, Optimizer};
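/// Damped Newton's method for unconstrained minimization.
///
/// Each iteration approximates the Hessian by forward differences of the
/// gradient, solves the Newton system `H d = -g` via Cholesky factorization,
/// and damps the step with a backtracking (Armijo) line search. When the
/// Hessian is not positive definite, or the Newton direction is not a
/// descent direction, the iteration falls back to steepest descent.
///
/// # Example
///
/// A minimal sketch minimizing the convex quadratic `f(x) = x0^2 + 2*x1^2`.
/// It uses only the `Vector::zeros` constructor and indexing seen in this
/// module; adjust if your `Vector` API offers a more direct constructor.
///
/// ```ignore
/// let mut x0 = Vector::zeros(2);
/// x0[0] = 1.0;
/// x0[1] = 1.0;
/// let mut opt = DampedNewton::new(100, 1e-6);
/// let result = opt.minimize(
///     |x| x[0] * x[0] + 2.0 * x[1] * x[1], // objective
///     |x| {
///         // analytic gradient: (2*x0, 4*x1)
///         let mut g = Vector::zeros(2);
///         g[0] = 2.0 * x[0];
///         g[1] = 4.0 * x[1];
///         g
///     },
///     x0,
/// );
/// assert!(result.gradient_norm < 1e-6);
/// ```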
#[derive(Debug, Clone)]
pub struct DampedNewton {
    /// Maximum number of outer Newton iterations.
    pub(crate) max_iter: usize,
    /// Convergence tolerance on the gradient norm.
    pub(crate) tol: f32,
    /// Step size for the finite-difference Hessian approximation.
    pub(crate) epsilon: f32,
    /// Backtracking (Armijo) line search used to damp the Newton step.
    pub(crate) line_search: BacktrackingLineSearch,
}
impl DampedNewton {
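    /// Creates a damped Newton optimizer that stops after `max_iter`
    /// iterations or once the gradient norm drops below `tol`.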
#[must_use]
pub fn new(max_iter: usize, tol: f32) -> Self {
Self {
max_iter,
tol,
            epsilon: 1e-5,
            // Conventional backtracking defaults: Armijo constant 1e-4,
            // shrink factor 0.5, at most 50 backtracking steps (assumed
            // parameter order for `BacktrackingLineSearch::new`).
            line_search: BacktrackingLineSearch::new(1e-4, 0.5, 50),
}
}
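    /// Sets the finite-difference step size used by `approximate_hessian`.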
#[must_use]
pub fn with_epsilon(mut self, epsilon: f32) -> Self {
self.epsilon = epsilon;
self
}
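    /// Approximates the Hessian at `x` by forward differences of the
    /// gradient: column `i` is `(grad(x + epsilon * e_i) - grad(x)) / epsilon`.
    /// The result is symmetrized by averaging mirrored entries, since the
    /// one-sided differences are not exactly symmetric.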
fn approximate_hessian<G>(&self, grad: &G, x: &Vector<f32>) -> Matrix<f32>
where
G: Fn(&Vector<f32>) -> Vector<f32>,
{
let n = x.len();
let mut h_data = vec![0.0; n * n];
let g0 = grad(x);
for i in 0..n {
let mut x_plus = x.clone();
x_plus[i] += self.epsilon;
let g_plus = grad(&x_plus);
for j in 0..n {
h_data[j * n + i] = (g_plus[j] - g0[j]) / self.epsilon;
}
}
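        // Symmetrize: average H[i][j] and H[j][i], which differ only by
        // finite-difference error.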
for i in 0..n {
for j in (i + 1)..n {
let avg = f32::midpoint(h_data[i * n + j], h_data[j * n + i]);
h_data[i * n + j] = avg;
h_data[j * n + i] = avg;
}
}
Matrix::from_vec(n, n, h_data).expect("Matrix dimensions should be valid")
}
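    /// Euclidean (L2) norm of `v`.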
fn norm(v: &Vector<f32>) -> f32 {
let mut sum = 0.0;
for i in 0..v.len() {
sum += v[i] * v[i];
}
sum.sqrt()
}
}
impl Optimizer for DampedNewton {
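    /// Unsupported: damped Newton is a batch method.
    ///
    /// # Panics
    ///
    /// Always panics; use `minimize` instead.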
fn step(&mut self, _params: &mut Vector<f32>, _gradients: &Vector<f32>) {
panic!(
"Damped Newton does not support stochastic updates (step). Use minimize() for batch optimization."
)
}
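    /// Runs the full damped Newton iteration from `x0`, returning when the
    /// gradient norm drops below `tol`, the line search stalls, the
    /// objective becomes non-finite, or `max_iter` is reached.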
fn minimize<F, G>(&mut self, objective: F, gradient: G, x0: Vector<f32>) -> OptimizationResult
where
F: Fn(&Vector<f32>) -> f32,
G: Fn(&Vector<f32>) -> Vector<f32>,
{
let start_time = std::time::Instant::now();
let n = x0.len();
let mut x = x0;
let mut fx = objective(&x);
let mut grad = gradient(&x);
let mut grad_norm = Self::norm(&grad);
for iter in 0..self.max_iter {
if grad_norm < self.tol {
return OptimizationResult {
solution: x,
objective_value: fx,
iterations: iter,
status: ConvergenceStatus::Converged,
gradient_norm: grad_norm,
constraint_violation: 0.0,
elapsed_time: start_time.elapsed(),
};
}
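            // Approximate the Hessian at the current iterate by forward
            // differences of the gradient.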
let hessian = self.approximate_hessian(&gradient, &x);
            // Right-hand side of the Newton system `H d = -g`; this is also
            // the steepest-descent direction used as a fallback.
            let mut neg_grad = Vector::zeros(n);
            for i in 0..n {
                neg_grad[i] = -grad[i];
            }
            // The Cholesky solve succeeds only when the approximate Hessian
            // is positive definite; otherwise fall back to steepest descent.
            // Even when it succeeds, accept the Newton direction only if it
            // is a descent direction (g . d < 0).
            let d = match hessian.cholesky_solve(&neg_grad) {
                Ok(direction) => {
                    let mut grad_dot_d = 0.0;
                    for i in 0..n {
                        grad_dot_d += grad[i] * direction[i];
                    }
                    if grad_dot_d < 0.0 {
                        direction
                    } else {
                        neg_grad.clone()
                    }
                }
                Err(_) => neg_grad.clone(),
            };
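            // Damping: choose the step length by backtracking line search;
            // a vanishing step means no further progress is possible.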
let alpha = self.line_search.search(&objective, &gradient, &x, &d);
if alpha < 1e-12 {
return OptimizationResult {
solution: x,
objective_value: fx,
iterations: iter,
status: ConvergenceStatus::Stalled,
gradient_norm: grad_norm,
constraint_violation: 0.0,
elapsed_time: start_time.elapsed(),
};
}
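            // Take the damped step x <- x + alpha * d.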
let mut x_new = Vector::zeros(n);
for i in 0..n {
x_new[i] = x[i] + alpha * d[i];
}
let fx_new = objective(&x_new);
let grad_new = gradient(&x_new);
            // Abort if the objective blew up to NaN or infinity.
            if !fx_new.is_finite() {
return OptimizationResult {
solution: x,
objective_value: fx,
iterations: iter,
status: ConvergenceStatus::NumericalError,
gradient_norm: grad_norm,
constraint_violation: 0.0,
elapsed_time: start_time.elapsed(),
};
}
x = x_new;
fx = fx_new;
grad = grad_new;
grad_norm = Self::norm(&grad);
}
OptimizationResult {
solution: x,
objective_value: fx,
iterations: self.max_iter,
status: ConvergenceStatus::MaxIterations,
gradient_norm: grad_norm,
constraint_violation: 0.0,
elapsed_time: start_time.elapsed(),
}
}
    fn reset(&mut self) {
        // Damped Newton keeps no state across calls to `minimize`, so there
        // is nothing to reset.
    }
}
#[cfg(test)]
#[path = "damped_newton_tests.rs"]
mod tests;