use crate::error::OptimizeError;
use scirs2_core::ndarray::{s, Array1, Array2, ArrayView1};
pub trait HessianApproximation: Send + Sync {
fn update(&mut self, s: &[f64], y: &[f64]) -> Result<(), OptimizeError>;
fn multiply(&self, v: &[f64]) -> Result<Vec<f64>, OptimizeError>;
fn inverse_multiply(&self, v: &[f64]) -> Result<Vec<f64>, OptimizeError>;
fn to_dense(&self) -> Array2<f64>;
fn reset(&mut self);
fn dim(&self) -> usize;
}
pub struct FiniteDiffHessian<F>
where
F: Fn(&ArrayView1<f64>) -> f64 + Send + Sync,
{
pub fun: F,
pub x: Vec<f64>,
pub step: f64,
hess: Array2<f64>,
}
impl<F> FiniteDiffHessian<F>
where
F: Fn(&ArrayView1<f64>) -> f64 + Send + Sync,
{
pub fn new(fun: F, x0: &[f64], step: f64) -> Result<Self, OptimizeError> {
let x = x0.to_vec();
let hess = compute_fd_hessian(&fun, &x, step)?;
Ok(Self {
fun,
x,
step,
hess,
})
}
pub fn with_default_step(fun: F, x0: &[f64]) -> Result<Self, OptimizeError> {
Self::new(fun, x0, f64::EPSILON.cbrt())
}
pub fn recompute_at(&mut self, x: &[f64]) -> Result<(), OptimizeError> {
self.x = x.to_vec();
self.hess = compute_fd_hessian(&self.fun, &self.x, self.step)?;
Ok(())
}
}
impl<F> HessianApproximation for FiniteDiffHessian<F>
where
F: Fn(&ArrayView1<f64>) -> f64 + Send + Sync,
{
fn update(&mut self, _s: &[f64], _y: &[f64]) -> Result<(), OptimizeError> {
let n = self.x.len();
let mut x_new = self.x.clone();
for i in 0..n {
x_new[i] += _s[i];
}
self.recompute_at(&x_new)
}
fn multiply(&self, v: &[f64]) -> Result<Vec<f64>, OptimizeError> {
let n = self.hess.nrows();
if v.len() != n {
return Err(OptimizeError::ValueError("Dimension mismatch".to_string()));
}
let mut result = vec![0.0f64; n];
for i in 0..n {
for j in 0..n {
result[i] += self.hess[[i, j]] * v[j];
}
}
Ok(result)
}
fn inverse_multiply(&self, v: &[f64]) -> Result<Vec<f64>, OptimizeError> {
let n = self.hess.nrows();
if v.len() != n {
return Err(OptimizeError::ValueError("Dimension mismatch".to_string()));
}
let mut a: Vec<Vec<f64>> = (0..n).map(|i| self.hess.row(i).to_vec()).collect();
let mut b = v.to_vec();
gaussian_solve(&mut a, &mut b).ok_or_else(|| {
OptimizeError::ComputationError("Hessian is singular; cannot invert".to_string())
})
}
fn to_dense(&self) -> Array2<f64> {
self.hess.clone()
}
fn reset(&mut self) {
let n = self.x.len();
self.hess = Array2::eye(n);
}
fn dim(&self) -> usize {
self.x.len()
}
}
pub struct SR1Update {
pub b: Array2<f64>,
pub n: usize,
pub r: f64,
}
impl SR1Update {
pub fn new(n: usize) -> Self {
Self {
b: Array2::eye(n),
n,
r: 1e-8,
}
}
pub fn with_safeguard(n: usize, r: f64) -> Self {
Self {
b: Array2::eye(n),
n,
r,
}
}
}
impl HessianApproximation for SR1Update {
fn update(&mut self, s: &[f64], y: &[f64]) -> Result<(), OptimizeError> {
let n = self.n;
if s.len() != n || y.len() != n {
return Err(OptimizeError::ValueError("Dimension mismatch in SR1".to_string()));
}
let bs: Vec<f64> = (0..n)
.map(|i| (0..n).map(|j| self.b[[i, j]] * s[j]).sum())
.collect();
let u: Vec<f64> = (0..n).map(|i| y[i] - bs[i]).collect();
let uts: f64 = (0..n).map(|i| u[i] * s[i]).sum();
let u_norm: f64 = (0..n).map(|i| u[i] * u[i]).sum::<f64>().sqrt();
let s_norm: f64 = (0..n).map(|i| s[i] * s[i]).sum::<f64>().sqrt();
if uts.abs() <= self.r * u_norm * s_norm {
return Ok(());
}
for i in 0..n {
for j in 0..n {
self.b[[i, j]] += u[i] * u[j] / uts;
}
}
Ok(())
}
fn multiply(&self, v: &[f64]) -> Result<Vec<f64>, OptimizeError> {
let n = self.n;
if v.len() != n {
return Err(OptimizeError::ValueError("Dimension mismatch".to_string()));
}
let mut result = vec![0.0f64; n];
for i in 0..n {
for j in 0..n {
result[i] += self.b[[i, j]] * v[j];
}
}
Ok(result)
}
fn inverse_multiply(&self, v: &[f64]) -> Result<Vec<f64>, OptimizeError> {
let n = self.n;
if v.len() != n {
return Err(OptimizeError::ValueError("Dimension mismatch".to_string()));
}
let mut a: Vec<Vec<f64>> = (0..n).map(|i| self.b.row(i).to_vec()).collect();
let mut b = v.to_vec();
gaussian_solve(&mut a, &mut b).ok_or_else(|| {
OptimizeError::ComputationError("SR1 Hessian singular".to_string())
})
}
fn to_dense(&self) -> Array2<f64> {
self.b.clone()
}
fn reset(&mut self) {
self.b = Array2::eye(self.n);
}
fn dim(&self) -> usize {
self.n
}
}
pub struct BFGSUpdate {
pub b: Array2<f64>,
pub h_inv: Array2<f64>,
pub n: usize,
}
impl BFGSUpdate {
pub fn new(n: usize) -> Self {
Self {
b: Array2::eye(n),
h_inv: Array2::eye(n),
n,
}
}
}
impl HessianApproximation for BFGSUpdate {
fn update(&mut self, s: &[f64], y: &[f64]) -> Result<(), OptimizeError> {
let n = self.n;
if s.len() != n || y.len() != n {
return Err(OptimizeError::ValueError("Dimension mismatch in BFGS".to_string()));
}
let sy: f64 = (0..n).map(|i| s[i] * y[i]).sum();
if sy <= 1e-10 * (0..n).map(|i| y[i] * y[i]).sum::<f64>().sqrt() {
return Ok(());
}
let rho = 1.0 / sy;
let bs: Vec<f64> = (0..n)
.map(|i| (0..n).map(|j| self.b[[i, j]] * s[j]).sum())
.collect();
let s_bs: f64 = (0..n).map(|i| s[i] * bs[i]).sum();
for i in 0..n {
for j in 0..n {
self.b[[i, j]] += rho * y[i] * y[j] - bs[i] * bs[j] / s_bs.max(1e-300);
}
}
let mut h_new = Array2::zeros((n, n));
for i in 0..n {
for j in 0..n {
let mut val = 0.0f64;
for k in 0..n {
for l in 0..n {
let li = if i == k { 1.0 } else { 0.0 } - rho * s[i] * y[k];
let rj = if j == l { 1.0 } else { 0.0 } - rho * y[l] * s[j];
val += li * self.h_inv[[k, l]] * rj;
}
}
h_new[[i, j]] = val + rho * s[i] * s[j];
}
}
self.h_inv = h_new;
Ok(())
}
fn multiply(&self, v: &[f64]) -> Result<Vec<f64>, OptimizeError> {
let n = self.n;
if v.len() != n {
return Err(OptimizeError::ValueError("Dimension mismatch".to_string()));
}
let mut result = vec![0.0f64; n];
for i in 0..n {
for j in 0..n {
result[i] += self.b[[i, j]] * v[j];
}
}
Ok(result)
}
fn inverse_multiply(&self, v: &[f64]) -> Result<Vec<f64>, OptimizeError> {
let n = self.n;
if v.len() != n {
return Err(OptimizeError::ValueError("Dimension mismatch".to_string()));
}
let mut result = vec![0.0f64; n];
for i in 0..n {
for j in 0..n {
result[i] += self.h_inv[[i, j]] * v[j];
}
}
Ok(result)
}
fn to_dense(&self) -> Array2<f64> {
self.b.clone()
}
fn reset(&mut self) {
self.b = Array2::eye(self.n);
self.h_inv = Array2::eye(self.n);
}
fn dim(&self) -> usize {
self.n
}
}
pub struct DFP {
pub c: Array2<f64>,
pub n: usize,
}
impl DFP {
pub fn new(n: usize) -> Self {
Self {
c: Array2::eye(n),
n,
}
}
}
impl HessianApproximation for DFP {
fn update(&mut self, s: &[f64], y: &[f64]) -> Result<(), OptimizeError> {
let n = self.n;
if s.len() != n || y.len() != n {
return Err(OptimizeError::ValueError("Dimension mismatch in DFP".to_string()));
}
let sy: f64 = (0..n).map(|i| s[i] * y[i]).sum();
if sy <= 1e-10 {
return Ok(());
}
let cy: Vec<f64> = (0..n)
.map(|i| (0..n).map(|j| self.c[[i, j]] * y[j]).sum())
.collect();
let y_cy: f64 = (0..n).map(|i| y[i] * cy[i]).sum();
for i in 0..n {
for j in 0..n {
self.c[[i, j]] += s[i] * s[j] / sy - cy[i] * cy[j] / y_cy.max(1e-300);
}
}
Ok(())
}
fn multiply(&self, v: &[f64]) -> Result<Vec<f64>, OptimizeError> {
let n = self.n;
if v.len() != n {
return Err(OptimizeError::ValueError("Dimension mismatch".to_string()));
}
let mut a: Vec<Vec<f64>> = (0..n).map(|i| self.c.row(i).to_vec()).collect();
let mut b = v.to_vec();
gaussian_solve(&mut a, &mut b)
.ok_or_else(|| OptimizeError::ComputationError("DFP inverse singular".to_string()))
}
fn inverse_multiply(&self, v: &[f64]) -> Result<Vec<f64>, OptimizeError> {
let n = self.n;
if v.len() != n {
return Err(OptimizeError::ValueError("Dimension mismatch".to_string()));
}
let mut result = vec![0.0f64; n];
for i in 0..n {
for j in 0..n {
result[i] += self.c[[i, j]] * v[j];
}
}
Ok(result)
}
fn to_dense(&self) -> Array2<f64> {
self.c.clone()
}
fn reset(&mut self) {
self.c = Array2::eye(self.n);
}
fn dim(&self) -> usize {
self.n
}
}
fn compute_fd_hessian<F>(
fun: &F,
x: &[f64],
step: f64,
) -> Result<Array2<f64>, OptimizeError>
where
F: Fn(&ArrayView1<f64>) -> f64,
{
let n = x.len();
let mut hess = Array2::<f64>::zeros((n, n));
let mut x_tmp = Array1::from(x.to_vec());
let f0 = fun(&x_tmp.view());
for i in 0..n {
let hi = step * (1.0 + x[i].abs());
x_tmp[i] = x[i] + hi;
let fp = fun(&x_tmp.view());
x_tmp[i] = x[i] - hi;
let fm = fun(&x_tmp.view());
x_tmp[i] = x[i];
if !fp.is_finite() || !fm.is_finite() {
return Err(OptimizeError::ComputationError(
"Non-finite value computing Hessian diagonal".to_string(),
));
}
hess[[i, i]] = (fp - 2.0 * f0 + fm) / (hi * hi);
for j in (i + 1)..n {
let hj = step * (1.0 + x[j].abs());
x_tmp[i] = x[i] + hi;
x_tmp[j] = x[j] + hj;
let fpp = fun(&x_tmp.view());
x_tmp[i] = x[i] + hi;
x_tmp[j] = x[j] - hj;
let fpm = fun(&x_tmp.view());
x_tmp[i] = x[i] - hi;
x_tmp[j] = x[j] + hj;
let fmp = fun(&x_tmp.view());
x_tmp[i] = x[i] - hi;
x_tmp[j] = x[j] - hj;
let fmm = fun(&x_tmp.view());
x_tmp[i] = x[i];
x_tmp[j] = x[j];
let val = (fpp - fpm - fmp + fmm) / (4.0 * hi * hj);
hess[[i, j]] = val;
hess[[j, i]] = val;
}
}
Ok(hess)
}
fn gaussian_solve(a: &mut Vec<Vec<f64>>, b: &mut Vec<f64>) -> Option<Vec<f64>> {
let n = b.len();
for col in 0..n {
let mut max_row = col;
let mut max_val = a[col][col].abs();
for row in (col + 1)..n {
let v = a[row][col].abs();
if v > max_val {
max_val = v;
max_row = row;
}
}
if max_val < 1e-14 {
return None;
}
a.swap(col, max_row);
b.swap(col, max_row);
let pivot = a[col][col];
for row in (col + 1)..n {
let factor = a[row][col] / pivot;
b[row] -= factor * b[col];
for k in col..n {
let v = a[col][k];
a[row][k] -= factor * v;
}
}
}
let mut x = vec![0.0f64; n];
for i in (0..n).rev() {
let mut sum = b[i];
for j in (i + 1)..n {
sum -= a[i][j] * x[j];
}
if a[i][i].abs() < 1e-14 {
return None;
}
x[i] = sum / a[i][i];
}
Some(x)
}
#[cfg(test)]
mod tests {
use super::*;
use approx::assert_abs_diff_eq;
fn quadratic(x: &ArrayView1<f64>) -> f64 {
x[0] * x[0] + 4.0 * x[1] * x[1]
}
#[test]
fn test_bfgs_identity_update() {
let mut bfgs = BFGSUpdate::new(2);
let s = vec![-1.0, -1.0];
let y = vec![-2.0, -8.0];
bfgs.update(&s, &y).expect("BFGS update failed");
let v = vec![1.0, 0.0];
let hv = bfgs.multiply(&v).expect("multiply failed");
assert!(hv[0] > 0.0); }
#[test]
fn test_sr1_update() {
let mut sr1 = SR1Update::new(2);
let s = vec![1.0, 0.0];
let y = vec![2.0, 0.0]; sr1.update(&s, &y).expect("SR1 update failed");
let hv = sr1.multiply(&vec![1.0, 0.0]).expect("multiply failed");
assert_abs_diff_eq!(hv[0], 2.0, epsilon = 1e-8);
}
#[test]
fn test_dfp_inverse_multiply() {
let mut dfp = DFP::new(2);
let s = vec![1.0, 0.0];
let y = vec![2.0, 0.0];
dfp.update(&s, &y).expect("DFP update failed");
let sv = dfp.inverse_multiply(&y).expect("inverse multiply failed");
assert!(sv[0] > 0.0);
}
#[test]
fn test_fd_hessian_diagonal() {
let hess = compute_fd_hessian(&quadratic, &[0.0, 0.0], 1e-5).expect("FD Hessian failed");
assert_abs_diff_eq!(hess[[0, 0]], 2.0, epsilon = 1e-4);
assert_abs_diff_eq!(hess[[1, 1]], 8.0, epsilon = 1e-4);
assert_abs_diff_eq!(hess[[0, 1]], 0.0, epsilon = 1e-4);
}
#[test]
fn test_gaussian_solve() {
let mut a = vec![vec![2.0, 1.0], vec![1.0, 3.0]];
let mut b = vec![5.0, 7.0];
let x = gaussian_solve(&mut a, &mut b).expect("solve failed");
assert_abs_diff_eq!(x[0], 1.6, epsilon = 1e-10);
assert_abs_diff_eq!(x[1], 1.8, epsilon = 1e-10);
}
#[test]
fn test_bfgs_reset() {
let mut bfgs = BFGSUpdate::new(3);
bfgs.update(&[1.0, 0.0, 0.0], &[2.0, 0.0, 0.0])
.expect("update failed");
bfgs.reset();
let v = vec![1.0, 2.0, 3.0];
let hv = bfgs.multiply(&v).expect("multiply failed");
assert_abs_diff_eq!(hv[0], 1.0, epsilon = 1e-12);
assert_abs_diff_eq!(hv[1], 2.0, epsilon = 1e-12);
assert_abs_diff_eq!(hv[2], 3.0, epsilon = 1e-12);
}
}