use crate::error::OptimizeError;
use crate::unconstrained::result::OptimizeResult;
use crate::unconstrained::utils::{finite_difference_gradient, finite_difference_hessian};
use crate::unconstrained::Options;
use scirs2_core::ndarray::{Array1, Array2, ArrayView1};
use super::functions_2::{
calculate_predicted_reduction, trust_region_lanczos_subproblem, trust_region_subproblem,
};
use super::types::{TrustRegionConfig, TrustRegionResult};
/// Compute the Cauchy point: the minimizer of the quadratic model along the
/// steepest-descent direction, clipped to the trust-region boundary.
///
/// Returns the step together with a flag that is `true` when the step lies
/// on the trust-region boundary.
pub fn cauchy_point(
    gradient: &Array1<f64>,
    hessian: &Array2<f64>,
    trust_radius: f64,
) -> (Array1<f64>, bool) {
    let grad_norm = gradient.dot(gradient).sqrt();
    // Vanishing gradient: no descent direction, return the zero step.
    if grad_norm < 1e-15 {
        return (Array1::zeros(gradient.len()), false);
    }
    // Model curvature along the steepest-descent direction: g' H g.
    let curvature = gradient.dot(&hessian.dot(gradient));
    let tau = if curvature <= 0.0 {
        // Non-positive curvature: the model decreases all the way to the boundary.
        1.0
    } else {
        (grad_norm.powi(3) / (trust_radius * curvature)).min(1.0)
    };
    let scale = tau * trust_radius / grad_norm;
    let step = gradient * (-scale);
    // tau == 1 means the step length equals the trust radius.
    let on_boundary = (tau - 1.0).abs() < 1e-12;
    (step, on_boundary)
}
/// Approximately solve the trust-region subproblem with the dogleg method.
///
/// Takes the Newton step when it fits inside the region; otherwise walks the
/// dogleg path from the Cauchy point toward the Newton point and returns the
/// intersection with the boundary. Returns the step and a boundary flag.
pub fn dogleg_step(
    gradient: &Array1<f64>,
    hessian: &Array2<f64>,
    trust_radius: f64,
) -> (Array1<f64>, bool) {
    let dim = gradient.len();
    let grad_norm = gradient.dot(gradient).sqrt();
    if grad_norm < 1e-15 {
        return (Array1::zeros(dim), false);
    }

    // Full Newton step p_N = -H^{-1} g (regularized Cholesky solve).
    let p_newton = solve_symmetric_system(hessian, &(-gradient.clone()));
    let newton_ok = p_newton.iter().all(|v| v.is_finite());
    if newton_ok && p_newton.dot(&p_newton).sqrt() <= trust_radius {
        // Newton step is interior: take it as-is.
        return (p_newton, false);
    }

    let (p_c, _) = cauchy_point(gradient, hessian, trust_radius);
    let pc_norm = p_c.dot(&p_c).sqrt();
    // Cauchy point at or past the boundary: project it onto the sphere.
    if pc_norm >= trust_radius {
        return (p_c * (trust_radius / pc_norm), true);
    }

    if !newton_ok {
        // No usable Newton direction: extend the Cauchy direction to the boundary.
        if pc_norm > 1e-15 {
            return (p_c * (trust_radius / pc_norm), true);
        }
        return (Array1::zeros(dim), false);
    }

    // Second dogleg leg: find tau in [0, 1] with ||p_c + tau (p_N - p_c)|| = radius.
    // This is the positive root of a quadratic in tau.
    let leg = &p_newton - &p_c;
    let a = leg.dot(&leg);
    let b = 2.0 * p_c.dot(&leg);
    let c = p_c.dot(&p_c) - trust_radius * trust_radius;
    let disc = (b * b - 4.0 * a * c).max(0.0);
    let tau = if a.abs() < 1e-15 {
        0.0
    } else {
        (-b + disc.sqrt()) / (2.0 * a)
    };
    let tau = tau.clamp(0.0, 1.0);
    (&p_c + &(&leg * tau), true)
}
/// Solve the symmetric system `hess * x = rhs` via Cholesky factorization
/// with escalating diagonal regularization.
///
/// Each level adds `reg * I` to the diagonal before factorizing; the first
/// level (0.0) attempts an exact solve. If every level fails (matrix too
/// indefinite or singular), the function falls back to scaling `rhs` by the
/// largest diagonal magnitude so the caller always receives finite values.
fn solve_symmetric_system(hess: &Array2<f64>, rhs: &Array1<f64>) -> Array1<f64> {
    let n = hess.nrows();
    // Diagonal shifts tried in increasing order of aggressiveness.
    let regularization_levels = [0.0, 1e-12, 1e-10, 1e-8, 1e-6, 1e-4, 1e-2];
    // BUG FIX: the loop header had been corrupted to `for ® in ®ularization_levels`
    // (mangled `&reg` tokens), which does not compile. Restored the intended form.
    for &reg in &regularization_levels {
        // Cholesky factorization: hess + reg*I = L L^T, L lower-triangular.
        let mut l = Array2::<f64>::zeros((n, n));
        let mut success = true;
        for i in 0..n {
            for j in 0..=i {
                let mut sum = hess[[i, j]];
                if i == j {
                    sum += reg;
                }
                for k in 0..j {
                    sum -= l[[i, k]] * l[[j, k]];
                }
                if i == j {
                    // The diagonal pivot must be strictly positive for SPD.
                    if sum <= 0.0 {
                        success = false;
                        break;
                    }
                    l[[i, j]] = sum.sqrt();
                } else {
                    if l[[j, j]].abs() < 1e-15 {
                        success = false;
                        break;
                    }
                    l[[i, j]] = sum / l[[j, j]];
                }
            }
            if !success {
                break;
            }
        }
        if !success {
            // Factorization failed; retry with a larger diagonal shift.
            continue;
        }
        // Forward substitution: solve L y = rhs.
        let mut y = Array1::<f64>::zeros(n);
        for i in 0..n {
            let mut sum = rhs[i];
            for j in 0..i {
                sum -= l[[i, j]] * y[j];
            }
            if l[[i, i]].abs() < 1e-15 {
                success = false;
                break;
            }
            y[i] = sum / l[[i, i]];
        }
        if !success {
            continue;
        }
        // Back substitution: solve L^T x = y.
        let mut x = Array1::<f64>::zeros(n);
        for i in (0..n).rev() {
            let mut sum = y[i];
            for j in (i + 1)..n {
                sum -= l[[j, i]] * x[j];
            }
            if l[[i, i]].abs() < 1e-15 {
                success = false;
                break;
            }
            x[i] = sum / l[[i, i]];
        }
        if success && x.iter().all(|v| v.is_finite()) {
            return x;
        }
    }
    // Last-resort fallback: scale rhs by the largest diagonal magnitude
    // (floor of 1.0 guards against a zero diagonal).
    let max_diag = (0..n).map(|i| hess[[i, i]].abs()).fold(1.0, f64::max);
    rhs / max_diag
}
/// Solve the trust-region subproblem with the dogleg method and report the
/// quadratic model's predicted reduction alongside the step.
///
/// Returns `(step, predicted_reduction, hits_boundary)` where the predicted
/// reduction is `-(g'p + 0.5 p'Hp)`.
pub fn solve_trust_subproblem(
    gradient: &Array1<f64>,
    hessian: &Array2<f64>,
    trust_radius: f64,
) -> (Array1<f64>, f64, bool) {
    let (step, hits_boundary) = dogleg_step(gradient, hessian, trust_radius);
    // Linear and quadratic terms of the model evaluated at the step.
    let linear_term = gradient.dot(&step);
    let quadratic_term = step.dot(&hessian.dot(&step));
    let predicted_reduction = -(linear_term + 0.5 * quadratic_term);
    (step, predicted_reduction, hits_boundary)
}
/// Trust-region minimization of a scalar objective.
///
/// Uses user-supplied gradient/Hessian closures when provided, otherwise
/// falls back to finite differences. Each iteration solves the dogleg
/// subproblem, compares actual vs. predicted reduction, and adjusts the
/// trust radius accordingly.
///
/// # Errors
/// Returns an error when the config fails validation, `x0` is empty, the
/// initial objective value is non-finite, or finite differencing fails.
pub fn trust_region_minimize<F, G, H>(
    mut objective: F,
    gradient_fn: Option<G>,
    hessian_fn: Option<H>,
    x0: Array1<f64>,
    config: Option<TrustRegionConfig>,
) -> Result<TrustRegionResult, OptimizeError>
where
    F: FnMut(&ArrayView1<f64>) -> f64,
    G: Fn(&ArrayView1<f64>) -> Array1<f64>,
    H: Fn(&ArrayView1<f64>) -> Array2<f64>,
{
    let config = config.unwrap_or_default();
    config.validate()?;
    let n = x0.len();
    if n == 0 {
        return Err(OptimizeError::ValueError(
            "Initial guess must have at least one element".to_string(),
        ));
    }
    let mut x = x0;
    let mut trust_radius = config.initial_radius;
    // Evaluation counters: objective, gradient, Hessian.
    let mut n_fev: usize = 0;
    let mut n_gev: usize = 0;
    let mut n_hev: usize = 0;
    let mut f_val = objective(&x.view());
    n_fev += 1;
    if !f_val.is_finite() {
        return Err(OptimizeError::ComputationError(
            "Initial function value is not finite".to_string(),
        ));
    }
    // Initial gradient: analytic if available, otherwise finite differences.
    let mut grad = match &gradient_fn {
        Some(gf) => {
            n_gev += 1;
            gf(&x.view())
        }
        None => {
            let g = finite_difference_gradient(&mut objective, &x.view(), config.eps)?;
            // Bookkeeping assumes 2 evaluations per coordinate (central
            // differences) — TODO confirm against finite_difference_gradient.
            n_fev += 2 * n;
            g
        }
    };
    let mut converged = false;
    let mut n_iter = 0;
    let mut message = String::new();
    for iter in 0..config.max_iter {
        n_iter = iter + 1;
        // Exit 1: first-order stationarity.
        let grad_norm = grad.dot(&grad).sqrt();
        if grad_norm < config.tolerance {
            converged = true;
            message = format!(
                "Converged: gradient norm {:.2e} < tolerance {:.2e}",
                grad_norm, config.tolerance
            );
            break;
        }
        // Hessian: analytic if available, otherwise finite differences.
        let hess = match &hessian_fn {
            Some(hf) => {
                n_hev += 1;
                hf(&x.view())
            }
            None => {
                let h = finite_difference_hessian(&mut objective, &x.view(), config.eps)?;
                // Presumed evaluation count for the symmetric FD Hessian —
                // TODO confirm against finite_difference_hessian.
                n_fev += 1 + 4 * n * (n + 1) / 2;
                h
            }
        };
        let (step, predicted_reduction, hits_boundary) =
            solve_trust_subproblem(&grad, &hess, trust_radius);
        let x_trial = &x + &step;
        let f_trial = objective(&x_trial.view());
        n_fev += 1;
        let actual_reduction = f_val - f_trial;
        // Agreement ratio between actual and model-predicted reduction.
        // When the predicted reduction is ~0, treat a non-increase as
        // perfect agreement and an increase as failure.
        let ratio = if predicted_reduction.abs() < 1e-15 {
            if actual_reduction.abs() < 1e-15 {
                1.0
            } else if actual_reduction > 0.0 {
                1.0
            } else {
                0.0
            }
        } else {
            actual_reduction / predicted_reduction
        };
        // Radius update: shrink on poor agreement; expand only when the
        // model fit well AND the step pressed against the boundary.
        if ratio < config.eta1 {
            trust_radius *= config.gamma1;
        } else if ratio > config.eta2 && hits_boundary {
            trust_radius = (trust_radius * config.gamma2).min(config.max_radius);
        }
        // Accept the trial point when reduction is sufficient.
        if ratio > config.eta1 {
            x = x_trial;
            f_val = f_trial;
            grad = match &gradient_fn {
                Some(gf) => {
                    n_gev += 1;
                    gf(&x.view())
                }
                None => {
                    let g = finite_difference_gradient(&mut objective, &x.view(), config.eps)?;
                    n_fev += 2 * n;
                    g
                }
            };
            // Exit 2: relative function-change tolerance (only after an
            // accepted step).
            if actual_reduction.abs() < config.ftol * (1.0 + f_val.abs()) {
                converged = true;
                message = format!(
                    "Converged: function change {:.2e} < ftol {:.2e}",
                    actual_reduction.abs(),
                    config.ftol * (1.0 + f_val.abs())
                );
                break;
            }
        }
        // Exit 3: the trust region has collapsed.
        if trust_radius < config.min_radius {
            converged = true;
            message = format!(
                "Converged: trust radius {:.2e} < minimum {:.2e}",
                trust_radius, config.min_radius
            );
            break;
        }
    }
    if !converged {
        message = format!("Maximum iterations ({}) reached", config.max_iter);
    }
    let grad_norm = grad.dot(&grad).sqrt();
    Ok(TrustRegionResult {
        x,
        f_val,
        n_iter,
        converged,
        trust_radius_final: trust_radius,
        n_fev,
        n_gev,
        n_hev,
        grad_norm,
        message,
    })
}
/// Trust-region minimizer using finite-difference derivatives and the
/// standard trust-region subproblem solver.
///
/// Derivatives are approximated by finite differences; the trust radius is
/// shrunk when the quadratic model fits poorly and expanded when it fits
/// well and the step reached the boundary.
#[allow(dead_code)]
pub fn minimize_trust_ncg<F, S>(
    mut fun: F,
    x0: Array1<f64>,
    options: &Options,
) -> Result<OptimizeResult<S>, OptimizeError>
where
    F: FnMut(&ArrayView1<f64>) -> S,
    S: Into<f64> + Clone,
{
    let ftol = options.ftol;
    let gtol = options.gtol;
    let max_iter = options.max_iter;
    let eps = options.eps;
    let radius_start = options.trust_radius.unwrap_or(1.0);
    let radius_cap = options.max_trust_radius.unwrap_or(1000.0);
    let radius_floor = options.min_trust_radius.unwrap_or(1e-10);
    let accept_threshold = options.trust_eta.unwrap_or(1e-4);

    let dim = x0.len();
    let mut x = x0.to_owned();
    let mut nfev = 0;

    // Initial objective value and finite-difference gradient.
    let mut f_cur = fun(&x.view()).into();
    nfev += 1;
    let mut g = finite_difference_gradient(&mut fun, &x.view(), eps)?;
    nfev += dim;

    let mut radius = radius_start;
    let mut iter = 0;
    while iter < max_iter {
        // First-order convergence test on the gradient norm.
        if g.dot(&g).sqrt() < gtol {
            break;
        }
        let f_prev = f_cur;

        // Finite-difference Hessian for the local quadratic model.
        let hess = finite_difference_hessian(&mut fun, &x.view(), eps)?;
        nfev += dim * dim;

        // Solve the subproblem within the current radius.
        let (step, on_boundary) = trust_region_subproblem(&g, &hess, radius);
        let predicted = calculate_predicted_reduction(&g, &hess, &step);

        let x_trial = &x + &step;
        let f_trial = fun(&x_trial.view()).into();
        nfev += 1;
        let actual = f_cur - f_trial;

        // Agreement ratio between actual and predicted reduction.
        let rho = if predicted.abs() < 1e-8 {
            1.0
        } else {
            actual / predicted
        };

        // Radius update: shrink on poor agreement, expand on strong
        // agreement when the step pressed against the boundary.
        if rho < 0.25 {
            radius *= 0.25;
        } else if rho > 0.75 && on_boundary {
            radius = f64::min(2.0 * radius, radius_cap);
        }

        // Accept the trial point when the reduction is good enough.
        if rho > accept_threshold {
            x = x_trial;
            f_cur = f_trial;
            g = finite_difference_gradient(&mut fun, &x.view(), eps)?;
            nfev += dim;
        }

        // Stop if the trust region has collapsed.
        if radius < radius_floor {
            break;
        }
        // Relative function-change convergence, only after an accepted step.
        if rho > accept_threshold && (f_prev - f_cur).abs() < ftol * (1.0 + f_cur.abs()) {
            break;
        }
        iter += 1;
    }

    let final_fun = fun(&x.view());
    Ok(OptimizeResult {
        x,
        fun: final_fun,
        nit: iter,
        func_evals: nfev,
        nfev,
        success: iter < max_iter,
        message: if iter < max_iter {
            "Optimization terminated successfully.".to_string()
        } else {
            "Maximum iterations reached.".to_string()
        },
        jacobian: Some(g),
        hessian: None,
    })
}
/// Trust-region minimizer using finite-difference derivatives and a
/// Lanczos (Krylov-subspace) trust-region subproblem solver.
///
/// Identical outer loop to the standard trust-region variant; only the
/// subproblem solver differs.
#[allow(dead_code)]
pub fn minimize_trust_krylov<F, S>(
    mut fun: F,
    x0: Array1<f64>,
    options: &Options,
) -> Result<OptimizeResult<S>, OptimizeError>
where
    F: FnMut(&ArrayView1<f64>) -> S,
    S: Into<f64> + Clone,
{
    let ftol = options.ftol;
    let gtol = options.gtol;
    let max_iter = options.max_iter;
    let eps = options.eps;
    let radius_start = options.trust_radius.unwrap_or(1.0);
    let radius_cap = options.max_trust_radius.unwrap_or(1000.0);
    let radius_floor = options.min_trust_radius.unwrap_or(1e-10);
    let accept_threshold = options.trust_eta.unwrap_or(1e-4);

    let dim = x0.len();
    let mut x = x0.to_owned();
    let mut nfev = 0;

    // Initial objective value and finite-difference gradient.
    let mut f_cur = fun(&x.view()).into();
    nfev += 1;
    let mut g = finite_difference_gradient(&mut fun, &x.view(), eps)?;
    nfev += dim;

    let mut radius = radius_start;
    let mut iter = 0;
    while iter < max_iter {
        // First-order convergence test on the gradient norm.
        if g.dot(&g).sqrt() < gtol {
            break;
        }
        let f_prev = f_cur;

        // Finite-difference Hessian for the local quadratic model.
        let hess = finite_difference_hessian(&mut fun, &x.view(), eps)?;
        nfev += dim * dim;

        // Lanczos-based subproblem solve within the current radius.
        let (step, on_boundary) = trust_region_lanczos_subproblem(&g, &hess, radius);
        let predicted = calculate_predicted_reduction(&g, &hess, &step);

        let x_trial = &x + &step;
        let f_trial = fun(&x_trial.view()).into();
        nfev += 1;
        let actual = f_cur - f_trial;

        // Agreement ratio between actual and predicted reduction.
        let rho = if predicted.abs() < 1e-8 {
            1.0
        } else {
            actual / predicted
        };

        // Radius update: shrink on poor agreement, expand on strong
        // agreement when the step pressed against the boundary.
        if rho < 0.25 {
            radius *= 0.25;
        } else if rho > 0.75 && on_boundary {
            radius = f64::min(2.0 * radius, radius_cap);
        }

        // Accept the trial point when the reduction is good enough.
        if rho > accept_threshold {
            x = x_trial;
            f_cur = f_trial;
            g = finite_difference_gradient(&mut fun, &x.view(), eps)?;
            nfev += dim;
        }

        // Stop if the trust region has collapsed.
        if radius < radius_floor {
            break;
        }
        // Relative function-change convergence, only after an accepted step.
        if rho > accept_threshold && (f_prev - f_cur).abs() < ftol * (1.0 + f_cur.abs()) {
            break;
        }
        iter += 1;
    }

    let final_fun = fun(&x.view());
    Ok(OptimizeResult {
        x,
        fun: final_fun,
        nit: iter,
        func_evals: nfev,
        nfev,
        success: iter < max_iter,
        message: if iter < max_iter {
            "Optimization terminated successfully.".to_string()
        } else {
            "Maximum iterations reached.".to_string()
        },
        jacobian: Some(g),
        hessian: None,
    })
}