use crate::error::OptimizeError;
use crate::unconstrained::Bounds;
use scirs2_core::ndarray::{Array1, ArrayView1};
#[derive(Debug, Clone)]
pub struct LineSearchResult {
pub alpha: f64,
pub f_val: f64,
pub grad: Option<Array1<f64>>,
pub derphi: Option<f64>,
pub n_fev: usize,
pub n_gev: usize,
pub success: bool,
pub message: String,
}
#[derive(Debug, Clone)]
pub struct StrongWolfeConfig {
pub c1: f64,
pub c2: f64,
pub alpha_init: f64,
pub alpha_max: f64,
pub max_fev: usize,
pub alpha_min: f64,
}
impl Default for StrongWolfeConfig {
fn default() -> Self {
Self {
c1: 1e-4,
c2: 0.9,
alpha_init: 1.0,
alpha_max: 1e10,
max_fev: 100,
alpha_min: 1e-14,
}
}
}
pub struct StrongWolfe {
pub config: StrongWolfeConfig,
}
impl StrongWolfe {
pub fn new(config: StrongWolfeConfig) -> Self {
Self { config }
}
pub fn default_search() -> Self {
Self {
config: StrongWolfeConfig::default(),
}
}
pub fn search<Phi, DPhi>(
&self,
mut phi: Phi,
mut dphi: DPhi,
phi0: f64,
dphi0: f64,
) -> Result<LineSearchResult, OptimizeError>
where
Phi: FnMut(f64) -> f64,
DPhi: FnMut(f64) -> f64,
{
let cfg = &self.config;
if dphi0 >= 0.0 {
return Err(OptimizeError::ValueError(
"Initial directional derivative must be negative (descent direction required)"
.to_string(),
));
}
let mut n_fev = 1usize; let mut n_gev = 1usize;
let mut alpha_prev = 0.0f64;
let mut alpha = cfg.alpha_init.min(cfg.alpha_max);
let mut phi_prev = phi0;
let mut dphi_prev = dphi0;
let _ = dphi_prev;
for _iter in 0..cfg.max_fev {
let phi_a = phi(alpha);
n_fev += 1;
if phi_a > phi0 + cfg.c1 * alpha * dphi0 || (phi_a >= phi_prev && _iter > 0) {
let (result_alpha, result_phi, nf, ng) = self.zoom(
&mut phi,
&mut dphi,
alpha_prev,
phi_prev,
alpha,
phi_a,
phi0,
dphi0,
);
n_fev += nf;
n_gev += ng;
return Ok(LineSearchResult {
alpha: result_alpha,
f_val: result_phi,
grad: None,
derphi: None,
n_fev,
n_gev,
success: true,
message: "Strong Wolfe conditions satisfied (zoom from upper bracket)"
.to_string(),
});
}
let dphi_a = dphi(alpha);
n_gev += 1;
if dphi_a.abs() <= -cfg.c2 * dphi0 {
return Ok(LineSearchResult {
alpha,
f_val: phi_a,
grad: None,
derphi: Some(dphi_a),
n_fev,
n_gev,
success: true,
message: "Strong Wolfe conditions satisfied".to_string(),
});
}
if dphi_a >= 0.0 {
let (result_alpha, result_phi, nf, ng) = self.zoom(
&mut phi,
&mut dphi,
alpha,
phi_a,
alpha_prev,
phi_prev,
phi0,
dphi0,
);
n_fev += nf;
n_gev += ng;
return Ok(LineSearchResult {
alpha: result_alpha,
f_val: result_phi,
grad: None,
derphi: None,
n_fev,
n_gev,
success: true,
message: "Strong Wolfe conditions satisfied (zoom from positive derivative)"
.to_string(),
});
}
alpha_prev = alpha;
phi_prev = phi_a;
dphi_prev = dphi_a;
let alpha_new = (alpha + cfg.alpha_max) * 0.5;
alpha = cubic_min_bracket(alpha_prev, phi_prev, dphi_a, alpha_new, phi_a)
.unwrap_or(alpha_new)
.clamp(alpha * 1.1, cfg.alpha_max);
}
let f_alpha = phi(alpha);
n_fev += 1;
Ok(LineSearchResult {
alpha,
f_val: f_alpha,
grad: None,
derphi: None,
n_fev,
n_gev,
success: false,
message: "Strong Wolfe search did not converge within max evaluations".to_string(),
})
}
fn zoom<Phi, DPhi>(
&self,
phi: &mut Phi,
dphi: &mut DPhi,
alpha_lo: f64,
phi_lo: f64,
alpha_hi: f64,
phi_hi: f64,
phi0: f64,
dphi0: f64,
) -> (f64, f64, usize, usize)
where
Phi: FnMut(f64) -> f64,
DPhi: FnMut(f64) -> f64,
{
let cfg = &self.config;
let mut n_fev = 0usize;
let mut n_gev = 0usize;
let mut a_lo = alpha_lo;
let mut f_lo = phi_lo;
let mut a_hi = alpha_hi;
let mut f_hi = phi_hi;
for _ in 0..cfg.max_fev {
let alpha_j = cubic_min_bracket(a_lo, f_lo, dphi(a_lo), a_hi, f_hi)
.unwrap_or((a_lo + a_hi) * 0.5)
.clamp(
a_lo.min(a_hi) + 1e-10,
a_lo.max(a_hi) - 1e-10,
);
n_gev += 1;
let phi_j = phi(alpha_j);
n_fev += 1;
if phi_j > phi0 + cfg.c1 * alpha_j * dphi0 || phi_j >= f_lo {
a_hi = alpha_j;
f_hi = phi_j;
} else {
let dphi_j = dphi(alpha_j);
n_gev += 1;
if dphi_j.abs() <= -cfg.c2 * dphi0 {
return (alpha_j, phi_j, n_fev, n_gev);
}
if dphi_j * (a_hi - a_lo) >= 0.0 {
a_hi = a_lo;
f_hi = f_lo;
}
a_lo = alpha_j;
f_lo = phi_j;
}
if (a_hi - a_lo).abs() < cfg.alpha_min {
break;
}
}
(a_lo, f_lo, n_fev, n_gev)
}
}
fn cubic_min_bracket(a: f64, fa: f64, dfa: f64, b: f64, fb: f64) -> Option<f64> {
let d1 = dfa + (fb - fa) / (b - a) * 2.0 - (fb - fa) / (b - a);
let ab = b - a;
let d = dfa;
let d2 = 3.0 * (fa - fb) / ab + d;
let discr = d2 * d2 - d * ((fb - fa) / ab * 3.0 - d2);
let _ = d1;
if discr < 0.0 {
return None;
}
let t = d2 - discr.sqrt();
let denom = (2.0 * d2 - dfa / ab).abs();
if denom < 1e-300 {
return None;
}
let alpha = a + ab * t / denom;
Some(alpha)
}
#[derive(Debug, Clone)]
pub struct HagerZhangConfig {
pub delta: f64,
pub sigma: f64,
pub epsilon: f64,
pub theta: f64,
pub gamma: f64,
pub max_fev: usize,
pub alpha_init: f64,
}
impl Default for HagerZhangConfig {
fn default() -> Self {
Self {
delta: 0.1,
sigma: 0.9,
epsilon: 1e-6,
theta: 0.5,
gamma: 0.66,
max_fev: 50,
alpha_init: 1.0,
}
}
}
pub struct HagerZhang {
pub config: HagerZhangConfig,
}
impl HagerZhang {
pub fn new(config: HagerZhangConfig) -> Self {
Self { config }
}
pub fn default_search() -> Self {
Self {
config: HagerZhangConfig::default(),
}
}
pub fn search<Phi, DPhi>(
&self,
mut phi: Phi,
mut dphi: DPhi,
phi0: f64,
dphi0: f64,
) -> Result<LineSearchResult, OptimizeError>
where
Phi: FnMut(f64) -> f64,
DPhi: FnMut(f64) -> f64,
{
if dphi0 >= 0.0 {
return Err(OptimizeError::ValueError(
"Initial directional derivative must be negative".to_string(),
));
}
let cfg = &self.config;
let mut n_fev = 1usize;
let mut n_gev = 1usize;
let c = cfg.epsilon * phi0.abs().max(1.0);
let wolfe1 = |pa: f64, a: f64| pa <= phi0 + cfg.delta * a * dphi0;
let approx_wolfe1 =
|pa: f64, a: f64| pa <= phi0 + c && cfg.delta * dphi0 >= (phi0 - pa) / a.max(1e-300);
let wolfe2 = |da: f64| da.abs() <= cfg.sigma * dphi0.abs();
let approx_wolfe2 =
|da: f64| (2.0 * cfg.delta - 1.0) * dphi0 <= da && da <= cfg.sigma * dphi0;
let _ = approx_wolfe1;
let mut a = 0.0f64;
let mut b = cfg.alpha_init;
let mut fa = phi0;
let mut fb = phi(b);
n_fev += 1;
let mut db = dphi(b);
n_gev += 1;
if wolfe1(fb, b) && wolfe2(db) {
return Ok(LineSearchResult {
alpha: b,
f_val: fb,
grad: None,
derphi: Some(db),
n_fev,
n_gev,
success: true,
message: "Hager-Zhang: initial step satisfies Wolfe conditions".to_string(),
});
}
for _ in 0..cfg.max_fev {
let mid = a + cfg.theta * (b - a);
let fm = phi(mid);
n_fev += 1;
let dm = dphi(mid);
n_gev += 1;
if wolfe1(fm, mid) && wolfe2(dm) {
return Ok(LineSearchResult {
alpha: mid,
f_val: fm,
grad: None,
derphi: Some(dm),
n_fev,
n_gev,
success: true,
message: "Hager-Zhang: bisection converged".to_string(),
});
}
if approx_wolfe2(dm) && fm <= phi0 + c {
return Ok(LineSearchResult {
alpha: mid,
f_val: fm,
grad: None,
derphi: Some(dm),
n_fev,
n_gev,
success: true,
message: "Hager-Zhang: approximate Wolfe satisfied".to_string(),
});
}
if dm < 0.0 && fm <= phi0 + c {
a = mid;
fa = fm;
} else {
b = mid;
fb = fm;
db = dm;
}
if (b - a).abs() < 1e-14 {
break;
}
let _ = fa;
let _ = fb;
let _ = db;
}
let alpha_best = a + cfg.theta * (b - a);
let f_best = phi(alpha_best);
n_fev += 1;
Ok(LineSearchResult {
alpha: alpha_best,
f_val: f_best,
grad: None,
derphi: None,
n_fev,
n_gev,
success: false,
message: "Hager-Zhang: max evaluations reached".to_string(),
})
}
}
pub struct SafeguardedPowell {
pub c1: f64,
pub max_fev: usize,
pub bracket_tol: f64,
pub alpha_init: f64,
}
impl Default for SafeguardedPowell {
fn default() -> Self {
Self {
c1: 1e-4,
max_fev: 50,
bracket_tol: 1e-14,
alpha_init: 1.0,
}
}
}
impl SafeguardedPowell {
pub fn new(c1: f64, max_fev: usize, alpha_init: f64) -> Self {
Self {
c1,
max_fev,
bracket_tol: 1e-14,
alpha_init,
}
}
pub fn search<Phi, DPhi>(
&self,
mut phi: Phi,
mut dphi: DPhi,
phi0: f64,
dphi0: f64,
) -> Result<LineSearchResult, OptimizeError>
where
Phi: FnMut(f64) -> f64,
DPhi: FnMut(f64) -> f64,
{
if dphi0 >= 0.0 {
return Err(OptimizeError::ValueError(
"Initial directional derivative must be negative".to_string(),
));
}
let mut n_fev = 1usize;
let mut n_gev = 1usize;
let mut alpha = self.alpha_init;
let mut alpha_lo = 0.0f64;
let mut f_lo = phi0;
let mut d_lo = dphi0;
for _ in 0..self.max_fev {
let fa = phi(alpha);
n_fev += 1;
if fa <= phi0 + self.c1 * alpha * dphi0 {
let da = dphi(alpha);
n_gev += 1;
return Ok(LineSearchResult {
alpha,
f_val: fa,
grad: None,
derphi: Some(da),
n_fev,
n_gev,
success: true,
message: "Safeguarded Powell: sufficient decrease satisfied".to_string(),
});
}
let alpha_new =
cubic_interpolate_safeguarded(alpha_lo, f_lo, d_lo, alpha, fa, self.bracket_tol);
if fa < f_lo {
alpha_lo = alpha;
f_lo = fa;
d_lo = dphi(alpha_new.min(alpha));
n_gev += 1;
}
if (alpha_new - alpha_lo).abs() < self.bracket_tol {
break;
}
alpha = alpha_new;
}
Ok(LineSearchResult {
alpha,
f_val: phi(alpha),
grad: None,
derphi: None,
n_fev: n_fev + 1,
n_gev,
success: false,
message: "Safeguarded Powell: max evaluations reached".to_string(),
})
}
}
fn cubic_interpolate_safeguarded(
a: f64,
fa: f64,
da: f64,
b: f64,
fb: f64,
tol: f64,
) -> f64 {
let ab = b - a;
if ab.abs() < tol {
return (a + b) * 0.5;
}
let d1 = da * ab;
let d2 = fb - fa;
let d3 = d2 * 3.0 / ab - da;
let d4 = d3 * d3 - da * (d2 * 3.0 / ab - da);
if d4 < 0.0 {
return (a + b) * 0.5;
}
let sqrt_d4 = d4.sqrt();
let t = 1.0 - (d3 + sqrt_d4) / (d3 + sqrt_d4 + d1 / ab);
let alpha_cubic = a + t.clamp(0.0, 1.0) * ab;
alpha_cubic.clamp(a + tol.abs(), b - tol.abs())
}
#[derive(Debug, Clone)]
pub struct BacktrackingArmijo {
pub c1: f64,
pub rho: f64,
pub alpha_init: f64,
pub alpha_min: f64,
pub max_steps: usize,
pub bounds: Option<Bounds>,
}
impl Default for BacktrackingArmijo {
fn default() -> Self {
Self {
c1: 1e-4,
rho: 0.5,
alpha_init: 1.0,
alpha_min: 1e-14,
max_steps: 60,
bounds: None,
}
}
}
impl BacktrackingArmijo {
pub fn new(c1: f64, rho: f64, alpha_init: f64, bounds: Option<Bounds>) -> Self {
Self {
c1,
rho,
alpha_init,
alpha_min: 1e-14,
max_steps: 60,
bounds,
}
}
pub fn search<F>(
&self,
fun: &mut F,
x: &ArrayView1<f64>,
d: &ArrayView1<f64>,
f0: f64,
slope: f64,
) -> LineSearchResult
where
F: FnMut(&ArrayView1<f64>) -> f64,
{
let mut alpha = self.alpha_init;
let n = x.len();
let mut n_fev = 0usize;
if slope >= 0.0 {
return LineSearchResult {
alpha: 1e-14,
f_val: f0,
grad: None,
derphi: None,
n_fev: 0,
n_gev: 0,
success: false,
message: "Backtracking: non-descent direction".to_string(),
};
}
for _ in 0..self.max_steps {
let mut x_new = Array1::zeros(n);
for i in 0..n {
x_new[i] = x[i] + alpha * d[i];
}
if let Some(ref b) = self.bounds {
if let Some(s) = x_new.as_slice_mut() {
b.project(s);
}
}
n_fev += 1;
let f_new = fun(&x_new.view());
if f_new <= f0 + self.c1 * alpha * slope {
return LineSearchResult {
alpha,
f_val: f_new,
grad: None,
derphi: None,
n_fev,
n_gev: 0,
success: true,
message: "Armijo condition satisfied".to_string(),
};
}
alpha *= self.rho;
if alpha < self.alpha_min {
return LineSearchResult {
alpha: self.alpha_min,
f_val: f_new,
grad: None,
derphi: None,
n_fev,
n_gev: 0,
success: false,
message: "Backtracking: alpha below minimum".to_string(),
};
}
}
let mut x_last = Array1::zeros(n);
for i in 0..n {
x_last[i] = x[i] + alpha * d[i];
}
n_fev += 1;
let f_last = fun(&x_last.view());
LineSearchResult {
alpha,
f_val: f_last,
grad: None,
derphi: None,
n_fev,
n_gev: 0,
success: false,
message: "Backtracking: max steps reached".to_string(),
}
}
pub fn search_scalar<Phi>(
&self,
mut phi: Phi,
phi0: f64,
dphi0: f64,
) -> LineSearchResult
where
Phi: FnMut(f64) -> f64,
{
let mut alpha = self.alpha_init;
let mut n_fev = 0usize;
if dphi0 >= 0.0 {
return LineSearchResult {
alpha: 1e-14,
f_val: phi0,
grad: None,
derphi: None,
n_fev: 0,
n_gev: 0,
success: false,
message: "Backtracking: non-descent direction".to_string(),
};
}
for _ in 0..self.max_steps {
n_fev += 1;
let fa = phi(alpha);
if fa <= phi0 + self.c1 * alpha * dphi0 {
return LineSearchResult {
alpha,
f_val: fa,
grad: None,
derphi: None,
n_fev,
n_gev: 0,
success: true,
message: "Armijo condition satisfied".to_string(),
};
}
alpha *= self.rho;
if alpha < self.alpha_min {
break;
}
}
LineSearchResult {
alpha,
f_val: phi(alpha),
grad: None,
derphi: None,
n_fev: n_fev + 1,
n_gev: 0,
success: false,
message: "Backtracking: max steps reached".to_string(),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
fn phi_quadratic(alpha: f64) -> f64 {
(1.0 - alpha).powi(2)
}
fn dphi_quadratic(alpha: f64) -> f64 {
-2.0 * (1.0 - alpha)
}
#[test]
fn test_strong_wolfe_quadratic() {
let sw = StrongWolfe::default_search();
let result = sw
.search(phi_quadratic, dphi_quadratic, 1.0, -2.0)
.expect("StrongWolfe failed");
assert!(result.success);
assert!(result.alpha > 0.0 && result.alpha <= 2.0);
assert!(result.f_val < 1.0);
}
#[test]
fn test_hager_zhang_quadratic() {
let hz = HagerZhang::default_search();
let result = hz
.search(phi_quadratic, dphi_quadratic, 1.0, -2.0)
.expect("HagerZhang failed");
assert!(result.alpha > 0.0);
assert!(result.f_val <= 1.0);
}
#[test]
fn test_backtracking_armijo_quadratic() {
let bt = BacktrackingArmijo::default();
let result = bt.search_scalar(phi_quadratic, 1.0, -2.0);
assert!(result.success);
assert!(result.f_val <= 1.0);
}
#[test]
fn test_safeguarded_powell_quadratic() {
let pw = SafeguardedPowell::default();
let result = pw
.search(phi_quadratic, dphi_quadratic, 1.0, -2.0)
.expect("Powell failed");
assert!(result.alpha > 0.0);
assert!(result.f_val < 1.0);
}
#[test]
fn test_backtracking_armijo_bad_direction() {
let bt = BacktrackingArmijo::default();
let result = bt.search_scalar(phi_quadratic, 1.0, 1.0); assert!(!result.success);
}
#[test]
fn test_strong_wolfe_bad_direction() {
let sw = StrongWolfe::default_search();
let err = sw.search(phi_quadratic, dphi_quadratic, 1.0, 1.0);
assert!(err.is_err());
}
}