use super::*;
#[derive(Debug, Clone, derive_getters::Getters)]
pub struct MoreThuenteB {
c1: Floating, c2: Floating, t_min: Floating,
t_max: Floating,
delta_min: Floating,
delta: Floating,
delta_max: Floating,
lower_bound: DVector<Floating>,
upper_bound: DVector<Floating>,
}
impl MoreThuenteB {
pub fn new(n: usize) -> Self {
MoreThuenteB {
c1: 1e-4,
c2: 0.9,
t_min: 0.0,
t_max: Floating::INFINITY,
delta_min: 0.58333333,
delta: 0.66,
delta_max: 1.1,
lower_bound: DVector::from_element(n, -Floating::INFINITY),
upper_bound: DVector::from_element(n, Floating::INFINITY),
}
}
pub fn with_lower_bound(mut self, lower_bound: DVector<Floating>) -> Self {
self.lower_bound = lower_bound;
self
}
pub fn with_upper_bound(mut self, upper_bound: DVector<Floating>) -> Self {
self.upper_bound = upper_bound;
self
}
pub fn with_deltas(
mut self,
delta_min: Floating,
delta: Floating,
delta_max: Floating,
) -> Self {
self.delta_min = delta_min;
self.delta = delta;
self.delta_max = delta_max;
self
}
pub fn with_t_min(mut self, t_min: Floating) -> Self {
self.t_min = t_min;
self
}
pub fn with_t_max(mut self, t_max: Floating) -> Self {
self.t_max = t_max;
self
}
pub fn with_c1(mut self, c1: Floating) -> Self {
assert!(c1 > 0.0, "c1 must be positive");
assert!(c1 < self.c2, "c1 must be less than c2");
self.c1 = c1;
self
}
pub fn with_c2(mut self, c2: Floating) -> Self {
assert!(c2 > 0.0, "c2 must be positive");
assert!(c2 < 1.0, "c2 must be less than 1");
assert!(c2 > self.c1, "c2 must be greater than c1");
self.c2 = c2;
self
}
pub fn update_interval(
f_tl: &Floating,
f_t: &Floating,
g_t: &Floating,
tl: &mut Floating,
t: Floating,
tu: &mut Floating,
) -> bool {
if f_t > f_tl {
*tu = t;
false
}
else if g_t * (*tl - t) > 0. {
*tl = t;
false
}
else if g_t * (*tl - t) < 0. {
*tu = *tl;
*tl = t;
false
} else {
true
}
}
pub fn cubic_minimizer(
ta: &Floating,
tb: &Floating,
f_ta: &Floating,
f_tb: &Floating,
g_ta: &Floating,
g_tb: &Floating,
) -> Floating {
let s = 3. * (f_tb - f_ta) / (tb - ta);
let z = s - g_ta - g_tb;
let w = (z.powi(2) - g_ta * g_tb).sqrt();
ta + ((tb - ta) * ((w - g_ta - z) / (g_tb - g_ta + 2. * w)))
}
pub fn quadratic_minimzer_1(
ta: &Floating,
tb: &Floating,
f_ta: &Floating,
f_tb: &Floating,
g_ta: &Floating,
) -> Floating {
let lin_int = (f_ta - f_tb) / (ta - tb);
ta - 0.5 * ((ta - tb) * g_ta / (g_ta - lin_int))
}
pub fn quadratic_minimizer_2(
ta: &Floating,
tb: &Floating,
g_ta: &Floating,
g_tb: &Floating,
) -> Floating {
trace!(target: "morethuente line search", "Quadratic minimizer 2: ta: {}, tb: {}, g_ta: {}, g_tb: {}", ta, tb, g_ta, g_tb);
ta - g_ta * ((ta - tb) / (g_ta - g_tb))
}
pub fn phi(eval: &FuncEvalMultivariate, direction_k: &DVector<Floating>) -> FuncEvalUnivariate {
let image = eval.f();
let derivative = eval.g().dot(direction_k);
FuncEvalUnivariate::new(*image, derivative)
}
pub fn psi(
&self,
phi_0: &FuncEvalUnivariate,
phi_t: &FuncEvalUnivariate,
t: &Floating,
) -> FuncEvalUnivariate {
let image = phi_t.f() - phi_0.f() - self.c1 * t * phi_0.g();
let derivative = phi_t.g() - self.c1 * phi_0.g();
FuncEvalUnivariate::new(image, derivative)
}
}
impl SufficientDecreaseCondition for MoreThuenteB {
fn c1(&self) -> Floating {
self.c1
}
}
impl CurvatureCondition for MoreThuenteB {
fn c2(&self) -> Floating {
self.c2
}
}
impl LineSearch for MoreThuenteB {
fn compute_step_len(
&mut self,
x_k: &DVector<Floating>, eval_x_k: &FuncEvalMultivariate, direction_k: &DVector<Floating>, oracle: &mut impl FnMut(&DVector<Floating>) -> FuncEvalMultivariate, max_iter: usize, ) -> Floating {
let mut use_modified_updating = false;
let mut interval_converged = false;
let t_max_candidate = direction_k
.iter()
.enumerate()
.map(|(i, d)| {
if *d > 0.0 {
(self.upper_bound[i] - x_k[i]) / d
} else if *d < 0.0 {
(self.lower_bound[i] - x_k[i]) / d
} else {
Floating::INFINITY
}
})
.fold(Floating::INFINITY, |acc, x| x.min(acc));
debug!(target: "morethuente line search", "t_max_candidate: {}",t_max_candidate);
self.t_max = self.t_max.min(t_max_candidate);
let mut t = 1.0f64.max(self.t_min).min(self.t_max);
let mut tl = self.t_min;
let mut tu = self.t_max;
let eval_0 = eval_x_k;
for i in 0..max_iter {
let eval_t = oracle(&(x_k + t * direction_k));
if self.strong_wolfe_conditions_with_directional_derivative(
eval_0.f(),
eval_t.f(),
eval_0.g(),
eval_t.g(),
&t,
direction_k,
) {
trace!("Strong Wolfe conditions satisfied at iteration {}", i);
return t;
} else if interval_converged {
trace!("Interval converged at iteration {}", i);
return t;
} else if t == tl {
trace!("t is at the minimum value at iteration {}", i);
return t;
} else if t == tu {
trace!("t is at the maximum value at iteration {}", i);
return t;
}
let phi_t = Self::phi(&eval_t, direction_k);
let phi_0 = Self::phi(eval_0, direction_k);
let psi_t = self.psi(&phi_0, &phi_t, &t);
if !use_modified_updating && psi_t.f() <= &0. && phi_t.g() > &0. {
use_modified_updating = true;
}
let eval_tl = oracle(&(x_k + tl * direction_k));
let phi_tl = Self::phi(&eval_tl, direction_k);
let (f_tl, g_tl, f_t, g_t) = if use_modified_updating {
(*phi_tl.f(), *phi_tl.g(), phi_t.f(), phi_t.g())
} else {
let psi_tl = self.psi(&phi_0, &phi_tl, &tl);
(*psi_tl.f(), *psi_tl.g(), psi_t.f(), psi_t.g())
};
if f_t > &f_tl {
let tc = Self::cubic_minimizer(&tl, &t, &f_tl, f_t, &g_tl, g_t);
let tq = Self::quadratic_minimzer_1(&tl, &t, &f_tl, f_t, &g_tl);
trace!(target: "morethuente line search", "Case 1: tc: {}, tq: {}", tc, tq);
if (tc - tl).abs() < (tq - tl).abs() {
t = tc;
} else {
t = 0.5 * (tq + tc); }
}
else if g_t * g_tl < 0. {
let tc = Self::cubic_minimizer(&tl, &t, &f_tl, f_t, &g_tl, g_t);
let ts = Self::quadratic_minimizer_2(&tl, &t, &g_tl, g_t);
trace!(target: "morethuente line search", "Case 2: tc: {}, ts: {}", tc, ts);
if (tc - t).abs() >= (ts - t).abs() {
t = tc;
} else {
t = ts;
}
}
else if g_t.abs() <= g_tl.abs() {
let tc = Self::cubic_minimizer(&tl, &t, &f_tl, f_t, &g_tl, g_t);
let ts = Self::quadratic_minimizer_2(&tl, &t, &g_tl, g_t);
trace!(target: "morethuente line search", "Case 3: tc: {}, ts: {}", tc, ts);
let t_plus = if (tc - t).abs() < (ts - t).abs() {
tc
} else {
ts
};
if t > tl {
t = t_plus.min(t + self.delta * (tu - t));
} else {
t = t_plus.max(t + self.delta * (tu - t));
}
}
else {
let (f_tu, g_tu) = {
let eval_tu = oracle(&(x_k + tu * direction_k));
let phi_tu = Self::phi(&eval_tu, direction_k);
if use_modified_updating {
(*phi_tu.f(), *phi_tu.g())
} else {
let psi_tu = self.psi(&phi_0, &phi_tu, &tu);
(*psi_tu.f(), *psi_tu.g())
}
};
trace!(target: "morethuente line search", "Case 4: f_tu: {}, g_tu: {}", f_tu, g_tu);
t = Self::cubic_minimizer(&tu, &t, f_t, &f_tu, g_t, &g_tu);
}
t = t.max(self.t_min).min(self.t_max);
interval_converged = Self::update_interval(&f_tl, f_t, g_t, &mut tl, t, &mut tu)
}
trace!("Line search did not converge in {} iterations", max_iter);
t
}
}
#[cfg(test)]
mod morethuente_test {
use super::*;
#[test]
pub fn test_phi() {
std::env::set_var("RUST_LOG", "debug");
let _ = Tracer::default()
.with_stdout_layer(Some(LogFormat::Normal))
.build();
let gamma = 90.0;
let mut f_and_g = |x: &DVector<Floating>| -> FuncEvalMultivariate {
let f = 0.5 * (x[0].powi(2) + gamma * x[1].powi(2));
let g = DVector::from(vec![x[0], gamma * x[1]]);
(f, g).into()
};
let max_iter = 10000;
let mut k = 1;
let mut iterate = DVector::from(vec![180.0, 152.0]);
let mut ls = MoreThuenteB::new(2);
let gradient_tol = 1e-12;
while max_iter > k {
trace!("Iterate: {:?}", iterate);
let eval = f_and_g(&iterate);
if eval.g().dot(eval.g()) < gradient_tol {
trace!("Gradient norm is lower than tolerance. Convergence!.");
break;
}
let direction = -eval.g();
let t = <MoreThuenteB as LineSearch>::compute_step_len(
&mut ls,
&iterate,
&eval,
&direction,
&mut f_and_g,
max_iter,
);
iterate += t * direction;
k += 1;
}
println!("Iterate: {:?}", iterate);
println!("Function eval: {:?}", f_and_g(&iterate));
assert!((iterate[0] - 0.0).abs() < 1e-6);
trace!("Test took {} iterations", k);
}
}