use super::trsapp::Trsapp;
use crate::core::math::Scalar;
use crate::solver::powell::{QuadraticModel, TrustRegionSubproblem};
pub(crate) struct NewuoaConfig<F = f64> {
pub(crate) rho_beg: F,
pub(crate) rho_end: F,
pub(crate) max_fun: usize,
pub(crate) npt: usize,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum NewuoaStop {
RhoReached,
MaxFun,
}
pub(crate) struct NewuoaOutcome<F = f64> {
pub(crate) x: Vec<F>,
pub(crate) f: F,
pub(crate) nf: usize,
pub(crate) stop: NewuoaStop,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum Transition {
Continue,
RhoReduced,
Converged,
}
pub(crate) struct StepOutcome<F = f64> {
pub(crate) transition: Transition,
pub(crate) evaluated: Vec<(Vec<F>, F)>,
}
pub(crate) struct NewuoaWork<F = f64> {
model: QuadraticModel<F>,
rho_end: F,
rho: F,
delta: F,
history: Vec<(F, F)>,
itest: usize,
}
impl<F: Scalar> NewuoaWork<F> {
pub(crate) fn try_init<E>(
x0: Vec<F>,
rho_beg: F,
rho_end: F,
npt: usize,
eval: &mut impl FnMut(&[F]) -> Result<F, E>,
) -> Result<(Self, Vec<F>, F), E> {
assert!(
rho_beg > rho_end && rho_end > F::zero(),
"NEWUOA needs rho_beg > rho_end > 0"
);
let model = QuadraticModel::try_initialize(x0, rho_beg, npt, eval)?;
let best_x = model.best_point();
let best_f = model.fopt();
let work = Self {
model,
rho_end,
rho: rho_beg,
delta: rho_beg,
history: Vec::new(),
itest: 0,
};
Ok((work, best_x, best_f))
}
pub(crate) fn rho(&self) -> F {
self.rho
}
pub(crate) fn step<E>(
&mut self,
eval: &mut impl FnMut(&[F]) -> Result<F, E>,
) -> Result<StepOutcome<F>, E> {
let n = self.model.n();
let half = F::from_f64(0.5).expect("0.5 representable");
let c01 = F::from_f64(0.1).expect("0.1 representable");
let c07 = F::from_f64(0.7).expect("0.7 representable");
let c15 = F::from_f64(1.5).expect("1.5 representable");
let two = F::from_f64(2.0).expect("2.0 representable");
let ten = F::from_f64(10.0).expect("10.0 representable");
let eighth = F::from_f64(0.125).expect("0.125 representable");
let mut evaluated: Vec<(Vec<F>, F)> = Vec::new();
let trs = Trsapp.solve(&self.model, self.delta, &());
let d = trs.d;
let dnorm = norm(&d);
let crvmin = trs.crvmin;
let pred = trs.predicted_reduction;
let kopt = self.model.kopt();
let xopt_disp = self.model.xpt_row(kopt).to_vec();
let f_opt = self.model.fopt();
if dnorm < half * self.rho {
let thr = eighth * self.rho * self.rho * crvmin;
let box14_y = self.history.len() >= 3
&& self
.history
.iter()
.rev()
.take(3)
.all(|(dn, err)| *dn <= self.rho && *err <= thr);
if box14_y {
if self.rho <= self.rho_end {
let (xabs, f_new) = eval_step(&self.model, &xopt_disp, &d, eval)?;
evaluated.push((xabs, f_new));
return Ok(StepOutcome {
transition: Transition::Converged,
evaluated,
});
}
return Ok(StepOutcome {
transition: self.finish_rho(half),
evaluated,
});
}
self.delta = (half * self.delta).max(self.rho);
let (t, dist) = far_point(&self.model);
if dist >= two * self.delta {
let ev = do_geometry(
&mut self.model,
t,
dist,
self.delta,
self.rho,
eval,
&mut self.history,
)?;
evaluated.push(ev);
self.itest = 0; return Ok(StepOutcome {
transition: Transition::Continue,
evaluated,
});
}
if self.delta > self.rho {
return Ok(StepOutcome {
transition: Transition::Continue,
evaluated,
});
}
return Ok(StepOutcome {
transition: self.finish_rho(half),
evaluated,
});
}
let xnew_disp: Vec<F> = (0..n).map(|i| xopt_disp[i] + d[i]).collect();
let xabs: Vec<F> = (0..n).map(|i| self.model.x0()[i] + xnew_disp[i]).collect();
let f_new = eval(&xabs)?;
evaluated.push((xabs, f_new));
let ratio = if pred > F::zero() {
(f_opt - f_new) / pred
} else {
F::zero()
};
self.delta = revise_delta(self.delta, dnorm, ratio, self.rho, half, c01, c07, c15, two);
let (xnew_disp, xopt_disp) = if maybe_shift_origin(&mut self.model, dnorm) {
(d.clone(), vec![F::zero(); n])
} else {
(xnew_disp, xopt_disp)
};
self.history.push((dnorm, (f_new - f_opt + pred).abs()));
let knew = apply_move_update(
&mut self.model,
&xnew_disp,
&xopt_disp,
f_new,
f_opt,
self.delta,
self.rho,
);
if knew {
let galt = self.model.alt_gradient_at_opt();
let gisq: F = galt.iter().map(|x| *x * *x).sum();
let gopt = self.model.gradient_at_opt();
let gqsq: F = gopt.iter().map(|x| *x * *x).sum();
let flag = ratio <= c01 && gqsq >= ten * gisq;
self.itest = if flag { self.itest + 1 } else { 0 };
if self.itest >= 3 {
self.model.adopt_alt_model();
self.itest = 0;
}
}
if ratio >= c01 {
return Ok(StepOutcome {
transition: Transition::Continue,
evaluated,
});
}
let (t, dist) = far_point(&self.model);
if dist >= two * self.delta {
let ev = do_geometry(
&mut self.model,
t,
dist,
self.delta,
self.rho,
eval,
&mut self.history,
)?;
evaluated.push(ev);
return Ok(StepOutcome {
transition: Transition::Continue,
evaluated,
});
}
if dnorm > self.rho || self.delta > self.rho || ratio > F::zero() {
return Ok(StepOutcome {
transition: Transition::Continue,
evaluated,
});
}
Ok(StepOutcome {
transition: self.finish_rho(half),
evaluated,
})
}
fn finish_rho(&mut self, half: F) -> Transition {
if self.rho <= self.rho_end {
return Transition::Converged;
}
let rho_new = shrink_rho(self.rho, self.rho_end);
self.delta = (half * self.rho).max(rho_new);
self.rho = rho_new;
self.history.clear();
Transition::RhoReduced
}
}
pub(crate) fn minimize<F: Scalar>(
x0: Vec<F>,
cfg: &NewuoaConfig<F>,
f: impl Fn(&[F]) -> F,
) -> NewuoaOutcome<F> {
let mut eval = |x: &[F]| Ok::<F, core::convert::Infallible>(f(x));
let (mut work, mut best_x, mut best_f) =
NewuoaWork::try_init(x0, cfg.rho_beg, cfg.rho_end, cfg.npt, &mut eval)
.expect("infallible objective");
let mut nf = cfg.npt;
loop {
if nf >= cfg.max_fun {
return NewuoaOutcome {
x: best_x,
f: best_f,
nf,
stop: NewuoaStop::MaxFun,
};
}
let out = work.step(&mut eval).expect("infallible objective");
for (xabs, f_new) in out.evaluated {
nf += 1;
if f_new < best_f {
best_f = f_new;
best_x = xabs;
}
}
match out.transition {
Transition::Converged => {
return NewuoaOutcome {
x: best_x,
f: best_f,
nf,
stop: NewuoaStop::RhoReached,
};
}
Transition::Continue | Transition::RhoReduced => {}
}
}
}
fn eval_step<F: Scalar, E>(
model: &QuadraticModel<F>,
xopt_disp: &[F],
d: &[F],
eval: &mut impl FnMut(&[F]) -> Result<F, E>,
) -> Result<(Vec<F>, F), E> {
let n = model.n();
let xabs: Vec<F> = (0..n)
.map(|i| model.x0()[i] + xopt_disp[i] + d[i])
.collect();
let val = eval(&xabs)?;
Ok((xabs, val))
}
fn far_point<F: Scalar>(model: &QuadraticModel<F>) -> (usize, F) {
let kopt = model.kopt();
let xopt = model.xpt_row(kopt);
let mut best_t = kopt;
let mut best_d2 = F::zero();
for t in 0..model.m() {
if t == kopt {
continue;
}
let row = model.xpt_row(t);
let d2: F = (0..model.n())
.map(|i| (row[i] - xopt[i]) * (row[i] - xopt[i]))
.sum();
if d2 > best_d2 {
best_d2 = d2;
best_t = t;
}
}
(best_t, best_d2.sqrt())
}
#[allow(clippy::too_many_arguments)]
fn do_geometry<F: Scalar, E>(
model: &mut QuadraticModel<F>,
t: usize,
dist: F,
delta: F,
rho: F,
eval: &mut impl FnMut(&[F]) -> Result<F, E>,
history: &mut Vec<(F, F)>,
) -> Result<(Vec<F>, F), E> {
let n = model.n();
let half = F::from_f64(0.5).expect("0.5 representable");
let c01 = F::from_f64(0.1).expect("0.1 representable");
let delta_bar = ((c01 * dist).min(half * delta)).max(rho);
let mut d = model.biglag(t, delta_bar).d;
maybe_shift_origin(model, norm(&d));
let kopt = model.kopt();
let xopt_disp = model.xpt_row(kopt).to_vec();
let f_opt = model.fopt();
let mut xnew_disp: Vec<F> = (0..n).map(|i| xopt_disp[i] + d[i]).collect();
let mut ctx = model.prepare_update(&xnew_disp);
let mut sc = model.update_params(t, &ctx);
let c08 = F::from_f64(0.8).expect("0.8 representable");
if sc.sigma.abs() < c08 * sc.tau * sc.tau {
d = model.bigden(t, &d);
xnew_disp = (0..n).map(|i| xopt_disp[i] + d[i]).collect();
ctx = model.prepare_update(&xnew_disp);
sc = model.update_params(t, &ctx);
}
let dnorm = norm(&d);
let xabs: Vec<F> = (0..n).map(|i| model.x0()[i] + xnew_disp[i]).collect();
let f_new = eval(&xabs)?;
let q_new = model.eval_change(&xnew_disp);
let q_opt = model.eval_change(&xopt_disp);
let model_err = (f_new - f_opt - (q_new - q_opt)).abs();
history.push((dnorm.min(delta_bar), model_err));
if sc.sigma != F::zero() {
model.commit_update(t, &ctx, &sc, f_new);
}
Ok((xabs, f_new))
}
fn maybe_shift_origin<F: Scalar>(model: &mut QuadraticModel<F>, dnorm: F) -> bool {
let c1em3 = F::from_f64(1e-3).expect("1e-3 representable");
let xopt = model.xpt_row(model.kopt());
let xopt_sq: F = xopt.iter().map(|x| *x * *x).sum();
if xopt_sq > F::zero() && dnorm * dnorm < c1em3 * xopt_sq {
model.shift_origin();
true
} else {
false
}
}
fn apply_move_update<F: Scalar>(
model: &mut QuadraticModel<F>,
xnew_disp: &[F],
xopt_disp: &[F],
f_new: F,
f_opt: F,
delta: F,
rho: F,
) -> bool {
let m = model.m();
let kopt = model.kopt();
let one = F::one();
let c01 = F::from_f64(0.1).expect("0.1 representable");
let ctx = model.prepare_update(xnew_disp);
let xstar: &[F] = if f_new < f_opt { xnew_disp } else { xopt_disp };
let denom = (c01 * delta).max(rho);
let mut chosen: Option<(usize, crate::solver::powell::update::UpdateScalars<F>, F)> = None;
for t in 0..m {
if f_new >= f_opt && t == kopt {
continue;
}
let sc = model.update_params(t, &ctx);
let dist = norm_diff(model.xpt_row(t), xstar);
let r = dist / denom;
let w = (r * r * r * r * r * r).max(one);
let w_sigma = w * sc.sigma.abs();
if chosen.as_ref().is_none_or(|c| w_sigma > c.2) {
chosen = Some((t, sc, w_sigma));
}
}
if let Some((t_star, scalars, w_sigma)) = chosen {
let move_zero = f_new >= f_opt && w_sigma < one;
if !move_zero && scalars.sigma != F::zero() {
model.commit_update(t_star, &ctx, &scalars, f_new);
return true; }
}
false
}
#[allow(clippy::too_many_arguments)]
fn revise_delta<F: Scalar>(
delta_old: F,
dnorm: F,
ratio: F,
rho: F,
half: F,
c01: F,
c07: F,
c15: F,
two: F,
) -> F {
let delta_int = if ratio <= c01 {
half * dnorm
} else if ratio <= c07 {
dnorm.max(half * delta_old)
} else {
(two * dnorm).max(half * delta_old)
};
if delta_int <= c15 * rho {
rho
} else {
delta_int
}
}
fn shrink_rho<F: Scalar>(rho: F, rho_end: F) -> F {
let c16 = F::from_f64(16.0).expect("16 representable");
let c250 = F::from_f64(250.0).expect("250 representable");
let c01 = F::from_f64(0.1).expect("0.1 representable");
if rho <= c16 * rho_end {
rho_end
} else if rho <= c250 * rho_end {
(rho * rho_end).sqrt()
} else {
c01 * rho
}
}
fn norm<F: Scalar>(v: &[F]) -> F {
v.iter().map(|x| *x * *x).sum::<F>().sqrt()
}
fn norm_diff<F: Scalar>(a: &[F], b: &[F]) -> F {
a.iter()
.zip(b)
.map(|(x, y)| (*x - *y) * (*x - *y))
.sum::<F>()
.sqrt()
}
#[cfg(test)]
mod tests {
use super::*;
fn cfg(n: usize, rho_beg: f64, rho_end: f64) -> NewuoaConfig<f64> {
NewuoaConfig {
rho_beg,
rho_end,
max_fun: 500,
npt: 2 * n + 1,
}
}
fn norm2(a: &[f64], b: &[f64]) -> f64 {
a.iter()
.zip(b)
.map(|(x, y)| (x - y).powi(2))
.sum::<f64>()
.sqrt()
}
#[test]
fn convex_quadratic_2d() {
let f = |x: &[f64]| (x[0] - 1.0).powi(2) + 2.0 * (x[1] + 2.0).powi(2);
let out = minimize(vec![0.0, 0.0], &cfg(2, 0.5, 1e-8), f);
assert!(norm2(&out.x, &[1.0, -2.0]) < 1e-6, "x = {:?}", out.x);
assert!(out.f < 1e-10, "f = {}", out.f);
assert_eq!(out.stop, NewuoaStop::RhoReached);
}
#[test]
fn convex_quadratic_4d() {
let diag = [1.0, 10.0, 100.0, 0.5];
let xstar = [3.0, -1.0, 2.0, -4.0];
let f = move |x: &[f64]| {
(0..4)
.map(|i| diag[i] * (x[i] - xstar[i]).powi(2))
.sum::<f64>()
};
let out = minimize(vec![0.0; 4], &cfg(4, 1.0, 1e-8), f);
assert!(norm2(&out.x, &xstar) < 1e-5, "x = {:?}", out.x);
assert!(out.f < 1e-8, "f = {}", out.f);
}
#[test]
fn rosenbrock_2d() {
let f = |x: &[f64]| (1.0 - x[0]).powi(2) + 100.0 * (x[1] - x[0] * x[0]).powi(2);
let out = minimize(vec![-1.2, 1.0], &cfg(2, 0.5, 1e-8), f);
assert!(out.f < 1e-7, "f = {}, x = {:?}", out.f, out.x);
}
#[test]
fn chained_rosenbrock_6d() {
let n = 6;
let f = |x: &[f64]| {
(0..x.len() - 1)
.map(|i| (1.0 - x[i]).powi(2) + 100.0 * (x[i + 1] - x[i] * x[i]).powi(2))
.sum::<f64>()
};
let out = minimize(vec![-1.0; n], &cfg(n, 0.5, 1e-7), f);
assert!(out.f < 1e-6, "f = {}, x = {:?}", out.f, out.x);
}
#[test]
fn vardim_8d() {
use crate::solver::powell::update::QINT_ADOPTIONS;
use std::sync::atomic::Ordering;
let n = 8;
let f = |x: &[f64]| {
let s: f64 = (0..x.len()).map(|l| (l as f64 + 1.0) * (x[l] - 1.0)).sum();
let sq: f64 = (0..x.len()).map(|l| (x[l] - 1.0).powi(2)).sum();
sq + s * s + s.powi(4)
};
let x_start: Vec<f64> = (0..n).map(|l| 1.0 - (l as f64 + 1.0) / n as f64).collect();
let adopt0 = QINT_ADOPTIONS.load(Ordering::Relaxed);
let out = minimize(
x_start,
&NewuoaConfig {
rho_beg: 0.5,
rho_end: 1e-8,
max_fun: 2000,
npt: 2 * n + 1,
},
f,
);
assert!(out.f < 1e-6, "f = {}, x = {:?}", out.f, out.x);
assert!(norm2(&out.x, &vec![1.0; n]) < 1e-3, "x = {:?}", out.x);
assert!(
QINT_ADOPTIONS.load(Ordering::Relaxed) > adopt0,
"expected the §8 Qint adoption path to fire"
);
}
#[test]
fn respects_max_fun() {
let f = |x: &[f64]| (1.0 - x[0]).powi(2) + 100.0 * (x[1] - x[0] * x[0]).powi(2);
let out = minimize(
vec![-1.2, 1.0],
&NewuoaConfig {
rho_beg: 0.5,
rho_end: 1e-10,
max_fun: 15,
npt: 5,
},
f,
);
assert_eq!(out.stop, NewuoaStop::MaxFun);
assert!(out.nf <= 16); }
}