use crate::core::math::Scalar;
use crate::solver::powell::{QuadraticModel, TrustRegionStep, TrustRegionSubproblem};
pub(crate) struct ShiftedBox<F = f64> {
pub(crate) sl: Vec<F>,
pub(crate) su: Vec<F>,
}
pub(crate) struct Trsbox;
impl<F: Scalar> TrustRegionSubproblem<F> for Trsbox {
type Region = ShiftedBox<F>;
fn solve(
&self,
model: &QuadraticModel<F>,
delta: F,
region: &ShiftedBox<F>,
) -> TrustRegionStep<F> {
trsbox(model, delta, ®ion.sl, ®ion.su)
}
}
fn dot_free<F: Scalar>(a: &[F], b: &[F], xbdi: &[i8]) -> F {
let mut s = F::zero();
for i in 0..a.len() {
if xbdi[i] == 0 {
s = s + a[i] * b[i];
}
}
s
}
fn interval_fun<F: Scalar>(hangt: F, args: &[F; 5]) -> F {
if hangt.abs() > F::zero() {
let one = F::one();
let half = F::from_f64(0.5).unwrap();
let sth = (hangt + hangt) / (one + hangt * hangt);
let mut f = args[0] + hangt * (hangt * args[1] - args[2] - args[2]);
f = sth * (hangt * args[3] - args[4] - half * sth * f);
f
} else {
F::zero()
}
}
fn interval_max<F: Scalar>(lb: F, ub: F, args: &[F; 5], grid_size: usize) -> F {
if ub <= lb {
return lb;
}
let gm1 = F::from_usize(grid_size - 1).unwrap();
let xgrid = |k: usize| lb + (ub - lb) * F::from_usize(k).unwrap() / gm1;
let fgrid: Vec<F> = (0..grid_size)
.map(|k| interval_fun(xgrid(k), args))
.collect();
if fgrid.iter().all(|f| f.is_nan()) {
return lb;
}
let mut kopt = 0;
let mut fopt = F::neg_infinity();
for (k, &f) in fgrid.iter().enumerate() {
if !f.is_nan() && f > fopt {
fopt = f;
kopt = k;
}
}
if kopt == 0 {
return lb;
}
if kopt == grid_size - 1 {
return ub;
}
let fprev = fgrid[kopt - 1];
let fnext = fgrid[kopt + 1];
let mut step = F::zero();
if (fprev - fnext).abs() > F::zero() {
let half = F::from_f64(0.5).unwrap();
step = half * ((fnext - fprev) / (fopt + fopt - fprev - fnext));
}
if step.is_finite() && step.abs() > F::zero() {
lb + (ub - lb) * (F::from_usize(kopt).unwrap() + step) / gm1
} else {
xgrid(kopt)
}
}
pub(crate) fn trsbox<F: Scalar>(
model: &QuadraticModel<F>,
delta: F,
sl: &[F],
su: &[F],
) -> TrustRegionStep<F> {
assert!(delta > F::zero(), "trsbox: delta must be positive");
let n = model.n();
let zero = F::zero();
let one = F::one();
let half = F::from_f64(0.5).unwrap();
let ctest = F::from_f64(0.01).unwrap();
let eps = F::epsilon();
let xopt = model.xpt_row(model.kopt()).to_vec();
let gopt_raw = model.gradient_at_opt();
let maxg = gopt_raw.iter().fold(zero, |m, g| m.max(g.abs()));
let scaled = maxg > F::from_f64(1e12).unwrap();
let modscal = if scaled { one / maxg } else { one };
let gopt: Vec<F> = gopt_raw.iter().map(|g| *g * modscal).collect();
let hess = |s: &[F]| -> Vec<F> {
let mut hs = model.hessian_matvec(s);
if scaled {
for v in hs.iter_mut() {
*v = *v * modscal;
}
}
hs
};
let mut xbdi = vec![0i8; n];
for i in 0..n {
if xopt[i] >= su[i] && gopt[i] <= zero {
xbdi[i] = 1;
} else if xopt[i] <= sl[i] && gopt[i] >= zero {
xbdi[i] = -1;
}
}
let mut nact = xbdi.iter().filter(|&&b| b != 0).count();
let mut d = vec![zero; n];
let mut crvmin: Option<F> = None;
let mut gnew = gopt.clone();
let mut gredsq = dot_free(&gnew, &gnew, &xbdi);
let mut delsq = delta * delta;
let mut qred = zero;
let mut beta = zero;
let mut itercg = 0usize;
let mut twod_search = false;
let mut s = vec![zero; n];
let mut ggsav = zero;
let maxiter = (n - nact).saturating_mul(n - nact).min(10_000);
for _ in 0..maxiter {
let resid = delsq - {
let mut acc = zero;
for i in 0..n {
if xbdi[i] == 0 {
acc = acc + d[i] * d[i];
}
}
acc
};
if resid <= zero {
twod_search = true;
break;
}
for i in 0..n {
s[i] = if itercg == 0 {
-gnew[i]
} else {
beta * s[i] - gnew[i]
};
}
for i in 0..n {
if xbdi[i] != 0 {
s[i] = zero;
}
}
let stepsq: F = s.iter().fold(zero, |a, v| a + *v * *v);
let ds = dot_free(&d, &s, &xbdi);
if !(stepsq > eps * delsq
&& gredsq * delsq > (ctest * qred) * (ctest * qred)
&& ds.is_finite())
{
break;
}
let disc = stepsq * resid + ds * ds;
let sqrtd = (disc.max(zero))
.sqrt()
.max((stepsq * resid).max(zero).sqrt())
.max(ds.abs());
let bstep = if ds >= zero {
resid / (sqrtd + ds)
} else {
(sqrtd - ds) / stepsq
};
if bstep <= zero || !bstep.is_finite() {
break;
}
let hs = hess(&s);
let shs = dot_free(&s, &hs, &xbdi);
let mut stplen = bstep;
if shs > zero {
stplen = bstep.min(gredsq / shs);
}
let mut iact: Option<usize> = None;
{
let mut sbound = vec![stplen; n];
for i in 0..n {
let xnew_i = xopt[i] + d[i];
let xtest = xnew_i + stplen * s[i];
if s[i] > zero && xtest > su[i] {
sbound[i] = (su[i] - xnew_i) / s[i];
} else if s[i] < zero && xtest < sl[i] {
sbound[i] = (sl[i] - xnew_i) / s[i];
}
if sbound[i].is_nan() {
sbound[i] = stplen;
}
}
let mut best = stplen;
for i in 0..n {
if sbound[i] < best {
best = sbound[i];
iact = Some(i);
}
}
if let Some(_ia) = iact {
stplen = best;
}
}
let mut sdec = zero;
if stplen > zero {
itercg += 1;
let rayleigh = shs / stepsq;
if iact.is_none() && rayleigh > zero {
crvmin = Some(match crvmin {
Some(c) => c.min(rayleigh),
None => rayleigh,
});
}
ggsav = gredsq;
for i in 0..n {
gnew[i] = gnew[i] + stplen * hs[i];
}
gredsq = dot_free(&gnew, &gnew, &xbdi);
for i in 0..n {
d[i] = d[i] + stplen * s[i];
}
sdec = (stplen * (ggsav - half * stplen * shs)).max(zero);
qred = qred + sdec;
}
if let Some(ia) = iact {
nact += 1;
xbdi[ia] = if s[ia] > zero { 1 } else { -1 };
if nact >= n {
break;
}
delsq = delsq - d[ia] * d[ia];
if delsq <= zero {
twod_search = true;
break;
}
beta = zero;
itercg = 0;
gredsq = dot_free(&gnew, &gnew, &xbdi);
} else if stplen < bstep {
if itercg >= n - nact || sdec <= ctest * qred || sdec.is_nan() || qred.is_nan() {
break;
}
beta = gredsq / ggsav;
} else {
twod_search = true;
break;
}
}
let maxiter2 = if twod_search {
crvmin = Some(zero);
10 * (n - nact)
} else {
0
};
let mut nactsav = nact.wrapping_sub(1);
let mut hdred = vec![zero; n];
let mut dredsq = zero;
for iter in 0..maxiter2 {
let xnew: Vec<F> = (0..n).map(|i| xopt[i] + d[i]).collect();
for i in 0..n {
if xbdi[i] == 0 && xnew[i] >= su[i] {
xbdi[i] = 1;
} else if xbdi[i] == 0 && xnew[i] <= sl[i] {
xbdi[i] = -1;
}
}
nact = xbdi.iter().filter(|&&b| b != 0).count();
if nact >= n - 1 {
break;
}
gredsq = dot_free(&gnew, &gnew, &xbdi);
let dredg = dot_free(&d, &gnew, &xbdi);
if iter == 0 || nact > nactsav {
dredsq = {
let mut a = zero;
for i in 0..n {
if xbdi[i] == 0 {
a = a + d[i] * d[i];
}
}
a
};
let mut dred = d.clone();
for i in 0..n {
if xbdi[i] != 0 {
dred[i] = zero;
}
}
hdred = hess(&dred);
nactsav = nact;
}
let temp0 = gredsq * dredsq - dredg * dredg;
if temp0 <= ctest * ctest * qred * qred || temp0.is_nan() || qred.is_nan() {
break;
}
let temp = temp0.sqrt();
for i in 0..n {
s[i] = (dredg * d[i] - dredsq * gnew[i]) / temp;
if xbdi[i] != 0 {
s[i] = zero;
}
}
let sredg = -temp;
let mut hangt_bd = one;
let mut iact: Option<usize> = None;
let mut tanbd = vec![one; n];
for i in 0..n {
if xbdi[i] != 0 {
continue;
}
let ssq = d[i] * d[i] + s[i] * s[i];
let ssqrt = ssq.sqrt();
if xopt[i] - sl[i] < ssqrt {
let disc = (ssq - (xopt[i] - sl[i]) * (xopt[i] - sl[i]))
.max(zero)
.sqrt();
if disc - s[i] > zero {
tanbd[i] = tanbd[i].min((xnew[i] - sl[i]) / (disc - s[i]));
}
}
if su[i] - xopt[i] < ssqrt {
let disc = (ssq - (su[i] - xopt[i]) * (su[i] - xopt[i]))
.max(zero)
.sqrt();
if disc + s[i] > zero {
tanbd[i] = tanbd[i].min((su[i] - xnew[i]) / (disc + s[i]));
}
}
if tanbd[i].is_nan() {
tanbd[i] = zero;
}
}
for i in 0..n {
if xbdi[i] == 0 && tanbd[i] < hangt_bd {
hangt_bd = tanbd[i];
iact = Some(i);
}
}
if hangt_bd >= one {
iact = None;
}
if hangt_bd <= zero {
break;
}
let hs = hess(&s);
let shs = dot_free(&s, &hs, &xbdi);
let dhs = dot_free(&d, &hs, &xbdi);
let dhd = dot_free(&d, &hdred, &xbdi);
let args = [shs, dhd, dhs, dredg, sredg];
if args.iter().any(|a| a.is_nan()) {
break;
}
let gs_f = F::from_f64(17.0).unwrap() * hangt_bd + F::from_f64(4.1).unwrap();
let grid_size = 2 * (gs_f.to_f64().unwrap().round() as usize).max(2);
let hangt = interval_max(zero, hangt_bd, &args, grid_size);
let sdec = interval_fun(hangt, &args);
if sdec <= zero || sdec.is_nan() {
break;
}
let cth = (one - hangt * hangt) / (one + hangt * hangt);
let sth = (hangt + hangt) / (one + hangt * hangt);
for i in 0..n {
gnew[i] = gnew[i] + (cth - one) * hdred[i] + sth * hs[i];
}
for i in 0..n {
if xbdi[i] == 0 {
d[i] = cth * d[i] + sth * s[i];
}
}
for i in 0..n {
hdred[i] = cth * hdred[i] + sth * hs[i];
}
qred = qred + sdec;
if let Some(ia) = iact {
if hangt >= hangt_bd {
let mid = half * (sl[ia] + su[ia]);
xbdi[ia] = if xopt[ia] + d[ia] - mid >= zero {
1
} else {
-1
};
continue;
}
}
if sdec <= ctest * qred || sdec.is_nan() {
break;
}
}
let mut xnew: Vec<F> = (0..n)
.map(|i| sl[i].max(su[i].min(xopt[i] + d[i])))
.collect();
for i in 0..n {
if xbdi[i] == -1 {
xnew[i] = sl[i];
} else if xbdi[i] == 1 {
xnew[i] = su[i];
}
}
for i in 0..n {
d[i] = xnew[i] - xopt[i];
}
let mut crvmin_out = match crvmin {
Some(c) if c.is_finite() => c,
_ => zero,
};
if scaled && crvmin_out > zero {
crvmin_out = crvmin_out / modscal;
}
let predicted_reduction = if scaled { qred / modscal } else { qred };
TrustRegionStep {
d,
crvmin: crvmin_out,
predicted_reduction,
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::solver::powell::QuadraticModel;
fn model_for(
x0: Vec<f64>,
lower: &[f64],
upper: &[f64],
rho: f64,
m: usize,
f: impl Fn(&[f64]) -> f64,
) -> (QuadraticModel<f64>, Vec<f64>, Vec<f64>) {
let out = QuadraticModel::try_initialize_bounded::<core::convert::Infallible>(
x0,
lower,
upper,
rho,
m,
&mut |x| Ok(f(x)),
)
.unwrap();
(out.model, out.sl, out.su)
}
fn norm(v: &[f64]) -> f64 {
v.iter().map(|x| x * x).sum::<f64>().sqrt()
}
fn assert_qred_matches(model: &QuadraticModel<f64>, d: &[f64], qred: f64) {
let xopt = model.xpt_row(model.kopt()).to_vec();
let xopt_d: Vec<f64> = xopt.iter().zip(d).map(|(a, b)| a + b).collect();
let want = model.eval_change(&xopt) - model.eval_change(&xopt_d);
assert!(
(qred - want).abs() < 1e-9,
"qred={qred}, Q(xopt)-Q(xopt+d)={want}"
);
}
#[test]
fn interior_unconstrained_equivalent() {
let f = |x: &[f64]| (x[0] - 1.0).powi(2) + 2.0 * (x[1] + 2.0).powi(2);
let (model, sl, su) = model_for(vec![0.0, 0.0], &[-10.0, -10.0], &[10.0, 10.0], 0.5, 5, f);
let step = trsbox(&model, 100.0, &sl, &su);
assert!(step.predicted_reduction > 0.0);
assert_qred_matches(&model, &step.d, step.predicted_reduction);
let xopt = model.xpt_row(model.kopt()).to_vec();
for i in 0..2 {
let xi = xopt[i] + step.d[i];
assert!(xi >= sl[i] - 1e-9 && xi <= su[i] + 1e-9);
}
assert!(step.crvmin > 0.0, "crvmin={}", step.crvmin);
}
#[test]
fn boundary_step_resets_crvmin() {
let f = |x: &[f64]| (x[0] - 5.0).powi(2) + (x[1] - 5.0).powi(2);
let (model, sl, su) = model_for(vec![0.0, 0.0], &[-10.0, -10.0], &[10.0, 10.0], 0.5, 5, f);
let delta = 0.5;
let step = trsbox(&model, delta, &sl, &su);
assert!(
(norm(&step.d) - delta).abs() < 1e-6,
"‖d‖={}",
norm(&step.d)
);
assert_eq!(step.crvmin, 0.0);
assert!(step.predicted_reduction > 0.0);
assert_qred_matches(&model, &step.d, step.predicted_reduction);
}
#[test]
fn active_bound_keeps_feasible() {
let f = |x: &[f64]| (x[0] - 5.0).powi(2) + (x[1] - 5.0).powi(2);
let (model, sl, su) = model_for(vec![0.0, 0.0], &[-2.0, -2.0], &[2.0, 2.0], 0.5, 5, f);
let xopt = model.xpt_row(model.kopt()).to_vec();
let step = trsbox(&model, 100.0, &sl, &su);
assert!(step.predicted_reduction > 0.0);
assert_qred_matches(&model, &step.d, step.predicted_reduction);
for i in 0..2 {
let xi = xopt[i] + step.d[i];
assert!(
xi >= sl[i] - 1e-9 && xi <= su[i] + 1e-9,
"coord {i} infeasible: {xi} not in [{}, {}]",
sl[i],
su[i]
);
}
let at_bound = (0..2).any(|i| (xopt[i] + step.d[i] - su[i]).abs() < 1e-7);
assert!(at_bound, "expected a coordinate pinned to the upper bound");
}
}