use crate::core::math::Scalar;
use crate::solver::powell::QuadraticModel;
pub(crate) struct BiglagResult<F = f64> {
pub(crate) d: Vec<F>,
pub(crate) tau: F,
}
impl<F: Scalar> QuadraticModel<F> {
pub(crate) fn biglag(&self, t: usize, delta_bar: F) -> BiglagResult<F> {
assert!(delta_bar > F::zero(), "biglag: delta_bar must be positive");
assert!(t < self.m(), "biglag: t must index an interpolation point");
let n = self.n();
let half = F::from_f64(0.5).expect("0.5 representable");
let two = F::from_f64(2.0).expect("2.0 representable");
let (g_x0, lambda) = self.lagrange_coeffs(t);
let xopt = self.xpt_row(self.kopt()).to_vec();
let base = if t == self.kopt() {
F::one()
} else {
F::zero()
};
let h_xopt = self.lagrange_hessian_matvec(&lambda, &xopt);
let grad_opt: Vec<F> = (0..n).map(|i| g_x0[i] + h_xopt[i]).collect();
let ell = |d: &[F]| -> F {
let hd = self.lagrange_hessian_matvec(&lambda, d);
base + dot(d, &grad_opt) + half * dot(d, &hd)
};
let xt = self.xpt_row(t).to_vec();
let mut u0: Vec<F> = (0..n).map(|i| xt[i] - xopt[i]).collect();
let mut u0_norm = norm(&u0);
if u0_norm <= F::zero() {
u0 = grad_opt.clone();
u0_norm = norm(&u0);
if u0_norm <= F::zero() {
return BiglagResult {
d: vec![F::zero(); n],
tau: base,
};
}
}
let scale0 = delta_bar / u0_norm;
let dpos: Vec<F> = (0..n).map(|i| scale0 * u0[i]).collect();
let ell_pos = ell(&dpos);
let ell_neg = ell(&dpos.iter().map(|x| -*x).collect::<Vec<_>>());
let mut d: Vec<F> = if ell_pos.abs() >= ell_neg.abs() {
dpos
} else {
dpos.iter().map(|x| -*x).collect()
};
let mut ell_d = ell(&d); let hd0 = self.lagrange_hessian_matvec(&lambda, &d);
let mut grad_cur: Vec<F> = (0..n).map(|i| grad_opt[i] + hd0[i]).collect();
let delta_sq = delta_bar * delta_bar;
let degen = F::from_f64(1e-8).expect("1e-8 representable");
for j in 0..n {
let dd = dot(&d, &d);
let gsrc: &[F] = if j == 0 {
let dg = dot(&d, &grad_opt);
let gg = dot(&grad_opt, &grad_opt);
let cond_i = dg * dg <= from::<F>(0.99) * delta_sq * gg; let cond_ii = gg.sqrt() * delta_bar >= from::<F>(0.1) * ell_d.abs(); if cond_i && cond_ii {
&grad_opt
} else {
&grad_cur
}
} else {
&grad_cur
};
let coef = dot(&d, gsrc) / dd;
let s_raw: Vec<F> = (0..n).map(|i| gsrc[i] - coef * d[i]).collect();
let s_raw_sq = dot(&s_raw, &s_raw);
let gsrc_sq = dot(gsrc, gsrc);
if s_raw_sq <= degen * gsrc_sq {
break; }
let s_scale = delta_bar / s_raw_sq.sqrt();
let s: Vec<F> = (0..n).map(|i| s_scale * s_raw[i]).collect();
let hs = self.lagrange_hessian_matvec(&lambda, &s);
let hd: Vec<F> = (0..n).map(|i| grad_cur[i] - grad_opt[i]).collect();
let d_g0 = dot(&d, &grad_opt);
let s_g0 = dot(&s, &grad_opt);
let d_hd = dot(&d, &hd);
let s_hd = dot(&s, &hd); let s_hs = dot(&s, &hs);
let ell_of = |theta: F| -> F {
let c = theta.cos();
let sn = theta.sin();
base + c * d_g0
+ sn * s_g0
+ half * (c * c * d_hd + two * sn * c * s_hd + sn * sn * s_hs)
};
const NTHETA: usize = 49;
let pi = F::from_f64(core::f64::consts::PI).expect("π representable");
let step =
(pi + pi) / F::from_f64((NTHETA + 1) as f64).expect("NTHETA+1 representable");
let mut best_theta = F::zero();
let mut best_abs = ell_of(F::zero()).abs(); for k in 1..=NTHETA {
let theta = step * F::from_f64(k as f64).expect("k representable");
let av = ell_of(theta).abs();
if av > best_abs {
best_abs = av;
best_theta = theta;
}
}
let am = ell_of(best_theta - step).abs();
let ap = ell_of(best_theta + step).abs();
let denom = am - (best_abs + best_abs) + ap;
let theta_star = if denom < F::zero() {
best_theta + half * (am - ap) / denom * step
} else {
best_theta
};
let c = theta_star.cos();
let sn = theta_star.sin();
let d_new: Vec<F> = (0..n).map(|i| c * d[i] + sn * s[i]).collect();
let ell_new = ell_of(theta_star);
let ell_prev_abs = ell_d.abs();
d = d_new;
ell_d = ell_new;
grad_cur = (0..n)
.map(|i| (F::one() - c) * grad_opt[i] + c * grad_cur[i] + sn * hs[i])
.collect();
let onep1 = from::<F>(1.1);
if ell_d.abs() <= onep1 * ell_prev_abs {
break;
}
}
BiglagResult { d, tau: ell_d }
}
}
fn from<F: Scalar>(x: f64) -> F {
F::from_f64(x).expect("constant representable")
}
fn dot<F: Scalar>(a: &[F], b: &[F]) -> F {
a.iter().zip(b).map(|(x, y)| *x * *y).sum()
}
fn norm<F: Scalar>(v: &[F]) -> F {
dot(v, v).sqrt()
}
#[cfg(test)]
mod tests {
use super::*;
use crate::solver::powell::QuadraticModel;
fn quad(x: &[f64]) -> f64 {
(x[0] - 1.0).powi(2) + 2.0 * (x[1] + 2.0).powi(2) - x[0] * x[1]
}
fn rosen(x: &[f64]) -> f64 {
(1.0 - x[0]).powi(2) + 100.0 * (x[1] - x[0] * x[0]).powi(2)
}
fn ell_at_point<F: crate::core::math::Scalar>(
model: &QuadraticModel<F>,
t: usize,
i: usize,
) -> F {
let (g_x0, lambda) = model.lagrange_coeffs(t);
let s = model.xpt_row(i).to_vec();
let hs = model.lagrange_hessian_matvec(&lambda, &s);
let half = F::from_f64(0.5).unwrap();
let base = if t == model.kopt() {
F::one()
} else {
F::zero()
};
let xopt = model.xpt_row(model.kopt()).to_vec();
let hxopt = model.lagrange_hessian_matvec(&lambda, &xopt);
let val_i = dot(&s, &g_x0) + half * dot(&s, &hs);
let val_opt = dot(&xopt, &g_x0) + half * dot(&xopt, &hxopt);
base + val_i - val_opt
}
#[test]
fn lagrange_conditions_hold() {
let model = QuadraticModel::initialize(vec![0.3, -0.7], 0.5, 5, &quad);
let m = model.m();
for t in 0..m {
for i in 0..m {
let want = if i == t { 1.0 } else { 0.0 };
let got = ell_at_point(&model, t, i);
assert!(
(got - want).abs() < 1e-9,
"ℓ_{t}(x_{i}) = {got}, want {want}"
);
}
}
}
#[test]
fn improves_over_initial_ray_and_respects_bound() {
let model = QuadraticModel::initialize(vec![-1.2, 1.0], 0.5, 5, &rosen);
let kopt = model.kopt();
let delta_bar = 0.4;
let xopt = model.xpt_row(kopt).to_vec();
let (t, _) = (0..model.m())
.map(|j| {
let r = model.xpt_row(j);
let dist: f64 = (0..2).map(|i| (r[i] - xopt[i]).powi(2)).sum::<f64>().sqrt();
(j, dist)
})
.filter(|(j, _)| *j != kopt)
.max_by(|a, b| a.1.partial_cmp(&b.1).unwrap())
.unwrap();
let (g_x0, lambda) = model.lagrange_coeffs(t);
let h_xopt = model.lagrange_hessian_matvec(&lambda, &xopt);
let grad_opt: Vec<f64> = (0..2).map(|i| g_x0[i] + h_xopt[i]).collect();
let xt = model.xpt_row(t).to_vec();
let mut u0: Vec<f64> = (0..2).map(|i| xt[i] - xopt[i]).collect();
let nrm: f64 = u0.iter().map(|x| x * x).sum::<f64>().sqrt();
for v in &mut u0 {
*v *= delta_bar / nrm;
}
let ell_d0 = {
let hd = model.lagrange_hessian_matvec(&lambda, &u0);
dot(&u0, &grad_opt) + 0.5 * dot(&u0, &hd)
};
let res = model.biglag(t, delta_bar);
let dnorm: f64 = res.d.iter().map(|x| x * x).sum::<f64>().sqrt();
assert!(dnorm <= delta_bar * (1.0 + 1e-9), "‖d‖ = {dnorm} > Δ̄");
assert!(
dnorm >= 0.99 * delta_bar,
"‖d‖ = {dnorm} ≪ Δ̄ (bound inactive)"
);
assert!(
res.tau.abs() >= ell_d0.abs() - 1e-12,
"|ℓ_t| not improved: {} < {}",
res.tau.abs(),
ell_d0.abs()
);
}
#[test]
fn near_optimal_vs_brute_force() {
let model = QuadraticModel::initialize(vec![0.4, -0.6, 0.2], 0.5, 7, &|x: &[f64]| {
(0..3)
.map(|i| (x[i] - 0.5 * (i as f64)).powi(2))
.sum::<f64>()
+ 0.3 * x[0] * x[1]
- 0.2 * x[1] * x[2]
});
let n = 3;
let kopt = model.kopt();
let xopt = model.xpt_row(kopt).to_vec();
let delta_bar = 0.3;
for t in 0..model.m() {
if t == kopt {
continue;
}
let (g_x0, lambda) = model.lagrange_coeffs(t);
let h_xopt = model.lagrange_hessian_matvec(&lambda, &xopt);
let grad_opt: Vec<f64> = (0..n).map(|i| g_x0[i] + h_xopt[i]).collect();
let ell = |d: &[f64]| -> f64 {
let hd = model.lagrange_hessian_matvec(&lambda, d);
dot(d, &grad_opt) + 0.5 * dot(d, &hd)
};
let mut brute = 0.0_f64;
let steps = 40;
for a in 0..steps {
let theta = std::f64::consts::PI * (a as f64) / (steps as f64);
for b in 0..(2 * steps) {
let phi = std::f64::consts::PI * (b as f64) / (steps as f64);
let d = [
delta_bar * theta.sin() * phi.cos(),
delta_bar * theta.sin() * phi.sin(),
delta_bar * theta.cos(),
];
brute = brute.max(ell(&d).abs());
}
}
let res = model.biglag(t, delta_bar);
assert!(
res.tau.abs() >= 0.9 * brute,
"t={t}: biglag |ℓ_t|={} < 0.9 * brute {brute}",
res.tau.abs()
);
}
}
#[test]
fn produces_well_conditioned_replacement() {
let model = QuadraticModel::initialize(vec![-1.2, 1.0], 0.5, 5, &rosen);
let kopt = model.kopt();
let xopt = model.xpt_row(kopt).to_vec();
let (t, _) = (0..model.m())
.filter(|j| *j != kopt)
.map(|j| {
let r = model.xpt_row(j);
let dist: f64 = (0..2).map(|i| (r[i] - xopt[i]).powi(2)).sum::<f64>().sqrt();
(j, dist)
})
.max_by(|a, b| a.1.partial_cmp(&b.1).unwrap())
.unwrap();
let res = model.biglag(t, 0.4);
let xnew: Vec<f64> = (0..2).map(|i| xopt[i] + res.d[i]).collect();
let ctx = model.prepare_update(&xnew);
let sc = model.update_params(t, &ctx);
assert!(sc.sigma.abs() > 1e-6, "σ = {} too small", sc.sigma);
}
}