use crate::core::math::Scalar;
use crate::solver::powell::QuadraticModel;
#[cfg(test)]
pub(crate) static BIGDEN_CALLS: std::sync::atomic::AtomicU32 = std::sync::atomic::AtomicU32::new(0);
impl<F: Scalar> QuadraticModel<F> {
fn omega_column(&self, t: usize) -> (F, Vec<F>) {
let m = self.m;
let rank = m - self.n - 1;
let mut col = vec![F::zero(); m];
for k in 0..rank {
let temp = self.zsign[k] * self.zmat.get(t, k);
for i in 0..m {
col[i] = col[i] + temp * self.zmat.get(i, k);
}
}
let alpha = col[t];
(alpha, col)
}
fn bigden_iter_coeffs(&self, t: usize, alpha: F, d: &[F], s: &[F]) -> (Vec<[F; 5]>, [F; 9]) {
let n = self.n;
let m = self.m;
let rank = m - n - 1;
let half = F::from_f64(0.5).expect("0.5 representable");
let quart = F::from_f64(0.25).expect("0.25 representable");
let two = F::from_f64(2.0).expect("2.0 representable");
let zero = F::zero();
let xopt = self.xpt_row(self.kopt).to_vec();
let dd = dot(d, d);
let xoptsq = dot(&xopt, &xopt);
let xoptd = dot(&xopt, d);
let xopts = dot(&xopt, s);
let mut den = [zero; 9];
let tempa = half * xoptd * xoptd;
let tempb = half * xopts * xopts;
den[0] = dd * (xoptsq + half * dd) + tempa + tempb;
den[1] = two * xoptd * dd;
den[2] = two * xopts * dd;
den[3] = tempa - tempb;
den[4] = xoptd * xopts;
let mut wvec = vec![[zero; 5]; m + n];
for k in 0..m {
let row = self.xpt_row(k);
let ta = dot(row, d);
let tb = dot(row, s);
let tc = dot(row, &xopt);
wvec[k][0] = quart * (ta * ta + tb * tb);
wvec[k][1] = ta * tc;
wvec[k][2] = tb * tc;
wvec[k][3] = quart * (ta * ta - tb * tb);
wvec[k][4] = half * ta * tb;
}
for i in 0..n {
let ip = m + i;
wvec[ip][1] = d[i];
wvec[ip][2] = s[i];
}
let mut prod = vec![[zero; 5]; m + n];
for jc in 0..5 {
let full = jc == 1 || jc == 2;
for j in 0..rank {
let mut sum = zero;
for k in 0..m {
sum = sum + self.zmat.get(k, j) * wvec[k][jc];
}
sum = self.zsign[j] * sum;
for k in 0..m {
prod[k][jc] = prod[k][jc] + sum * self.zmat.get(k, j);
}
}
if full {
for k in 0..m {
let mut sum = zero;
for r in 0..n {
sum = sum + self.bmat_xi.get(r, k) * wvec[m + r][jc];
}
prod[k][jc] = prod[k][jc] + sum;
}
}
let nw = if full { m + n } else { m };
for r in 0..n {
let mut sum = zero;
for i in 0..nw {
let bij = if i < m {
self.bmat_xi.get(r, i)
} else {
self.bmat_ups.get(i - m, r)
};
sum = sum + bij * wvec[i][jc];
}
prod[m + r][jc] = sum;
}
}
let mut par = [zero; 5];
for k in 0..(m + n) {
let mut sum = zero;
for i in 0..5 {
par[i] = half * prod[k][i] * wvec[k][i];
sum = sum + par[i];
}
den[0] = den[0] - par[0] - sum;
let ta = prod[k][0] * wvec[k][1] + prod[k][1] * wvec[k][0];
let tb = prod[k][1] * wvec[k][3] + prod[k][3] * wvec[k][1];
let tc = prod[k][2] * wvec[k][4] + prod[k][4] * wvec[k][2];
den[1] = den[1] - ta - half * (tb + tc);
den[5] = den[5] - half * (tb - tc);
let ta = prod[k][0] * wvec[k][2] + prod[k][2] * wvec[k][0];
let tb = prod[k][1] * wvec[k][4] + prod[k][4] * wvec[k][1];
let tc = prod[k][2] * wvec[k][3] + prod[k][3] * wvec[k][2];
den[2] = den[2] - ta - half * (tb - tc);
den[6] = den[6] - half * (tb + tc);
let ta = prod[k][0] * wvec[k][3] + prod[k][3] * wvec[k][0];
den[3] = den[3] - ta - par[1] + par[2];
let ta = prod[k][0] * wvec[k][4] + prod[k][4] * wvec[k][0];
let tb = prod[k][1] * wvec[k][2] + prod[k][2] * wvec[k][1];
den[4] = den[4] - ta - half * tb;
den[7] = den[7] - par[3] + par[4];
let ta = prod[k][3] * wvec[k][4] + prod[k][4] * wvec[k][3];
den[8] = den[8] - half * ta;
}
let mut denex = [zero; 9];
let mut sum = zero;
for i in 0..5 {
par[i] = half * prod[t][i] * prod[t][i];
sum = sum + par[i];
}
denex[0] = alpha * den[0] + par[0] + sum;
let ta = two * prod[t][0] * prod[t][1];
let tb = prod[t][1] * prod[t][3];
let tc = prod[t][2] * prod[t][4];
denex[1] = alpha * den[1] + ta + tb + tc;
denex[5] = alpha * den[5] + tb - tc;
let ta = two * prod[t][0] * prod[t][2];
let tb = prod[t][1] * prod[t][4];
let tc = prod[t][2] * prod[t][3];
denex[2] = alpha * den[2] + ta + tb - tc;
denex[6] = alpha * den[6] + tb + tc;
let ta = two * prod[t][0] * prod[t][3];
denex[3] = alpha * den[3] + ta + par[1] - par[2];
let ta = two * prod[t][0] * prod[t][4];
denex[4] = alpha * den[4] + ta + prod[t][1] * prod[t][2];
denex[7] = alpha * den[7] + par[3] - par[4];
denex[8] = alpha * den[8] + prod[t][3] * prod[t][4];
(prod, denex)
}
pub(crate) fn bigden(&self, t: usize, d0: &[F]) -> Vec<F> {
let n = self.n;
let m = self.m;
assert!(t < m, "bigden: t must index an interpolation point");
assert_eq!(d0.len(), n, "bigden: d0 must have length n");
#[cfg(test)]
BIGDEN_CALLS.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
let half = F::from_f64(0.5).expect("0.5 representable");
let one = F::one();
let zero = F::zero();
let twopi = F::from_f64(2.0 * core::f64::consts::PI).expect("2π representable");
let onep1 = from::<F>(1.1);
let c099 = from::<F>(0.99);
let degen = from::<F>(1e-8);
let kopt = self.kopt;
let xopt = self.xpt_row(kopt).to_vec();
let (alpha, w_npt) = self.omega_column(t);
let mut d = d0.to_vec();
let mut s: Vec<F> = (0..n).map(|i| self.xpt_row(t)[i] - xopt[i]).collect();
let mut dd = dot(&d, &d);
let mut ds = dot(&d, &s);
let mut ss = dot(&s, &s);
if dd <= zero {
return d0.to_vec();
}
if ds * ds > c099 * dd * ss {
let mut ksav = t;
let mut dtest = ds * ds / ss;
for k in 0..m {
if k == kopt {
continue;
}
let mut dstemp = zero;
let mut sstemp = zero;
for i in 0..n {
let diff = self.xpt_row(k)[i] - xopt[i];
dstemp = dstemp + d[i] * diff;
sstemp = sstemp + diff * diff;
}
if dstemp * dstemp / sstemp < dtest {
ksav = k;
dtest = dstemp * dstemp / sstemp;
ds = dstemp;
ss = sstemp;
}
}
for i in 0..n {
s[i] = self.xpt_row(ksav)[i] - xopt[i];
}
}
let mut ssden = dd * ss - ds * ds;
if ssden <= degen * dd * ss {
return d0.to_vec();
}
let mut densav = zero;
let mut par = [zero; 9];
for iterc in 1..=n {
let scale = one / ssden.sqrt();
for i in 0..n {
s[i] = scale * (dd * s[i] - ds * d[i]);
}
let (prod, denex) = self.bigden_iter_coeffs(t, alpha, &d, &s);
let mut sum = denex[0] + denex[1] + denex[3] + denex[5] + denex[7];
let denold = sum;
let mut denmax = sum;
let mut isave: isize = 0;
let iu = 49usize;
let step_t = twopi / F::from_f64((iu + 1) as f64).expect("iu+1 representable");
let mut tempa = zero;
let mut tempb = zero;
for i in 1..=iu {
let angle = F::from_f64(i as f64).expect("i representable") * step_t;
fill_harmonics(&mut par, angle);
let sumold = sum;
sum = zero;
for j in 0..9 {
sum = sum + denex[j] * par[j];
}
if sum.abs() > denmax.abs() {
denmax = sum;
isave = i as isize;
tempa = sumold;
} else if i as isize == isave + 1 {
tempb = sum;
}
}
if isave == 0 {
tempa = sum;
}
if isave == iu as isize {
tempb = denold;
}
let mut step = zero;
if tempa != tempb {
let ta = tempa - denmax;
let tb = tempb - denmax;
if ta + tb != zero {
step = half * (ta - tb) / (ta + tb);
}
}
let angle = step_t * (F::from_f64(isave as f64).expect("isave representable") + step);
fill_harmonics(&mut par, angle);
let mut denmax_new = zero;
for j in 0..9 {
denmax_new = denmax_new + denex[j] * par[j];
}
let mut vlag = vec![zero; m + n];
for k in 0..(m + n) {
let mut v = zero;
for j in 0..5 {
v = v + prod[k][j] * par[j];
}
vlag[k] = v;
}
let tau = vlag[t];
let cos = par[1];
let sin = par[2];
let mut wpt = vec![zero; n];
let mut sum_dw = zero;
let mut sum_ww = zero;
dd = zero;
for i in 0..n {
d[i] = cos * d[i] + sin * s[i];
wpt[i] = xopt[i] + d[i];
dd = dd + d[i] * d[i];
sum_dw = sum_dw + d[i] * wpt[i];
sum_ww = sum_ww + wpt[i] * wpt[i];
}
if iterc >= n {
break;
}
if iterc > 1 && denold > densav {
densav = denold;
}
if denmax_new.abs() <= onep1 * densav.abs() {
break;
}
densav = denmax_new;
for i in 0..n {
let temp = sum_dw * xopt[i] + sum_ww * d[i] - vlag[m + i];
s[i] = tau * self.bmat_xi.get(i, t) + alpha * temp;
}
for k in 0..m {
let row = self.xpt_row(k);
let sumk = dot(row, &wpt);
let temp = (tau * w_npt[k] - alpha * vlag[k]) * sumk;
for i in 0..n {
s[i] = s[i] + temp * row[i];
}
}
ss = dot(&s, &s);
ds = dot(&d, &s);
ssden = dd * ss - ds * ds;
if ssden < degen * dd * ss {
break;
}
}
d
}
}
fn from<F: Scalar>(x: f64) -> F {
F::from_f64(x).expect("constant representable")
}
fn dot<F: Scalar>(a: &[F], b: &[F]) -> F {
a.iter().zip(b).map(|(x, y)| *x * *y).sum()
}
fn fill_harmonics<F: Scalar>(par: &mut [F; 9], angle: F) {
par[0] = F::one();
par[1] = angle.cos();
par[2] = angle.sin();
let mut j = 3;
while j <= 7 {
par[j] = par[1] * par[j - 2] - par[2] * par[j - 1];
par[j + 1] = par[1] * par[j - 1] + par[2] * par[j - 2];
j += 2;
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::solver::powell::QuadraticModel;
fn rosen(x: &[f64]) -> f64 {
(1.0 - x[0]).powi(2) + 100.0 * (x[1] - x[0] * x[0]).powi(2)
}
fn sigma_at<F: Scalar>(model: &QuadraticModel<F>, t: usize, d: &[F]) -> F {
let xopt = model.xpt_row(model.kopt()).to_vec();
let n = model.n();
let xnew: Vec<F> = (0..n).map(|i| xopt[i] + d[i]).collect();
let ctx = model.prepare_update(&xnew);
model.update_params(t, &ctx).sigma
}
fn sigma_hat(denex: &[f64; 9], theta: f64) -> f64 {
let mut par = [0.0_f64; 9];
fill_harmonics(&mut par, theta);
(0..9).map(|j| denex[j] * par[j]).sum()
}
fn far_t<F: Scalar>(model: &QuadraticModel<F>) -> usize {
let kopt = model.kopt();
let xopt = model.xpt_row(kopt).to_vec();
(0..model.m())
.filter(|j| *j != kopt)
.map(|j| {
let r = model.xpt_row(j);
let d2: F = (0..model.n())
.map(|i| (r[i] - xopt[i]) * (r[i] - xopt[i]))
.sum();
(j, d2)
})
.max_by(|a, b| a.1.partial_cmp(&b.1).unwrap())
.unwrap()
.0
}
#[test]
fn sigma_coefficients_match_update_params() {
let model = QuadraticModel::initialize(vec![-1.2, 1.0], 0.5, 5, &rosen);
let n = model.n();
let t = far_t(&model);
let (alpha, _) = model.omega_column(t);
let d = model.biglag(t, 0.4).d;
let dnorm = dot(&d, &d).sqrt();
let mut s = vec![-d[1], d[0]];
let snorm = dot(&s, &s).sqrt();
for v in &mut s {
*v *= dnorm / snorm;
}
let (_prod, denex) = model.bigden_iter_coeffs(t, alpha, &d, &s);
for k in 0..16 {
let theta = std::f64::consts::TAU * (k as f64) / 16.0;
let dq: Vec<f64> = (0..n)
.map(|i| theta.cos() * d[i] + theta.sin() * s[i])
.collect();
let want = sigma_at(&model, t, &dq);
let got = sigma_hat(&denex, theta);
assert!(
(got - want).abs() <= 1e-9 * (1.0 + want.abs()),
"θ={theta}: σ̂={got} vs update_params σ={want}"
);
}
}
#[test]
fn preserves_radius_and_improves_denominator() {
let model = QuadraticModel::initialize(vec![-1.2, 1.0], 0.5, 5, &rosen);
let t = far_t(&model);
let d0 = model.biglag(t, 0.4).d;
let r0 = dot(&d0, &d0).sqrt();
let sig0 = sigma_at(&model, t, &d0).abs();
let d = model.bigden(t, &d0);
let r = dot(&d, &d).sqrt();
assert!((r - r0).abs() <= 1e-9 * (1.0 + r0), "‖d‖={r} vs ‖d0‖={r0}");
let sig = sigma_at(&model, t, &d).abs();
assert!(sig >= sig0 - 1e-12, "|σ| not improved: {sig} < seed {sig0}");
}
#[test]
fn near_optimal_vs_brute_force() {
let model = QuadraticModel::initialize(vec![0.4, -0.6, 0.2], 0.5, 7, &|x: &[f64]| {
(0..3)
.map(|i| (x[i] - 0.5 * (i as f64)).powi(2))
.sum::<f64>()
+ 0.3 * x[0] * x[1]
- 0.2 * x[1] * x[2]
});
let t = far_t(&model);
let d0 = model.biglag(t, 0.3).d;
let radius = dot(&d0, &d0).sqrt();
let d = model.bigden(t, &d0);
let sig = sigma_at(&model, t, &d).abs();
let mut brute = 0.0_f64;
let steps = 40;
for a in 0..steps {
let theta = std::f64::consts::PI * (a as f64) / (steps as f64);
for b in 0..(2 * steps) {
let phi = std::f64::consts::PI * (b as f64) / (steps as f64);
let dir = [
radius * theta.sin() * phi.cos(),
radius * theta.sin() * phi.sin(),
radius * theta.cos(),
];
brute = brute.max(sigma_at(&model, t, &dir).abs());
}
}
assert!(sig >= 0.9 * brute, "bigden |σ|={sig} < 0.9·brute {brute}");
}
#[test]
fn degenerate_n1_returns_seed() {
let model =
QuadraticModel::initialize(vec![0.3], 0.5, 3, &|x: &[f64]| (x[0] - 1.0).powi(2));
let kopt = model.kopt();
let t = (0..model.m()).find(|j| *j != kopt).unwrap();
let d0 = vec![0.2_f64];
let d = model.bigden(t, &d0);
assert_eq!(d, d0, "n=1 must return the seed unchanged");
}
}