1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
//! Closed-form initialization of the [`QuadraticModel`] (Powell 2006, §3).
//!
//! Given the base point `x0`, the initial trust radius `ρ_beg`, the number of
//! interpolation points `m`, and the objective `F`, this builds the first
//! quadratic model and the factored inverse-KKT matrix `H` in `O(m²)`
//! operations using the convenient geometry of the coordinate-cross sample set.
//!
//! Only `m ≥ 2n+1` is implemented (the recommended case). The `n+2 ≤ m ≤ 2n`
//! partial-model regime (Powell 2006, §3, second paragraph) is deferred.
use crate::core::math::{DenseMatrix, Scalar};
use crate::solver::powell::QuadraticModel;
impl<F: Scalar> QuadraticModel<F> {
/// Build the initial model from the coordinate-offset sample set (Powell
/// 2006, eqs. 3.2–3.5 for the geometry and the model's `Γ`/`∇Q`; eqs.
/// 3.13–3.20 for the closed-form `Ξ`/`Υ`/`Ω`-factorization).
///
/// `eval` evaluates the objective at an absolute point; it is called exactly
/// once per interpolation point (`m` evaluations total) and may fail — the
/// first `Err` aborts initialization and bubbles to the caller (the public
/// [`Newuoa`](crate::solver::Newuoa) solver forwards the problem's typed
/// cost error). Keeping it a closure lets initialization be unit-tested
/// without a `Problem`/driver; the infallible [`initialize`](Self::initialize)
/// wrapper serves the standalone driver and the tests.
///
/// # Panics
///
/// Panics unless `n ≥ 1` and `2n+1 ≤ m ≤ ½(n+1)(n+2)`.
pub(crate) fn try_initialize<E>(
x0: Vec<F>,
rho_beg: F,
m: usize,
eval: &mut impl FnMut(&[F]) -> Result<F, E>,
) -> Result<Self, E> {
let n = x0.len();
assert!(n >= 1, "NEWUOA model needs n >= 1");
assert!(
m > 2 * n,
"NEWUOA model init currently requires m >= 2n+1 (got m={m}, n={n}); \
the n+2 <= m <= 2n partial-model case is not yet implemented"
);
assert!(
m <= (n + 1) * (n + 2) / 2,
"NEWUOA model requires m <= (n+1)(n+2)/2 (got m={m}, n={n})"
);
let two = F::from_f64(2.0).expect("2.0 representable");
let half = F::from_f64(0.5).expect("0.5 representable");
let rho = rho_beg;
let rho2 = rho * rho;
// --- 1. Interpolation points (displacements from x0) + objective. ---
// (eq 3.2): point 0 = x0; points 1..=n = x0 + ρ e_k; n+1..=2n = x0 − ρ e_k.
let mut xpt = DenseMatrix::from_fn(m, n, |_, _| F::zero());
let mut fval = vec![F::zero(); m];
fval[0] = eval(x0.as_slice())?;
for k in 0..n {
// +ρ e_k → point (k+1).
xpt.set(k + 1, k, rho);
let mut xp = x0.clone();
xp[k] = xp[k] + rho;
fval[k + 1] = eval(xp.as_slice())?;
// −ρ e_k → point (k+1+n).
xpt.set(k + 1 + n, k, -rho);
let mut xm = x0.clone();
xm[k] = xm[k] - rho;
fval[k + 1 + n] = eval(xm.as_slice())?;
}
// (eq 3.4): σ_k = −1 if F(x0 − ρ e_k) < F(x0 + ρ e_k), else +1. Biases
// the paired off-diagonal points towards smaller objective values.
let mut sigma = vec![F::one(); n];
for k in 0..n {
if fval[k + 1 + n] < fval[k + 1] {
sigma[k] = -F::one();
}
}
// --- 2. Paired points for the off-diagonal Hessian (eq 3.3), m>2n+1. ---
// For each extra point store (ip, p, q, p̂, q̂) (all 0-based), where p,q
// are coordinates and p̂,q̂ the point indices of x0+σ_p ρ e_p and
// x0+σ_q ρ e_q (eq 3.19) — reused by both the Γ off-diagonal and the
// Ω-factorization column (eq 3.20).
let mut extras: Vec<(usize, usize, usize, usize, usize)> = Vec::new();
for ip in (2 * n + 1)..m {
// Powell's p,q selection rule (§3), converted to 0-based point index.
let jdiv = (ip - n - 1) / n; // ≥ 1 since ip ≥ 2n+1
let p1 = ip - n - jdiv * n; // 1-based coordinate in [1, n]
let mut q1 = p1 + jdiv;
if q1 > n {
q1 -= n;
}
let p_idx = p1 - 1;
let q_idx = q1 - 1;
// p̂: point index of x0 + σ_p ρ e_p (eq 3.19); +ρ ⇒ point p1, −ρ ⇒ p1+n.
let phat = if sigma[p_idx] > F::zero() { p1 } else { p1 + n };
let qhat = if sigma[q_idx] > F::zero() { q1 } else { q1 + n };
xpt.set(ip, p_idx, sigma[p_idx] * rho);
xpt.set(ip, q_idx, sigma[q_idx] * rho);
let mut x = x0.clone();
x[p_idx] = x[p_idx] + sigma[p_idx] * rho;
x[q_idx] = x[q_idx] + sigma[q_idx] * rho;
fval[ip] = eval(x.as_slice())?;
extras.push((ip, p_idx, q_idx, phat, qhat));
}
// --- 3. Model gradient and Hessian (the explicit part Γ; γⱼ ≡ 0). ---
let f0 = fval[0];
let mut gq = vec![F::zero(); n];
let mut gamma_explicit = DenseMatrix::from_fn(n, n, |_, _| F::zero());
// Diagonal + gradient from the central differences of the coordinate
// cross (the first 2n+1 points determine these uniquely, §3).
for k in 0..n {
let fp = fval[k + 1];
let fm = fval[k + 1 + n];
gq[k] = (fp - fm) / (two * rho);
gamma_explicit.set(k, k, (fp + fm - two * f0) / rho2);
}
// Off-diagonals from the paired points (eq 3.5):
// (∇²Q)_{pq} = σ_p σ_q ρ⁻² (F(x0) − F(p̂) − F(q̂) + F(paired)).
for &(ip, p_idx, q_idx, phat, qhat) in &extras {
let val =
sigma[p_idx] * sigma[q_idx] * (f0 - fval[phat] - fval[qhat] + fval[ip]) / rho2;
gamma_explicit.set(p_idx, q_idx, val);
gamma_explicit.set(q_idx, p_idx, val);
}
let gamma = vec![F::zero(); m];
// --- 4. Factored inverse-KKT H (suppressed (m+1)-th row/col). ---
let npt_minus = m - n - 1;
let mut bmat_xi = DenseMatrix::from_fn(n, m, |_, _| F::zero());
// Υ is identically zero for m ≥ 2n+1 (eq 3.16).
let bmat_ups = DenseMatrix::from_fn(n, n, |_, _| F::zero());
let mut zmat = DenseMatrix::from_fn(m, npt_minus, |_, _| F::zero());
let zsign = vec![F::one(); npt_minus]; // all +1 initially (eq 3.17)
// Ξ rows 2..n+1 of the full (n+1)×m matrix become rows 0..n of the
// suppressed bmat_xi (eq 3.14): Ξ_{r, r+1} = 1/(2ρ), Ξ_{r, r+1+n} = −1/(2ρ).
let inv_2rho = F::one() / (two * rho);
for r in 0..n {
bmat_xi.set(r, r + 1, inv_2rho);
bmat_xi.set(r, r + 1 + n, -inv_2rho);
}
// Ω-factorization diagonal columns (eq 3.18), columns 0..n−1:
// Z_{0,k} = −√2 ρ⁻², Z_{k+1,k} = Z_{k+1+n,k} = ½√2 ρ⁻².
let inv_rho2 = F::one() / rho2;
let sqrt2_over_rho2 = two.sqrt() * inv_rho2;
let half_sqrt2_over_rho2 = half * sqrt2_over_rho2;
for kk in 0..n {
zmat.set(0, kk, -sqrt2_over_rho2);
zmat.set(kk + 1, kk, half_sqrt2_over_rho2);
zmat.set(kk + 1 + n, kk, half_sqrt2_over_rho2);
}
// Off-diagonal columns (eq 3.20), one per extra point, column kk = ip−n−1:
// Z_{0,k} = ρ⁻², Z_{p̂,k} = Z_{q̂,k} = −ρ⁻², Z_{ip,k} = ρ⁻².
for &(ip, _p_idx, _q_idx, phat, qhat) in &extras {
let kk = ip - n - 1;
zmat.set(0, kk, inv_rho2);
zmat.set(phat, kk, -inv_rho2);
zmat.set(qhat, kk, -inv_rho2);
zmat.set(ip, kk, inv_rho2);
}
// --- 5. Best interpolation point (eq "opt"). ---
let mut kopt = 0;
for j in 1..m {
if fval[j] < fval[kopt] {
kopt = j;
}
}
Ok(QuadraticModel {
n,
m,
x0,
xpt,
fval,
kopt,
gq,
gamma_explicit,
gamma,
bmat_xi,
bmat_ups,
zmat,
zsign,
})
}
/// Infallible convenience wrapper around [`try_initialize`](Self::try_initialize)
/// for an objective that cannot fail (the standalone driver and the tests).
pub(crate) fn initialize(x0: Vec<F>, rho_beg: F, m: usize, f: impl Fn(&[F]) -> F) -> Self {
Self::try_initialize::<core::convert::Infallible>(x0, rho_beg, m, &mut |x| Ok(f(x)))
.expect("infallible objective")
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::solver::powell::kkt::assert_h_matches_inverse;
/// T1: n=1, m=3 (=2n+1). An exact 1-D quadratic `F = a + b·x + ½c·x²` must
/// be recovered: `∇Q(x0) = b + c·x0`, `Γ = [c]`, `γ ≡ 0`, and
/// `eval_change(d)` reproduces `F(x0+d) − F(x0)` exactly.
#[test]
fn n1_recovers_exact_quadratic() {
let (a, b, c) = (1.5, -0.75, 2.25);
let x0v = 0.5;
let f = |x: &[f64]| a + b * x[0] + 0.5 * c * x[0] * x[0];
let model = QuadraticModel::initialize(vec![x0v], 0.1, 3, f);
assert!((model.gradient()[0] - (b + c * x0v)).abs() < 1e-12);
assert!((model.gamma_explicit.get(0, 0) - c).abs() < 1e-12);
assert!(model.gamma.iter().all(|&g| g == 0.0));
for d in [0.3, -1.7, 0.0, 2.0] {
let expected = f(&[x0v + d]) - f(&[x0v]);
assert!(
(model.eval_change(&[d]) - expected).abs() < 1e-12,
"eval_change({d})"
);
}
}
/// T2: n=2, m=5 (=2n+1). A separable quadratic must give `Γ = diag(c1,c2)`,
/// zero off-diagonals, `γ ≡ 0`, and `∇Q(x0) = b + diag(c)·x0`.
#[test]
fn n2_separable_quadratic_is_diagonal() {
let (c1, c2) = (3.0, 5.0);
let b = [1.0, -2.0];
let x0v = [0.25, -0.5];
let f = |x: &[f64]| 0.5 * (c1 * x[0] * x[0] + c2 * x[1] * x[1]) + b[0] * x[0] + b[1] * x[1];
let model = QuadraticModel::initialize(x0v.to_vec(), 0.2, 5, f);
assert!((model.gamma_explicit.get(0, 0) - c1).abs() < 1e-12);
assert!((model.gamma_explicit.get(1, 1) - c2).abs() < 1e-12);
assert!(model.gamma_explicit.get(0, 1).abs() < 1e-12);
assert!(model.gamma_explicit.get(1, 0).abs() < 1e-12);
assert!(model.gamma.iter().all(|&g| g == 0.0));
assert!((model.gradient()[0] - (b[0] + c1 * x0v[0])).abs() < 1e-12);
assert!((model.gradient()[1] - (b[1] + c2 * x0v[1])).abs() < 1e-12);
}
/// T3: n=2, m=6 (>2n+1). The single off-diagonal pair must recover the cross
/// term `G12` via the paired point (eq 3.5), with the σ sign rule exercised:
/// `x0 = 0`, `b = (1, 2)` makes the gradient positive in both coordinates,
/// so `σ_1 = σ_2 = −1` (the `−ρ e_k` points are lower).
#[test]
fn n2_recovers_off_diagonal_cross_term() {
let g = [[4.0, 1.5], [1.5, 3.0]];
let b = [1.0, 2.0];
let f = |x: &[f64]| {
0.5 * (g[0][0] * x[0] * x[0] + 2.0 * g[0][1] * x[0] * x[1] + g[1][1] * x[1] * x[1])
+ b[0] * x[0]
+ b[1] * x[1]
};
let model = QuadraticModel::initialize(vec![0.0, 0.0], 0.3, 6, f);
assert!(
(model.gamma_explicit.get(0, 1) - g[0][1]).abs() < 1e-12,
"off-diag = {}",
model.gamma_explicit.get(0, 1)
);
assert!((model.gamma_explicit.get(1, 0) - g[0][1]).abs() < 1e-12);
// Diagonals still exact.
assert!((model.gamma_explicit.get(0, 0) - g[0][0]).abs() < 1e-12);
assert!((model.gamma_explicit.get(1, 1) - g[1][1]).abs() < 1e-12);
}
/// T4: the master KKT identity — the stored `Ω`/`Ξ`/`Υ` blocks must equal
/// the corresponding blocks of `inv(W)` after INIT, validating eqs 3.13–3.20
/// across `m = 2n+1` (n=1, n=2) and `m > 2n+1` (n=2).
#[test]
fn init_satisfies_kkt_identity() {
// n=1, m=3.
let m1 = QuadraticModel::initialize(vec![0.5], 0.1, 3, |x: &[f64]| {
1.0 + 0.3 * x[0] + 0.5 * x[0] * x[0]
});
assert_h_matches_inverse(&m1, 1e-10);
// n=2, m=5.
let m2 = QuadraticModel::initialize(vec![0.25, -0.5], 0.2, 5, |x: &[f64]| {
x[0] * x[0] + 2.0 * x[1] * x[1] + x[0] - x[1]
});
assert_h_matches_inverse(&m2, 1e-10);
// n=2, m=6 (one off-diagonal pair).
let m3 = QuadraticModel::initialize(vec![0.0, 0.0], 0.3, 6, |x: &[f64]| {
2.0 * x[0] * x[0] + 1.5 * x[0] * x[1] + 1.5 * x[1] * x[1] + x[0] + 2.0 * x[1]
});
assert_h_matches_inverse(&m3, 1e-10);
}
/// T5: the model must interpolate `F` at every interpolation point. With no
/// stored constant, this is `eval_change(xⱼ − x0) = F(xⱼ) − F(x0)`.
#[test]
fn init_interpolates_at_all_points() {
let f = |x: &[f64]| {
2.0 * x[0] * x[0] + 1.5 * x[0] * x[1] + 3.0 * x[1] * x[1] + x[0] - 2.0 * x[1] + 0.7
};
let x0v = vec![0.4, -0.3];
let model = QuadraticModel::initialize(x0v.clone(), 0.25, 6, f);
let f0 = f(x0v.as_slice());
for j in 0..model.m() {
let disp = model.xpt_row(j).to_vec();
let xj: Vec<f64> = x0v.iter().zip(&disp).map(|(a, b)| a + b).collect();
let expected = f(xj.as_slice()) - f0;
assert!(
(model.eval_change(&disp) - expected).abs() < 1e-10,
"interpolation at point {j}: got {}, want {expected}",
model.eval_change(&disp)
);
}
}
}