basin 1.2.0

Numerical optimization in pure Rust, with pluggable linear-algebra backends and WASM support.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
//! BIGLAG: the NEWUOA geometry-improving step (Powell 2006, §6).
//!
//! When the trust-region iterations leave an interpolation point `x_t` far from
//! `x_opt`, the model degrades. BIGLAG repairs it by choosing a replacement
//! `x⁺ = x_opt + d` that approximately solves
//!
//! ```text
//! maximize  |ℓ_t(x_opt + d)|   subject to  ‖d‖ ≤ Δ̄ ,
//! ```
//!
//! where `ℓ_t` is the `t`-th Lagrange function of the current model (eqs.
//! 6.1–6.6). Because `τ = ℓ_t(x_opt + d)` enters the update denominator
//! `σ = αβ + τ²` (eq. 6.5), a large `|ℓ_t|` keeps the least-Frobenius `H`-update
//! well-conditioned, which is the whole point of the geometry step.
//!
//! The method (eqs. 6.7–6.16) is the same 2-D circular search that TRSAPP uses
//! on the trust-region boundary ([`super::trsapp`], eqs. 5.16–5.19): each
//! iteration maximizes `|ℓ_t(x_opt + d(θ))|` over `d(θ) = cosθ d_{j-1} + sinθ
//! s_j` with `s_j ⟂ d_{j-1}`, `‖d_{j-1}‖ = ‖s_j‖ = Δ̄`. The Lagrange function's
//! gradient and Hessian come from its coefficients
//! ([`QuadraticModel::lagrange_coeffs`]) without ever forming `∇²ℓ_t` (eq. 6.15).
//!
//! # BIGDEN fallback
//!
//! Powell rejects BIGLAG's `d` when `|σ| < 0.8 τ²` (eq. 6.17) and falls back to
//! [`bigden`](QuadraticModel::bigden), which maximizes the denominator `|σ|`
//! directly. The driver ([`super::driver`]) applies this test on the
//! recomputed update scalars after each BIGLAG step and substitutes BIGDEN's `d`
//! when it fires — a rare event in practice (Powell 2006, §6).

use crate::core::math::Scalar;
use crate::solver::powell::QuadraticModel;

/// Result of one BIGLAG solve (Powell 2006, §6).
pub(crate) struct BiglagResult<F = f64> {
    /// The geometry step `d`, length `n`, expressed **relative to `x_opt`** (the
    /// new interpolation point is `x_opt + d`). Same convention as
    /// [`super::trsapp::TrsappResult::d`].
    pub(crate) d: Vec<F>,
    /// `τ = ℓ_t(x_opt + d)` — the Lagrange-function value at the new point. The
    /// driver does not screen it directly: it recomputes `τ`/`σ` from the §4
    /// update (`prepare_update` / `update_params`) and applies the eq. 6.17
    /// BIGDEN test there. This field is informational.
    pub(crate) tau: F,
}

impl<F: Scalar> QuadraticModel<F> {
    /// Approximately maximize `|ℓ_t(x_opt + d)|` over `‖d‖ ≤ delta_bar` by the
    /// 2-D circular search of Powell 2006, §6 (subroutine BIGLAG).
    ///
    /// `t` is the interpolation index to be replaced (chosen by the driver's
    /// Box-7 "furthest point" rule); `delta_bar` is the bound `Δ̄` of eq. 6.16.
    ///
    /// # Panics
    ///
    /// Panics unless `delta_bar > 0` and `t < m`.
    pub(crate) fn biglag(&self, t: usize, delta_bar: F) -> BiglagResult<F> {
        assert!(delta_bar > F::zero(), "biglag: delta_bar must be positive");
        assert!(t < self.m(), "biglag: t must index an interpolation point");

        let n = self.n();
        let half = F::from_f64(0.5).expect("0.5 representable");
        let two = F::from_f64(2.0).expect("2.0 representable");

        // ℓ_t coefficients: g = ∇ℓ_t(x0), λ the implicit-Hessian coefficients
        // (Powell 2006, eqs. 6.2, 6.15). ∇²ℓ_t·u = lagrange_hessian_matvec(λ, u).
        let (g_x0, lambda) = self.lagrange_coeffs(t);
        let xopt = self.xpt_row(self.kopt()).to_vec();
        // ℓ_t(x_opt) = δ_(opt)t by the Lagrange conditions (eq. 6.1).
        let base = if t == self.kopt() {
            F::one()
        } else {
            F::zero()
        };
        // ∇ℓ_t(x_opt) = ∇ℓ_t(x0) + ∇²ℓ_t (x_opt − x0) (eq. 6.11 differentiated).
        let h_xopt = self.lagrange_hessian_matvec(&lambda, &xopt);
        let grad_opt: Vec<F> = (0..n).map(|i| g_x0[i] + h_xopt[i]).collect();

        // ℓ_t(x_opt + d) via eq. 6.11, shifted by the constant ℓ_t(x_opt).
        let ell = |d: &[F]| -> F {
            let hd = self.lagrange_hessian_matvec(&lambda, d);
            base + dot(d, &grad_opt) + half * dot(d, &hd)
        };

        // ---- d_0 (eq. 6.9): ±Δ̄ (x_t − x_opt)/‖x_t − x_opt‖, sign for larger |ℓ|.
        let xt = self.xpt_row(t).to_vec();
        let mut u0: Vec<F> = (0..n).map(|i| xt[i] - xopt[i]).collect();
        let mut u0_norm = norm(&u0);
        if u0_norm <= F::zero() {
            // x_t coincides with x_opt (the driver never picks t = opt, but guard
            // anyway): fall back to the Lagrange gradient as the search ray.
            u0 = grad_opt.clone();
            u0_norm = norm(&u0);
            if u0_norm <= F::zero() {
                return BiglagResult {
                    d: vec![F::zero(); n],
                    tau: base,
                };
            }
        }
        let scale0 = delta_bar / u0_norm;
        let dpos: Vec<F> = (0..n).map(|i| scale0 * u0[i]).collect();
        let ell_pos = ell(&dpos);
        let ell_neg = ell(&dpos.iter().map(|x| -*x).collect::<Vec<_>>());
        let mut d: Vec<F> = if ell_pos.abs() >= ell_neg.abs() {
            dpos
        } else {
            dpos.iter().map(|x| -*x).collect()
        };
        let mut ell_d = ell(&d); // ℓ_t(x_opt + d), signed
        // ∇ℓ_t(x_opt + d) = ∇ℓ_t(x_opt) + ∇²ℓ_t d.
        let hd0 = self.lagrange_hessian_matvec(&lambda, &d);
        let mut grad_cur: Vec<F> = (0..n).map(|i| grad_opt[i] + hd0[i]).collect();

        let delta_sq = delta_bar * delta_bar;
        // Near-degenerate cutoff for s_j ‖orthogonal component‖² (eq. 6.12):
        // the orthogonal part vanishes ⇔ ∇ℓ_t ∥ d, and BIGLAG returns d_{j-1}.
        let degen = F::from_f64(1e-8).expect("1e-8 representable");

        // ---- Iterations j = 1..n (eqs. 6.7–6.14). ----
        for j in 0..n {
            let dd = dot(&d, &d); // ≈ Δ̄²

            // Choose the gradient that spans the 2-D subspace with d_{j-1}.
            // j = 0 (the first iteration) may use ∇ℓ_t(x_opt) instead of
            // ∇ℓ_t(x_opt + d_0) when both eq. 6.10 conditions hold.
            let gsrc: &[F] = if j == 0 {
                let dg = dot(&d, &grad_opt);
                let gg = dot(&grad_opt, &grad_opt);
                let cond_i = dg * dg <= from::<F>(0.99) * delta_sq * gg; // (6.10a)
                let cond_ii = gg.sqrt() * delta_bar >= from::<F>(0.1) * ell_d.abs(); // (6.10b)
                if cond_i && cond_ii {
                    &grad_opt
                } else {
                    &grad_cur
                }
            } else {
                &grad_cur
            };

            // s_raw = gsrc − (dᵀgsrc/dd) d  (the component of gsrc ⟂ d, eq. 6.8).
            let coef = dot(&d, gsrc) / dd;
            let s_raw: Vec<F> = (0..n).map(|i| gsrc[i] - coef * d[i]).collect();
            let s_raw_sq = dot(&s_raw, &s_raw);
            let gsrc_sq = dot(gsrc, gsrc);
            if s_raw_sq <= degen * gsrc_sq {
                break; // (6.12): ∇ℓ_t ∥ d, no useful search direction.
            }

            // Rescale s ⟂ d, ‖s‖ = Δ̄.
            let s_scale = delta_bar / s_raw_sq.sqrt();
            let s: Vec<F> = (0..n).map(|i| s_scale * s_raw[i]).collect();
            let hs = self.lagrange_hessian_matvec(&lambda, &s);

            // ℓ_t(x_opt + d(θ)) with d(θ) = cosθ d + sinθ s. Using
            // ∇²ℓ_t d = ∇ℓ_t(x_opt + d) − ∇ℓ_t(x_opt) = grad_cur − grad_opt:
            let hd: Vec<F> = (0..n).map(|i| grad_cur[i] - grad_opt[i]).collect();
            let d_g0 = dot(&d, &grad_opt);
            let s_g0 = dot(&s, &grad_opt);
            let d_hd = dot(&d, &hd);
            let s_hd = dot(&s, &hd); // = dᵀ∇²ℓ_t s by symmetry
            let s_hs = dot(&s, &hs);
            let ell_of = |theta: F| -> F {
                let c = theta.cos();
                let sn = theta.sin();
                base + c * d_g0
                    + sn * s_g0
                    + half * (c * c * d_hd + two * sn * c * s_hd + sn * sn * s_hs)
            };

            // Maximize |ℓ_t(x_opt + d(θ))| over θ ∈ [0, 2π): the θ = 0 incumbent
            // plus NTHETA = 49 interior nodes on a 2π/(NTHETA+1) grid (50 distinct
            // angles, none at 2π ≡ 0 — matching PRIMA's biglag.f `TWOPI/(IU+1)`),
            // then a 3-point parabolic refinement, mirroring TRSAPP's boundary
            // sweep (trsapp.rs) but on the modulus rather than the model value.
            const NTHETA: usize = 49;
            let pi = F::from_f64(core::f64::consts::PI).expect("π representable");
            let step =
                (pi + pi) / F::from_f64((NTHETA + 1) as f64).expect("NTHETA+1 representable");
            let mut best_theta = F::zero();
            let mut best_abs = ell_of(F::zero()).abs(); // θ = 0 ⇒ d_{j-1}
            for k in 1..=NTHETA {
                let theta = step * F::from_f64(k as f64).expect("k representable");
                let av = ell_of(theta).abs();
                if av > best_abs {
                    best_abs = av;
                    best_theta = theta;
                }
            }
            // Parabolic refinement around the best node (concave-down guard).
            let am = ell_of(best_theta - step).abs();
            let ap = ell_of(best_theta + step).abs();
            let denom = am - (best_abs + best_abs) + ap;
            let theta_star = if denom < F::zero() {
                best_theta + half * (am - ap) / denom * step
            } else {
                best_theta
            };

            // Commit d_j = cosθ* d + sinθ* s and its Lagrange value. The refined
            // angle is committed unconditionally (no `max(best_abs)` guard, unlike
            // TRSAPP's defensive sweep) — faithful to PRIMA's biglag.f, where the
            // |ℓ| no-decrease guarantee rests on the θ = 0 seed being reachable and
            // the parabola being seeded at the grid maximum.
            let c = theta_star.cos();
            let sn = theta_star.sin();
            let d_new: Vec<F> = (0..n).map(|i| c * d[i] + sn * s[i]).collect();
            let ell_new = ell_of(theta_star);
            let ell_prev_abs = ell_d.abs();
            d = d_new;
            ell_d = ell_new;

            // ∇ℓ_t(x_opt + d_j) via the recurrence (eq. 6.14).
            grad_cur = (0..n)
                .map(|i| (F::one() - c) * grad_opt[i] + c * grad_cur[i] + sn * hs[i])
                .collect();

            // Termination (6.13): the iteration barely improved |ℓ_t|.
            let onep1 = from::<F>(1.1);
            if ell_d.abs() <= onep1 * ell_prev_abs {
                break;
            }
        }

        BiglagResult { d, tau: ell_d }
    }
}

/// Build an `F` from an `f64` literal (the constants are exactly representable).
fn from<F: Scalar>(x: f64) -> F {
    F::from_f64(x).expect("constant representable")
}

/// Plain dot product of two equal-length slices.
fn dot<F: Scalar>(a: &[F], b: &[F]) -> F {
    a.iter().zip(b).map(|(x, y)| *x * *y).sum()
}

/// Euclidean norm of `v`.
fn norm<F: Scalar>(v: &[F]) -> F {
    dot(v, v).sqrt()
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::solver::powell::QuadraticModel;

    /// A smooth test objective whose model `initialize` can interpolate.
    fn quad(x: &[f64]) -> f64 {
        // Q(x) = (x0−1)² + 2(x1+2)² − x0 x1, a generic indefinite-free quadratic.
        (x[0] - 1.0).powi(2) + 2.0 * (x[1] + 2.0).powi(2) - x[0] * x[1]
    }

    fn rosen(x: &[f64]) -> f64 {
        (1.0 - x[0]).powi(2) + 100.0 * (x[1] - x[0] * x[0]).powi(2)
    }

    /// Evaluate ℓ_t at an absolute interpolation point x_i using the
    /// coefficients, to check the Lagrange conditions ℓ_t(x_i) = δ_it (eq. 6.1).
    fn ell_at_point<F: crate::core::math::Scalar>(
        model: &QuadraticModel<F>,
        t: usize,
        i: usize,
    ) -> F {
        let (g_x0, lambda) = model.lagrange_coeffs(t);
        // ℓ_t(x0 + s) − ℓ_t(x0) = sᵀg + ½ sᵀ∇²ℓ_t s with s = x_i − x0.
        let s = model.xpt_row(i).to_vec();
        let hs = model.lagrange_hessian_matvec(&lambda, &s);
        let half = F::from_f64(0.5).unwrap();
        // Reference to point opt: ℓ_t(x_i) − ℓ_t(x_opt). With ℓ_t(x_opt)=δ_(opt)t.
        let base = if t == model.kopt() {
            F::one()
        } else {
            F::zero()
        };
        let xopt = model.xpt_row(model.kopt()).to_vec();
        let hxopt = model.lagrange_hessian_matvec(&lambda, &xopt);
        let val_i = dot(&s, &g_x0) + half * dot(&s, &hs);
        let val_opt = dot(&xopt, &g_x0) + half * dot(&xopt, &hxopt);
        base + val_i - val_opt
    }

    /// The coefficients from `lagrange_coeffs` must reproduce the defining
    /// Lagrange conditions `ℓ_t(x_i) = δ_it` at every interpolation point.
    #[test]
    fn lagrange_conditions_hold() {
        let model = QuadraticModel::initialize(vec![0.3, -0.7], 0.5, 5, &quad);
        let m = model.m();
        for t in 0..m {
            for i in 0..m {
                let want = if i == t { 1.0 } else { 0.0 };
                let got = ell_at_point(&model, t, i);
                assert!(
                    (got - want).abs() < 1e-9,
                    "ℓ_{t}(x_{i}) = {got}, want {want}"
                );
            }
        }
    }

    /// BIGLAG must (a) respect the bound ‖d‖ ≤ Δ̄ with the bound essentially
    /// active, and (b) not decrease |ℓ_t| below the initial ray d_0 (eq. 6.9).
    #[test]
    fn improves_over_initial_ray_and_respects_bound() {
        let model = QuadraticModel::initialize(vec![-1.2, 1.0], 0.5, 5, &rosen);
        let kopt = model.kopt();
        let delta_bar = 0.4;
        // Pick the interpolation point furthest from x_opt (the driver's Box 7).
        let xopt = model.xpt_row(kopt).to_vec();
        let (t, _) = (0..model.m())
            .map(|j| {
                let r = model.xpt_row(j);
                let dist: f64 = (0..2).map(|i| (r[i] - xopt[i]).powi(2)).sum::<f64>().sqrt();
                (j, dist)
            })
            .filter(|(j, _)| *j != kopt)
            .max_by(|a, b| a.1.partial_cmp(&b.1).unwrap())
            .unwrap();

        // |ℓ_t| on the raw initial ray, for the improvement comparison.
        let (g_x0, lambda) = model.lagrange_coeffs(t);
        let h_xopt = model.lagrange_hessian_matvec(&lambda, &xopt);
        let grad_opt: Vec<f64> = (0..2).map(|i| g_x0[i] + h_xopt[i]).collect();
        let xt = model.xpt_row(t).to_vec();
        let mut u0: Vec<f64> = (0..2).map(|i| xt[i] - xopt[i]).collect();
        let nrm: f64 = u0.iter().map(|x| x * x).sum::<f64>().sqrt();
        for v in &mut u0 {
            *v *= delta_bar / nrm;
        }
        let ell_d0 = {
            let hd = model.lagrange_hessian_matvec(&lambda, &u0);
            dot(&u0, &grad_opt) + 0.5 * dot(&u0, &hd)
        };

        let res = model.biglag(t, delta_bar);
        let dnorm: f64 = res.d.iter().map(|x| x * x).sum::<f64>().sqrt();
        assert!(dnorm <= delta_bar * (1.0 + 1e-9), "‖d‖ = {dnorm} > Δ̄");
        assert!(
            dnorm >= 0.99 * delta_bar,
            "‖d‖ = {dnorm} ≪ Δ̄ (bound inactive)"
        );
        assert!(
            res.tau.abs() >= ell_d0.abs() - 1e-12,
            "|ℓ_t| not improved: {} < {}",
            res.tau.abs(),
            ell_d0.abs()
        );
    }

    /// BIGLAG must find a `|ℓ_t|` close to the brute-force maximum over the
    /// trust-region disk (it should not leave a much better point on the table).
    #[test]
    fn near_optimal_vs_brute_force() {
        let model = QuadraticModel::initialize(vec![0.4, -0.6, 0.2], 0.5, 7, &|x: &[f64]| {
            (0..3)
                .map(|i| (x[i] - 0.5 * (i as f64)).powi(2))
                .sum::<f64>()
                + 0.3 * x[0] * x[1]
                - 0.2 * x[1] * x[2]
        });
        let n = 3;
        let kopt = model.kopt();
        let xopt = model.xpt_row(kopt).to_vec();
        let delta_bar = 0.3;
        for t in 0..model.m() {
            if t == kopt {
                continue;
            }
            let (g_x0, lambda) = model.lagrange_coeffs(t);
            let h_xopt = model.lagrange_hessian_matvec(&lambda, &xopt);
            let grad_opt: Vec<f64> = (0..n).map(|i| g_x0[i] + h_xopt[i]).collect();
            let ell = |d: &[f64]| -> f64 {
                let hd = model.lagrange_hessian_matvec(&lambda, d);
                dot(d, &grad_opt) + 0.5 * dot(d, &hd)
            };
            // Brute force over a spherical grid of radius delta_bar.
            let mut brute = 0.0_f64;
            let steps = 40;
            for a in 0..steps {
                let theta = std::f64::consts::PI * (a as f64) / (steps as f64);
                for b in 0..(2 * steps) {
                    let phi = std::f64::consts::PI * (b as f64) / (steps as f64);
                    let d = [
                        delta_bar * theta.sin() * phi.cos(),
                        delta_bar * theta.sin() * phi.sin(),
                        delta_bar * theta.cos(),
                    ];
                    brute = brute.max(ell(&d).abs());
                }
            }
            let res = model.biglag(t, delta_bar);
            assert!(
                res.tau.abs() >= 0.9 * brute,
                "t={t}: biglag |ℓ_t|={} < 0.9 * brute {brute}",
                res.tau.abs()
            );
        }
    }

    /// After BIGLAG, swapping x_t for x_opt + d gives a non-negligible update
    /// denominator σ (the geometry step's purpose: keep the H-update well-posed).
    #[test]
    fn produces_well_conditioned_replacement() {
        let model = QuadraticModel::initialize(vec![-1.2, 1.0], 0.5, 5, &rosen);
        let kopt = model.kopt();
        let xopt = model.xpt_row(kopt).to_vec();
        let (t, _) = (0..model.m())
            .filter(|j| *j != kopt)
            .map(|j| {
                let r = model.xpt_row(j);
                let dist: f64 = (0..2).map(|i| (r[i] - xopt[i]).powi(2)).sum::<f64>().sqrt();
                (j, dist)
            })
            .max_by(|a, b| a.1.partial_cmp(&b.1).unwrap())
            .unwrap();
        let res = model.biglag(t, 0.4);
        let xnew: Vec<f64> = (0..2).map(|i| xopt[i] + res.d[i]).collect();
        let ctx = model.prepare_update(&xnew);
        let sc = model.update_params(t, &ctx);
        assert!(sc.sigma.abs() > 1e-6, "σ = {} too small", sc.sigma);
    }
}