Skip to main content

pounce_algorithm/hess/
lim_mem_quasi_newton.rs

1//! Limited-memory quasi-Newton (L-BFGS / SR1) — port of
2//! `Algorithm/IpLimMemQuasiNewtonUpdater.{hpp,cpp}`. **Phase 8.**
3//!
4//! Update strategy is selected by the `limited_memory_update_type`
5//! option (`bfgs` or `sr1`) per `MAIN_LOOP.md`.
6//!
7//! Phase 8 publishes the limited-memory Hessian as `data.w` via the
8//! **low-rank** assembler, for every problem size. At each
9//! `update_hessian` call we walk the curvature-pair history (oldest to
10//! newest) applying the rank-2 BFGS / rank-1 SR1 formulas to build the
11//! compact factors of `B = σ I + V Vᵀ − U Uᵀ`, then publish a
12//! [`pounce_linalg::low_rank_update_sym_matrix::LowRankUpdateSymMatrix`]
13//! as `data.w`. No dense `n×n` buffer is ever formed: the walk is
14//! `O(n · m)` per pair and `O(n · m²)` total (with `m = max_history`),
15//! and storage is `O(n · m)`, so the limited-memory path scales to
16//! arbitrarily large `n`. [`crate::kkt::low_rank_aug_system_solver`]
17//! applies the Hessian's inverse action via the Sherman-Morrison-Woodbury
18//! identity, factorizing only the diagonal `B0`. This removes the
19//! `eval_h` requirement (the user no longer needs to declare a Hessian
20//! sparsity pattern) and the former `O(n²)` memory cliff.
21//!
22//! `LowRankAugSystemSolver` wraps the standard augmented-system solver and
23//! forwards the Hessian-free init / equality-multiplier solves (which
24//! carry a non-low-rank `W`) straight through, so a single solver
25//! instance serves the whole iteration.
26//!
27//! Update kernels:
28//!   - [`initial_hessian_scalar`] (sigma per `LIM_MEM_INIT`)
29//!   - [`powell_damping_theta`] (modified-y damping for BFGS)
30//!   - [`bfgs_curvature_pair_ok`] (skip-criterion for L-BFGS)
31//!   - [`sr1_denominator_ok`] (skip-criterion for SR1)
32
33use crate::hess::r#trait::HessianUpdater;
34use crate::ipopt_cq::IpoptCqHandle;
35use crate::ipopt_data::IpoptDataHandle;
36use pounce_common::types::{Index, Number};
37use pounce_linalg::compound_vector::CompoundVector;
38use pounce_linalg::dense_vector::{DenseVector, DenseVectorSpace};
39use pounce_linalg::low_rank_update_sym_matrix::LowRankUpdateSymMatrixSpace;
40use pounce_linalg::multi_vector_matrix::{MultiVectorMatrix, MultiVectorMatrixSpace};
41use pounce_linalg::Vector;
42use std::rc::Rc;
43
44/// One curvature pair `(s, y)` plus the cached `||s||`, `||y||`, `s·y`
45/// scalars the BFGS / SR1 update kernels need on every history walk.
46#[derive(Debug, Clone)]
47pub struct CurvaturePair {
48    pub s: Rc<dyn Vector>,
49    pub y: Rc<dyn Vector>,
50    pub s_dot_y: Number,
51    pub s_norm: Number,
52    pub y_norm: Number,
53}
54
55#[derive(Debug, Clone, Copy, PartialEq, Eq)]
56pub enum UpdateType {
57    Bfgs,
58    Sr1,
59}
60
61#[derive(Debug, Clone, Copy, PartialEq, Eq)]
62pub enum InitialApprox {
63    Identity,
64    Scalar1,
65    Scalar2,
66}
67
68pub struct LimMemQuasiNewtonUpdater {
69    pub update_type: UpdateType,
70    pub initial_approx: InitialApprox,
71    pub max_history: i32,
72    /// Powell-damping threshold. Default per upstream
73    /// `IpLimMemQuasiNewtonUpdater.cpp:RegisterOptions`:
74    /// `limited_memory_init_val_max=1e8` (clamp on initial sigma);
75    /// the damping coefficient is hard-coded at 0.2 in the BFGS path.
76    pub init_val_max: Number,
77    pub init_val_min: Number,
78    /// Rolling FIFO of curvature pairs, oldest at index 0. Capped at
79    /// `max_history`; insertion drops the front.
80    pub history: Vec<CurvaturePair>,
81    /// `x` from the previous `update_hessian` call. None on the first
82    /// iteration.
83    pub last_x: Option<Rc<dyn Vector>>,
84    /// `∇f(x_prev)` cached for the upstream y-difference formula
85    /// (`IpLimMemQuasiNewtonUpdater.cpp:284`).
86    pub last_grad_f: Option<Rc<dyn Vector>>,
87    /// `J_c(x_prev)` cached for `J_c_prev^T · y_c_curr` in the
88    /// y-difference. Stored as the trait object so we can call
89    /// `trans_mult_vector` against the *current* multipliers — the
90    /// upstream formula evaluates both Jacobians against `y_c_curr`
91    /// (NOT `y_c_prev`).
92    pub last_jac_c: Option<Rc<dyn pounce_linalg::matrix::Matrix>>,
93    pub last_jac_d: Option<Rc<dyn pounce_linalg::matrix::Matrix>>,
94}
95
96impl Default for LimMemQuasiNewtonUpdater {
97    fn default() -> Self {
98        Self {
99            update_type: UpdateType::Bfgs,
100            initial_approx: InitialApprox::Scalar2,
101            max_history: 6,
102            init_val_max: 1e8,
103            init_val_min: 1e-8,
104            history: Vec::new(),
105            last_x: None,
106            last_grad_f: None,
107            last_jac_c: None,
108            last_jac_d: None,
109        }
110    }
111}
112
113impl LimMemQuasiNewtonUpdater {
114    pub fn new() -> Self {
115        Self::default()
116    }
117
118    /// Try to absorb a new curvature pair. Returns `true` when the
119    /// pair was accepted (and pushed to history), `false` when the
120    /// skip-criterion rejected it. The caller owns `s` and `y` as
121    /// `Rc<dyn Vector>` so the history can retain them cheaply.
122    ///
123    /// This matches the per-iteration path in
124    /// `IpLimMemQuasiNewtonUpdater.cpp:Update` after the `(x, ∇L)`
125    /// difference has been formed: skip-or-keep, then push, then
126    /// trim the history to `max_history`.
127    pub fn ingest_pair(&mut self, s: Rc<dyn Vector>, y: Rc<dyn Vector>) -> bool {
128        let s_dot_y = s.dot(&*y);
129        let s_norm = s.nrm2();
130        let y_norm = y.nrm2();
131        let accept = match self.update_type {
132            UpdateType::Bfgs => bfgs_curvature_pair_ok(s_dot_y, s_norm, y_norm),
133            UpdateType::Sr1 => {
134                // SR1's skip-criterion is `(y - Bs)^T s` not `s^T y`;
135                // without `B` available here we use the upstream
136                // fallback of `s^T y` magnitude as the gating heuristic
137                // (a more accurate test lands once the low-rank matrix
138                // is wired in).
139                sr1_denominator_ok(s_dot_y, s_norm, y_norm)
140            }
141        };
142        if !accept {
143            return false;
144        }
145        self.history.push(CurvaturePair {
146            s,
147            y,
148            s_dot_y,
149            s_norm,
150            y_norm,
151        });
152        // Drop oldest pairs to honor the memory budget.
153        while self.history.len() > self.max_history.max(0) as usize {
154            self.history.remove(0);
155        }
156        true
157    }
158}
159
160impl HessianUpdater for LimMemQuasiNewtonUpdater {
161    /// Snapshot the current `(x, ∇_x L)` pair, build `s = x − x_prev`
162    /// and `y = ∇L − ∇L_prev`, ingest into history (skip per the
163    /// BFGS / SR1 acceptance criterion), then build the low-rank factors
164    /// of `B = σ I + V Vᵀ − U Uᵀ` from the rolling history and publish a
165    /// [`pounce_linalg::low_rank_update_sym_matrix::LowRankUpdateSymMatrix`]
166    /// as `data.w`. Mirrors `IpLimMemQuasiNewtonUpdater::Update`.
167    fn update_hessian(&mut self, data: &IpoptDataHandle, cq: &IpoptCqHandle) -> bool {
168        let (curr_x, curr_y_c, curr_y_d) = match data.borrow().curr.as_ref() {
169            Some(c) => (c.x.clone(), c.y_c.clone(), c.y_d.clone()),
170            None => return true,
171        };
172        let curr_grad_f = cq.borrow().curr_grad_f();
173        let curr_jac_c = cq.borrow().curr_jac_c();
174        let curr_jac_d = cq.borrow().curr_jac_d();
175
176        // Upstream y formula (`IpLimMemQuasiNewtonUpdater.cpp:284-308`):
177        //   y = (∇f_curr − ∇f_last)
178        //     + (J_c_curr^T − J_c_last^T) · y_c_curr
179        //     + (J_d_curr^T − J_d_last^T) · y_d_curr
180        // i.e. the change in the *NLP* Lagrangian gradient (no bound
181        // multipliers) where BOTH Jacobians are dotted against the
182        // CURRENT y_c/y_d. Using `curr_grad_lag_x` here would inject
183        // the bound-multiplier delta into y, which collapses spuriously
184        // when μ drops and corrupts the BFGS update.
185        if let (Some(prev_x), Some(prev_grad_f), Some(prev_jac_c), Some(prev_jac_d)) = (
186            self.last_x.clone(),
187            self.last_grad_f.clone(),
188            self.last_jac_c.clone(),
189            self.last_jac_d.clone(),
190        ) {
191            let mut s = curr_x.make_new();
192            s.add_two_vectors(1.0, &*curr_x, -1.0, &*prev_x, 0.0);
193
194            let mut y = curr_x.make_new();
195            // y = ∇f_curr − ∇f_last
196            y.add_two_vectors(1.0, &*curr_grad_f, -1.0, &*prev_grad_f, 0.0);
197            // y += J_c_curr^T y_c_curr  −  J_c_last^T y_c_curr
198            curr_jac_c.trans_mult_vector(1.0, &*curr_y_c, 1.0, &mut *y);
199            prev_jac_c.trans_mult_vector(-1.0, &*curr_y_c, 1.0, &mut *y);
200            // y += J_d_curr^T y_d_curr  −  J_d_last^T y_d_curr
201            curr_jac_d.trans_mult_vector(1.0, &*curr_y_d, 1.0, &mut *y);
202            prev_jac_d.trans_mult_vector(-1.0, &*curr_y_d, 1.0, &mut *y);
203
204            self.ingest_pair(Rc::from(s), Rc::from(y));
205        }
206        self.last_x = Some(Rc::clone(&curr_x));
207        self.last_grad_f = Some(Rc::clone(&curr_grad_f));
208        self.last_jac_c = Some(Rc::clone(&curr_jac_c));
209        self.last_jac_d = Some(Rc::clone(&curr_jac_d));
210
211        let n_idx = curr_x.dim();
212        let nu = n_idx as usize;
213        let sigma = match self.update_type {
214            UpdateType::Bfgs => self.compute_sigma_bfgs(),
215            // SR1 uses the same `LIM_MEM_INIT` sigma source as BFGS for
216            // the diagonal `B0`; the rank-1 corrections carry the sign.
217            UpdateType::Sr1 => self.compute_sigma_bfgs(),
218        };
219
220        // Build the compact factors of  B = σ I + V Vᵀ − U Uᵀ  by walking
221        // the curvature history. No dense `n×n` is ever formed: the walk
222        // is `O(n · history_len)` per pair. Publishing a
223        // `LowRankUpdateSymMatrix` lets `LowRankAugSystemSolver` apply the
224        // Hessian via Sherman-Morrison-Woodbury with `O(n · m)` storage
225        // for arbitrarily large `n`.
226        let (v_cols, u_cols) = self.build_low_rank(sigma, nu);
227
228        // Build `D`, `V`, `U` in `curr_x`'s *native* vector space rather
229        // than a fabricated flat `DenseVectorSpace`. For an ordinary
230        // (dense-primal) solve this is the same dense space as before; in
231        // the feasibility-restoration sub-IPM the primal is a 5-block
232        // `CompoundVector` `[orig | n_c | p_c | n_d | p_d]`, and a flat
233        // dense `W` cannot be multiplied against those compound iterates —
234        // `LowRankUpdateSymMatrix::mult_vector` panics in
235        // `element_wise_multiply`/`lr_mult_vector` the moment restoration
236        // runs (pounce#102). Cloning `curr_x` keeps `W` type-consistent
237        // with the space it operates on.
238        let col_space = DenseVectorSpace::new(n_idx);
239        let mut diag = curr_x.make_new();
240        diag.set(sigma);
241
242        let lr_space = LowRankUpdateSymMatrixSpace::new(n_idx, None, false);
243        let mut lr = lr_space.make_new_low_rank();
244        lr.set_diag(Rc::from(diag));
245        if let Some(mvm) = build_multi_vector(&col_space, curr_x.as_ref(), &v_cols) {
246            lr.set_v(Rc::new(mvm));
247        }
248        if let Some(mvm) = build_multi_vector(&col_space, curr_x.as_ref(), &u_cols) {
249            lr.set_u(Rc::new(mvm));
250        }
251
252        data.borrow_mut().w = Some(Rc::new(lr));
253        true
254    }
255}
256
257impl LimMemQuasiNewtonUpdater {
258    fn compute_sigma_bfgs(&self) -> Number {
259        if self.history.is_empty() {
260            return 1.0;
261        }
262        let last = self.history.last().unwrap();
263        let s_dot_s = last.s_norm * last.s_norm;
264        let y_dot_y = last.y_norm * last.y_norm;
265        initial_hessian_scalar(
266            self.initial_approx,
267            s_dot_s,
268            last.s_dot_y,
269            y_dot_y,
270            self.init_val_min,
271            self.init_val_max,
272        )
273    }
274
275    /// Walk the curvature-pair history oldest→newest, applying the BFGS
276    /// rank-2 / SR1 rank-1 recurrences against the running approximation
277    /// `B = σ I + V Vᵀ − U Uᵀ` to grow the dense column lists `V` and
278    /// `U`. Returns `(v_cols, u_cols)` in full primal space.
279    ///
280    /// For BFGS, each accepted pair `(s, y)` appends one positive column
281    /// `r/√(sᵀr)` (the `r rᵀ/(sᵀr)` term, `r = θ y + (1−θ) Bs` after
282    /// Powell damping) and one negative column `Bs/√(sᵀBs)` (the
283    /// `−(Bs)(Bs)ᵀ/(sᵀBs)` term). For SR1 each pair appends a single
284    /// column `(y−Bs)/√|denom|` to `V` (denom > 0) or `U` (denom < 0).
285    /// This reproduces, column for column, the action of the former
286    /// dense rebuild while never materializing an `n×n` buffer.
287    fn build_low_rank(&self, sigma: Number, n: usize) -> (Vec<Vec<Number>>, Vec<Vec<Number>>) {
288        let mut v_cols: Vec<Vec<Number>> = Vec::new();
289        let mut u_cols: Vec<Vec<Number>> = Vec::new();
290        if n == 0 {
291            return (v_cols, u_cols);
292        }
293        for pair in &self.history {
294            let s = dense_from_vec(pair.s.as_ref(), n);
295            let y = dense_from_vec(pair.y.as_ref(), n);
296
297            // bs = B s = σ s + Σ_v (vᵀs) v − Σ_u (uᵀs) u.
298            let mut bs: Vec<Number> = s.iter().map(|&si| sigma * si).collect();
299            for v in &v_cols {
300                let c: Number = (0..n).map(|i| v[i] * s[i]).sum();
301                for i in 0..n {
302                    bs[i] += c * v[i];
303                }
304            }
305            for u in &u_cols {
306                let c: Number = (0..n).map(|i| u[i] * s[i]).sum();
307                for i in 0..n {
308                    bs[i] -= c * u[i];
309                }
310            }
311
312            match self.update_type {
313                UpdateType::Bfgs => {
314                    let s_bs: Number = (0..n).map(|i| s[i] * bs[i]).sum();
315                    if s_bs <= 0.0 {
316                        continue;
317                    }
318                    let sy = pair.s_dot_y;
319                    let theta = powell_damping_theta(sy, s_bs);
320                    let sr = theta * sy + (1.0 - theta) * s_bs;
321                    if sr <= 0.0 {
322                        continue;
323                    }
324                    let r_scale = 1.0 / sr.sqrt();
325                    let bs_scale = 1.0 / s_bs.sqrt();
326                    // r rᵀ / sr  →  positive column r/√sr.
327                    v_cols.push(
328                        (0..n)
329                            .map(|i| (theta * y[i] + (1.0 - theta) * bs[i]) * r_scale)
330                            .collect(),
331                    );
332                    // −(Bs)(Bs)ᵀ / s_bs  →  negative column Bs/√s_bs.
333                    u_cols.push(bs.iter().map(|&bi| bi * bs_scale).collect());
334                }
335                UpdateType::Sr1 => {
336                    let yms: Vec<Number> = (0..n).map(|i| y[i] - bs[i]).collect();
337                    let denom: Number = (0..n).map(|i| yms[i] * s[i]).sum();
338                    let yms_norm: Number = yms.iter().map(|&w| w * w).sum::<Number>().sqrt();
339                    if !sr1_denominator_ok(denom, pair.s_norm, yms_norm) {
340                        continue;
341                    }
342                    let scale = 1.0 / denom.abs().sqrt();
343                    let col: Vec<Number> = yms.iter().map(|&w| w * scale).collect();
344                    if denom > 0.0 {
345                        v_cols.push(col);
346                    } else {
347                        u_cols.push(col);
348                    }
349                }
350            }
351        }
352        (v_cols, u_cols)
353    }
354}
355
356/// Pack flat column data into a [`MultiVectorMatrix`] whose columns are
357/// allocated in `template`'s native vector space (so the resulting
358/// low-rank `W` is type-consistent with the primal iterates — dense for
359/// an ordinary solve, a 5-block resto `CompoundVector` under restoration;
360/// see pounce#102). Returns `None` when there are no columns, so the
361/// caller leaves the corresponding V/U slot unset.
362fn build_multi_vector(
363    col_space: &Rc<DenseVectorSpace>,
364    template: &dyn Vector,
365    cols: &[Vec<Number>],
366) -> Option<MultiVectorMatrix> {
367    if cols.is_empty() {
368        return None;
369    }
370    let space = MultiVectorMatrixSpace::new(cols.len() as Index, Rc::clone(col_space));
371    let mut mvm = space.make_new_multi_vector();
372    for (k, col) in cols.iter().enumerate() {
373        let mut cv = template.make_new();
374        set_expanded(cv.as_mut(), col);
375        mvm.set_vector(k as Index, Rc::from(cv));
376    }
377    Some(mvm)
378}
379
380/// Flatten a primal vector to its dense expanded values, handling both a
381/// plain [`DenseVector`] and a (possibly nested) restoration
382/// [`CompoundVector`].
383fn expanded_of(v: &dyn Vector) -> Vec<Number> {
384    if let Some(dv) = v.as_any().downcast_ref::<DenseVector>() {
385        return dv.expanded_values();
386    }
387    if let Some(cv) = v.as_any().downcast_ref::<CompoundVector>() {
388        let mut out = Vec::with_capacity(cv.dim() as usize);
389        for i in 0..cv.n_comps() {
390            out.extend(expanded_of(cv.comp(i)));
391        }
392        return out;
393    }
394    panic!("LimMemQuasiNewtonUpdater: unsupported primal vector type for expansion");
395}
396
397/// Inverse of [`expanded_of`]: scatter a flat slice back into a primal
398/// vector of the same structure (dense or compound).
399fn set_expanded(dst: &mut dyn Vector, flat: &[Number]) {
400    if let Some(dv) = dst.as_any_mut().downcast_mut::<DenseVector>() {
401        dv.set_values(flat);
402        return;
403    }
404    if let Some(cv) = dst.as_any_mut().downcast_mut::<CompoundVector>() {
405        let n = cv.n_comps();
406        let dims: Vec<usize> = (0..n).map(|i| cv.comp(i).dim() as usize).collect();
407        let mut off = 0usize;
408        for (i, &d) in dims.iter().enumerate() {
409            set_expanded(cv.comp_mut(i as Index), &flat[off..off + d]);
410            off += d;
411        }
412        return;
413    }
414    panic!("LimMemQuasiNewtonUpdater: unsupported primal vector type for set_expanded");
415}
416
417fn dense_from_vec(v: &dyn Vector, n: usize) -> Vec<Number> {
418    let ev = expanded_of(v);
419    debug_assert_eq!(ev.len(), n);
420    ev
421}
422
423/// Initial Hessian scalar used as the diagonal of `B_0` before the
424/// rank-2 updates are applied. Mirrors upstream's three options
425/// (`limited_memory_initialization` in
426/// `IpLimMemQuasiNewtonUpdater.cpp`):
427///
428/// * `Identity` → `1.0`
429/// * `Scalar1` → `(s^T y) / (s^T s)`
430/// * `Scalar2` → `(y^T y) / (s^T y)`
431///
432/// Result is clamped to `[min_val, max_val]` per upstream's
433/// `limited_memory_init_val_{min,max}` defaults.
434pub fn initial_hessian_scalar(
435    init: InitialApprox,
436    s_dot_s: Number,
437    s_dot_y: Number,
438    y_dot_y: Number,
439    min_val: Number,
440    max_val: Number,
441) -> Number {
442    let raw = match init {
443        InitialApprox::Identity => 1.0,
444        InitialApprox::Scalar1 => {
445            if s_dot_s > 0.0 {
446                s_dot_y / s_dot_s
447            } else {
448                1.0
449            }
450        }
451        InitialApprox::Scalar2 => {
452            if s_dot_y > 0.0 {
453                y_dot_y / s_dot_y
454            } else {
455                1.0
456            }
457        }
458    };
459    raw.clamp(min_val, max_val)
460}
461
462/// Powell damping coefficient `theta` for the modified-y BFGS update.
463/// When the curvature pair `(s, y)` violates `s^T y >= 0.2 * s^T B s`,
464/// we replace `y` by `y_bar = theta * y + (1 - theta) * B s` so that
465/// the resulting update is positive-definite.
466///
467/// ```text
468///   if s^T y >= 0.2 * s^T B s:  theta = 1
469///   else:                        theta = (0.8 * s^T B s) / (s^T B s - s^T y)
470/// ```
471///
472/// Mirrors upstream's `IpLimMemQuasiNewtonUpdater.cpp:PowellDamping`.
473pub fn powell_damping_theta(s_dot_y: Number, s_dot_b_s: Number) -> Number {
474    if s_dot_y >= 0.2 * s_dot_b_s {
475        1.0
476    } else {
477        let denom = s_dot_b_s - s_dot_y;
478        if denom > 0.0 {
479            0.8 * s_dot_b_s / denom
480        } else {
481            1.0
482        }
483    }
484}
485
486/// L-BFGS curvature-pair acceptance: include `(s, y)` in history iff
487/// `s^T y > eps * ||s|| ||y||`. Mirrors upstream's skip-criterion
488/// (`IpLimMemQuasiNewtonUpdater.cpp` ~line 750: `eps = 1e-8`).
489pub fn bfgs_curvature_pair_ok(s_dot_y: Number, s_norm: Number, y_norm: Number) -> bool {
490    let eps = 1e-8_f64;
491    s_dot_y > eps * s_norm * y_norm
492}
493
494/// SR1 acceptance: the SR1 update divides by `(y - Bs)^T s`, so we
495/// need `|(y - Bs)^T s| > eps * ||s|| ||y - Bs||`. Mirrors upstream's
496/// `IpLimMemQuasiNewtonUpdater.cpp` SR1 skip-criterion.
497pub fn sr1_denominator_ok(yms_dot_s: Number, s_norm: Number, yms_norm: Number) -> bool {
498    let eps = 1e-8_f64;
499    yms_dot_s.abs() > eps * s_norm * yms_norm
500}
501
502#[cfg(test)]
503mod tests {
504    use super::*;
505
506    #[test]
507    fn identity_init_returns_one() {
508        assert_eq!(
509            initial_hessian_scalar(InitialApprox::Identity, 1.0, 1.0, 1.0, 1e-8, 1e8),
510            1.0
511        );
512    }
513
514    #[test]
515    fn scalar1_init_is_sy_over_ss() {
516        // s_dot_s=4, s_dot_y=2 → 2/4 = 0.5.
517        let v = initial_hessian_scalar(InitialApprox::Scalar1, 4.0, 2.0, 0.0, 1e-8, 1e8);
518        assert!((v - 0.5).abs() < 1e-15);
519    }
520
521    #[test]
522    fn scalar2_init_is_yy_over_sy() {
523        // y_dot_y=8, s_dot_y=2 → 4.
524        let v = initial_hessian_scalar(InitialApprox::Scalar2, 0.0, 2.0, 8.0, 1e-8, 1e8);
525        assert!((v - 4.0).abs() < 1e-15);
526    }
527
528    #[test]
529    fn init_clamped_to_max() {
530        let v = initial_hessian_scalar(InitialApprox::Scalar2, 0.0, 1e-20, 1.0, 1e-8, 1e8);
531        assert_eq!(v, 1e8);
532    }
533
534    #[test]
535    fn init_clamped_to_min() {
536        let v = initial_hessian_scalar(InitialApprox::Scalar2, 0.0, 1e20, 1.0, 1e-8, 1e8);
537        assert_eq!(v, 1e-8);
538    }
539
540    #[test]
541    fn powell_no_damping_when_curvature_ok() {
542        // s^T y = 1, s^T B s = 1; 1 >= 0.2 * 1 → theta = 1.
543        assert_eq!(powell_damping_theta(1.0, 1.0), 1.0);
544    }
545
546    #[test]
547    fn powell_damps_when_curvature_violated() {
548        // s^T y = 0.1, s^T B s = 1; 0.1 < 0.2 → theta = 0.8/(1-0.1) = 8/9.
549        let theta = powell_damping_theta(0.1, 1.0);
550        assert!((theta - 8.0 / 9.0).abs() < 1e-15);
551    }
552
553    #[test]
554    fn bfgs_skip_criterion() {
555        // s_dot_y = 1, ||s|| = 1, ||y|| = 1 → 1 > 1e-8: ok.
556        assert!(bfgs_curvature_pair_ok(1.0, 1.0, 1.0));
557        // s_dot_y = 1e-10, ||s|| = 1, ||y|| = 1 → 1e-10 < 1e-8: skip.
558        assert!(!bfgs_curvature_pair_ok(1e-10, 1.0, 1.0));
559    }
560
561    #[test]
562    fn sr1_skip_criterion_uses_absolute_value() {
563        // Negative numerator is fine for SR1 (rank-1 update can have either sign).
564        assert!(sr1_denominator_ok(-1.0, 1.0, 1.0));
565        assert!(!sr1_denominator_ok(1e-10, 1.0, 1.0));
566    }
567
568    fn rcv(values: &[Number]) -> Rc<dyn Vector> {
569        let mut v = pounce_linalg::dense_vector::DenseVectorSpace::new(values.len() as i32)
570            .make_new_dense();
571        v.set(0.0);
572        v.values_mut().copy_from_slice(values);
573        Rc::new(v)
574    }
575
576    #[test]
577    fn ingest_pair_accepts_well_curved_pair() {
578        let mut updater = LimMemQuasiNewtonUpdater::new();
579        // s = (1, 0), y = (1, 0); s·y = 1 > 1e-8.
580        let accepted = updater.ingest_pair(rcv(&[1.0, 0.0]), rcv(&[1.0, 0.0]));
581        assert!(accepted);
582        assert_eq!(updater.history.len(), 1);
583        let pair = &updater.history[0];
584        assert!((pair.s_dot_y - 1.0).abs() < 1e-15);
585        assert!((pair.s_norm - 1.0).abs() < 1e-15);
586        assert!((pair.y_norm - 1.0).abs() < 1e-15);
587    }
588
589    #[test]
590    fn ingest_pair_skips_zero_curvature() {
591        let mut updater = LimMemQuasiNewtonUpdater::new();
592        // s · y = 0 ⇒ skip per BFGS criterion (eps · ||s|| · ||y||).
593        let accepted = updater.ingest_pair(rcv(&[1.0]), rcv(&[0.0]));
594        assert!(!accepted);
595        assert!(updater.history.is_empty());
596    }
597
598    #[test]
599    fn history_caps_at_max_history() {
600        let mut updater = LimMemQuasiNewtonUpdater {
601            max_history: 2,
602            ..LimMemQuasiNewtonUpdater::default()
603        };
604        for _ in 0..5 {
605            updater.ingest_pair(rcv(&[1.0]), rcv(&[1.0]));
606        }
607        assert_eq!(updater.history.len(), 2);
608    }
609
610    #[test]
611    fn sr1_path_routes_through_sr1_skip() {
612        let mut updater = LimMemQuasiNewtonUpdater {
613            update_type: UpdateType::Sr1,
614            ..LimMemQuasiNewtonUpdater::default()
615        };
616        // SR1's heuristic accepts negative s·y (rank-1 sign-indefinite).
617        assert!(updater.ingest_pair(rcv(&[1.0]), rcv(&[-1.0])));
618    }
619
620    fn pair(s: &[Number], y: &[Number]) -> CurvaturePair {
621        let s_rc = rcv(s);
622        let y_rc = rcv(y);
623        let s_dot_y = s_rc.dot(&*y_rc);
624        let s_norm = s_rc.nrm2();
625        let y_norm = y_rc.nrm2();
626        CurvaturePair {
627            s: s_rc,
628            y: y_rc,
629            s_dot_y,
630            s_norm,
631            y_norm,
632        }
633    }
634
635    /// Reconstruct the dense `B = σ I + V Vᵀ − U Uᵀ` from the low-rank
636    /// factors so we can check the Hessian *action* the SMW solver sees.
637    fn reconstruct_b(n: usize, sigma: Number, v: &[Vec<Number>], u: &[Vec<Number>]) -> Vec<Number> {
638        let mut b = vec![0.0_f64; n * n];
639        for i in 0..n {
640            b[i * n + i] = sigma;
641        }
642        for col in v {
643            for i in 0..n {
644                for j in 0..n {
645                    b[i * n + j] += col[i] * col[j];
646                }
647            }
648        }
649        for col in u {
650            for i in 0..n {
651                for j in 0..n {
652                    b[i * n + j] -= col[i] * col[j];
653                }
654            }
655        }
656        b
657    }
658
659    fn mat_vec(b: &[Number], n: usize, x: &[Number]) -> Vec<Number> {
660        (0..n)
661            .map(|i| (0..n).map(|j| b[i * n + j] * x[j]).sum())
662            .collect()
663    }
664
665    #[test]
666    fn bfgs_low_rank_recovers_hessian_action() {
667        // For a strictly-convex quadratic f(x) = ½ xᵀ A x with A SPD,
668        // a single BFGS update from B₀ = I along a curvature pair
669        // (s, y = A s) reproduces A on the s-direction:  B₁ s = y = A s.
670        // Use A = diag(2, 5), s = (1, 1), so y = (2, 5).
671        let mut up = LimMemQuasiNewtonUpdater::new();
672        up.history.push(pair(&[1.0, 1.0], &[2.0, 5.0]));
673        let (v, u) = up.build_low_rank(1.0, 2);
674        let b = reconstruct_b(2, 1.0, &v, &u);
675        let bs = mat_vec(&b, 2, &[1.0, 1.0]);
676        assert!((bs[0] - 2.0).abs() < 1e-12, "Bs[0]={}", bs[0]);
677        assert!((bs[1] - 5.0).abs() < 1e-12, "Bs[1]={}", bs[1]);
678    }
679
680    #[test]
681    fn bfgs_low_rank_keeps_symmetry() {
682        let mut up = LimMemQuasiNewtonUpdater::new();
683        up.history.push(pair(&[1.0, 0.5], &[2.0, 1.0]));
684        up.history.push(pair(&[0.7, 1.2], &[1.0, 2.5]));
685        let (v, u) = up.build_low_rank(3.0, 2);
686        let b = reconstruct_b(2, 3.0, &v, &u);
687        // VVᵀ and UUᵀ are symmetric by construction, so B must be too.
688        assert!((b[1] - b[2]).abs() < 1e-12);
689    }
690
691    #[test]
692    fn sr1_low_rank_recovers_hessian_action() {
693        // SR1 update with B₀ = I, s = (1, 1), y = (2, 5):
694        // y - B s = (1, 4); denom = (1, 4)·(1, 1) = 5 > 0 → one V column.
695        // ΔB = (1, 4)(1, 4)ᵀ / 5; B₁ s = (2.0, 5.0) = y. ✓
696        let mut up = LimMemQuasiNewtonUpdater {
697            update_type: UpdateType::Sr1,
698            ..LimMemQuasiNewtonUpdater::default()
699        };
700        up.history.push(pair(&[1.0, 1.0], &[2.0, 5.0]));
701        let (v, u) = up.build_low_rank(1.0, 2);
702        assert_eq!(v.len(), 1, "positive denom routes to V");
703        assert!(u.is_empty());
704        let b = reconstruct_b(2, 1.0, &v, &u);
705        let bs = mat_vec(&b, 2, &[1.0, 1.0]);
706        assert!((bs[0] - 2.0).abs() < 1e-12);
707        assert!((bs[1] - 5.0).abs() < 1e-12);
708    }
709
710    #[test]
711    fn empty_history_yields_no_columns() {
712        let up = LimMemQuasiNewtonUpdater::new();
713        let (v, u) = up.build_low_rank(1.0, 4);
714        assert!(v.is_empty() && u.is_empty());
715    }
716}