#![allow(non_snake_case)]
//! Dense nonlinear optimization solvers in Rust.
//!
//! This crate provides:
//! - `Problem` + `optimize`: the default, hard-to-misuse API with automatic solver selection.
//! - `SecondOrderProblem` + `optimize`: automatic solver selection for Hessian-aware objectives.
//! - `Bfgs`: dense quasi-Newton optimization with a robust hybrid line search.
//! - `NewtonTrustRegion`: Hessian-based trust-region optimization.
//! - `Arc`: Adaptive Regularization with Cubics (ARC).
//!
//! All solvers support optional simple box constraints and are built around practical
//! robustness for noisy/non-ideal objectives.
//!
//! # Features
//! - `Bfgs` hybrid line search: Strong Wolfe with nonmonotone (GLL) Armijo, approximate-Wolfe, and
//!   gradient-reduction acceptors, plus a best-seen salvage path and a small probing grid.
//! - `Bfgs` trust-region (dogleg) fallback with CG-based solves on the inverse Hessian, diagonal
//!   regularization, and scaled-identity resets under severe noise.
//! - `NewtonTrustRegion`: projected Steihaug-Toint trust-region iterations using objective Hessians.
//! - `Arc`: cubic-regularized model steps with adaptive regularization updates (`rho`, `sigma`).
//! - Profile-based heuristic policy selection for rough, piecewise-flat objectives.
//! - Adaptive strategy switching (Wolfe <-> Backtracking) based on success streaks (no timed flips).
//! - Optional box constraints with projected gradients and coordinate clamping.
//! - Optional flat-bracket midpoint acceptance inside zoom.
//! - Stochastic jiggling of step sizes on persistent flats.
//! - Multi-direction (coordinate) rescue when progress is flat.
//!
//! ## Defaults (key settings)
//! - Line search: Strong Wolfe primary; GLL nonmonotone Armijo; approximate-Wolfe and gradient-drop
//!   acceptors; probing grid; keep-best salvage.
//! - Trust region: dogleg fallback enabled; Δ₀ = min(1, 10/||g₀||); adaptive by ρ; SPD enforcement
//!   and scaled-identity resets when needed.
//! - Tolerances: `c1=1e-4`, `c2=0.9`; heuristics selected by `Profile`.
//! - Zoom midpoint: flat-bracket midpoint acceptance under profile control.
//! - Stochastic jiggling: default ON with scale 1e-3 (applied only after repeated flats in backtracking).
//! - Coordinate rescue: default ON (applied only after two consecutive flat accepts).
//! - Strategy switching: Wolfe <-> Backtracking flips happen only on success/failure streaks (no timed flips).
//! - A clear, configurable builder API and robust termination with informative errors.
//!
//! # Example
//!
//! Minimize the Rosenbrock function, a classic test case for optimization algorithms.
//!
//! ```
//! use opt::{
//!     optimize, FirstOrderObjective, FirstOrderSample, MaxIterations, Problem, Profile, Solution,
//!     Tolerance,
//! };
//! use ndarray::{array, Array1};
//!
//! struct Rosenbrock;
//!
//! impl opt::ZerothOrderObjective for Rosenbrock {
//!     fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, opt::ObjectiveEvalError> {
//!         let a = 1.0;
//!         let b = 100.0;
//!         Ok((a - x[0]).powi(2) + b * (x[1] - x[0].powi(2)).powi(2))
//!     }
//! }
//!
//! impl FirstOrderObjective for Rosenbrock {
//!     fn eval_grad(&mut self, x: &Array1<f64>) -> Result<FirstOrderSample, opt::ObjectiveEvalError> {
//!         let a = 1.0;
//!         let b = 100.0;
//!         let f = (a - x[0]).powi(2) + b * (x[1] - x[0].powi(2)).powi(2);
//!         let gradient = array![
//!             -2.0 * (a - x[0]) - 4.0 * b * (x[1] - x[0].powi(2)) * x[0],
//!             2.0 * b * (x[1] - x[0].powi(2)),
//!         ];
//!         Ok(FirstOrderSample { value: f, gradient })
//!     }
//! }
//!
//! // Set the initial guess.
//! let x0 = array![-1.2, 1.0];
//!
//! // Run the solver.
//! let Solution {
//!     final_point: x_min,
//!     final_value,
//!     iterations,
//!     ..
//! } = optimize(Problem::new(x0, Rosenbrock))
//!     .with_tolerance(Tolerance::new(1e-6).unwrap())
//!     .with_max_iterations(MaxIterations::new(100).unwrap())
//!     .with_profile(Profile::Robust)
//!     .run()
//!     .expect("BFGS failed to solve");
//!
//! println!(
//!     "Found minimum f([{:.3}, {:.3}]) = {:.4} in {} iterations.",
//!     x_min[0], x_min[1], final_value, iterations
//! );
//!
//! // The known minimum is at [1.0, 1.0].
//! assert!((x_min[0] - 1.0).abs() < 1e-5);
//! assert!((x_min[1] - 1.0).abs() < 1e-5);
//! ```

use ndarray::{Array1, Array2};
use std::collections::VecDeque;

// Numerical helpers and small utilities
const EPS: f64 = f64::EPSILON;
#[inline]
fn eps_f(fk: f64, tau: f64) -> f64 {
    tau * EPS * (1.0 + fk.abs())
}
#[inline]
fn eps_g(gk: &Array1<f64>, dk: &Array1<f64>, tau: f64) -> f64 {
    tau * EPS * gk.dot(gk).sqrt() * dk.dot(dk).sqrt()
}
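
// Illustrative check of the scaling: for tau = 1.0 the function-value
// tolerance at f_k = 1.0 is exactly 2 * machine epsilon, and the gradient
// tolerance scales with ||g|| * ||d||.
#[cfg(test)]
mod eps_helper_demo {
    use super::*;
    use ndarray::array;

    #[test]
    fn eps_scales_with_magnitudes() {
        assert!((eps_f(1.0, 1.0) - 2.0 * EPS).abs() < 1e-30);
        let g = array![3.0, 4.0]; // ||g|| = 5
        let d = array![0.0, 2.0]; // ||d|| = 2
        assert!((eps_g(&g, &d, 1.0) - 10.0 * EPS).abs() < 1e-24);
    }
}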

#[inline]
fn directional_derivative(g: &Array1<f64>, s: &Array1<f64>, alpha: f64, d: &Array1<f64>) -> f64 {
    if alpha > 0.0 {
        g.dot(s) / alpha
    } else {
        g.dot(d)
    }
}

#[inline]
fn classify_line_search_accept(
    core: &BfgsCore,
    step_ok: bool,
    f_k: f64,
    fmax: f64,
    f_trial: f64,
    gk_ts: f64,
    g_trial_dot_d: f64,
    gk_dot_d_eff: f64,
    g_trial_norm: f64,
    gk_norm: f64,
    drop_factor: f64,
    eps_f_k: f64,
    eps_g_k: f64,
    c2: f64,
) -> Option<AcceptKind> {
    if !step_ok {
        return None;
    }
    let armijo_ok = core.accept_armijo(f_k, gk_ts, f_trial);
    let gll_ok = core.accept_gll_nonmonotone(fmax, gk_ts, f_trial);
    let dir_ok = g_trial_dot_d <= -eps_g_k;
    let strong_curv_ok = g_trial_dot_d.abs() <= c2 * gk_dot_d_eff.abs();
    let approx_curv_ok =
        g_trial_dot_d.abs() <= c2 * gk_dot_d_eff.abs() + core.curv_slack_scale * eps_g_k;
    let f_flat_ok = f_trial <= f_k + eps_f_k;

    if armijo_ok && strong_curv_ok {
        Some(AcceptKind::StrongWolfe)
    } else if armijo_ok && core.relaxed_acceptors_enabled() && f_flat_ok && approx_curv_ok && dir_ok
    {
        Some(AcceptKind::ApproxWolfe)
    } else if gll_ok && approx_curv_ok {
        Some(AcceptKind::Nonmonotone)
    } else if core.relaxed_acceptors_enabled()
        && f_flat_ok
        && g_trial_norm <= drop_factor * gk_norm
        && dir_ok
    {
        Some(AcceptKind::GradDrop)
    } else {
        None
    }
}
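
// Reading aid for the acceptance ladder above, from strictest to most
// permissive (the code itself is authoritative):
//   StrongWolfe: Armijo decrease plus |g_trial'd| <= c2 * |g_k'd|.
//   ApproxWolfe: Armijo, near-flat f (within eps_f), curvature relaxed by
//                curv_slack_scale * eps_g, and a strictly descending trial slope.
//   Nonmonotone: GLL Armijo against the recent-f maximum, with the relaxed
//                curvature test.
//   GradDrop:    near-flat f, ||g_trial|| <= drop_factor * ||g_k||, and a
//                descending trial slope.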

#[inline]
fn any_free_variables(active: &[bool]) -> bool {
    active.iter().any(|&is_active| !is_active)
}

fn mask_vector_inplace(v: &mut Array1<f64>, active: &[bool]) {
    for (vi, &is_active) in v.iter_mut().zip(active.iter()) {
        if is_active {
            *vi = 0.0;
        }
    }
}

fn masked_hv_inplace(h: &Array2<f64>, v: &Array1<f64>, active: &[bool], out: &mut Array1<f64>) {
    out.fill(0.0);
    for i in 0..h.nrows() {
        if active[i] {
            continue;
        }
        let mut accum = 0.0;
        for j in 0..h.ncols() {
            if active[j] {
                continue;
            }
            accum += h[[i, j]] * v[j];
        }
        out[i] = accum;
    }
}

fn cg_solve_masked_adaptive(
    a: &Array2<f64>,
    b: &Array1<f64>,
    active: &[bool],
    max_iter: usize,
    tol_rel: f64,
    ridge: f64,
) -> Option<Array1<f64>> {
    if a.nrows() != a.ncols() || a.nrows() != b.len() || active.len() != b.len() {
        return None;
    }
    if !any_free_variables(active) {
        return Some(Array1::zeros(b.len()));
    }
    if prefer_dense_direct(b.len()) {
        let (effective_a, effective_b) = build_masked_subproblem_system(a, b, Some(active));
        return dense_solve_shifted(&effective_a, &effective_b, ridge);
    }

    let n = b.len();
    let mut x = Array1::<f64>::zeros(n);
    let mut r = b.clone();
    mask_vector_inplace(&mut r, active);
    let b_norm = r.dot(&r).sqrt();
    if !b_norm.is_finite() {
        return None;
    }
    if b_norm <= 1e-32 {
        return Some(x);
    }
    let tol_abs = tol_rel.max(0.0) * b_norm.max(1e-16);
    let mut p = r.clone();
    let mut rs_old = r.dot(&r);
    let mut ap = Array1::<f64>::zeros(n);

    for _ in 0..max_iter {
        masked_hv_inplace(a, &p, active, &mut ap);
        if ridge > 0.0 {
            for i in 0..n {
                ap[i] += ridge * p[i];
            }
        }
        let p_ap = p.dot(&ap);
        if !p_ap.is_finite() || p_ap <= 0.0 {
            return None;
        }
        let alpha = rs_old / p_ap;
        if !alpha.is_finite() {
            return None;
        }
        x.scaled_add(alpha, &p);
        r.scaled_add(-alpha, &ap);
        mask_vector_inplace(&mut x, active);
        mask_vector_inplace(&mut r, active);
        let rs_new = r.dot(&r);
        if !rs_new.is_finite() {
            return None;
        }
        if rs_new.sqrt() <= tol_abs {
            return Some(x);
        }
        let beta = rs_new / rs_old;
        if !beta.is_finite() || beta < 0.0 {
            return None;
        }
        p *= beta;
        p += &r;
        mask_vector_inplace(&mut p, active);
        rs_old = rs_new;
    }
    Some(x)
}
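
// A minimal sanity check: with no active variables the masked solve reduces
// to solving A x = b, and marking a coordinate active pins it to zero in the
// subproblem. Hand solution of [[4,1],[1,3]] x = [1,2] is [1/11, 7/11].
#[cfg(test)]
mod masked_cg_demo {
    use super::*;
    use ndarray::array;

    #[test]
    fn masked_solve_matches_hand_solution() {
        let a = array![[4.0, 1.0], [1.0, 3.0]];
        let b = array![1.0, 2.0];
        let x = cg_solve_masked_adaptive(&a, &b, &[false, false], 50, 1e-12, 0.0).unwrap();
        assert!((x[0] - 1.0 / 11.0).abs() < 1e-8);
        assert!((x[1] - 7.0 / 11.0).abs() < 1e-8);

        // With x[1] held active the free subproblem collapses to 4 * x0 = 1.
        let x = cg_solve_masked_adaptive(&a, &b, &[false, true], 50, 1e-12, 0.0).unwrap();
        assert!((x[0] - 0.25).abs() < 1e-8 && x[1].abs() < 1e-12);
    }
}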

fn bfgs_eval_cost<ObjFn>(
    oracle: &mut FirstOrderCache,
    obj_fn: &mut ObjFn,
    x: &Array1<f64>,
    func_evals: &mut usize,
) -> Result<f64, ObjectiveEvalError>
where
    ObjFn: FirstOrderObjective,
{
    oracle.eval_cost(obj_fn, x, func_evals)
}

fn bfgs_eval_cost_grad<ObjFn>(
    oracle: &mut FirstOrderCache,
    obj_fn: &mut ObjFn,
    x: &Array1<f64>,
    func_evals: &mut usize,
    grad_evals: &mut usize,
) -> Result<(f64, Array1<f64>), ObjectiveEvalError>
where
    ObjFn: FirstOrderObjective,
{
    oracle.eval_cost_grad(obj_fn, x, func_evals, grad_evals)
}

// Ring buffer for GLL nonmonotone Armijo (internal only)
struct GllWindow {
    buf: VecDeque<f64>,
    cap: usize,
}
impl GllWindow {
    fn new(cap: usize) -> Self {
        Self {
            buf: VecDeque::with_capacity(cap.max(1)),
            cap: cap.max(1),
        }
    }
    fn clear(&mut self) {
        self.buf.clear();
    }
    fn push(&mut self, f: f64) {
        if self.buf.len() == self.cap {
            self.buf.pop_front();
        }
        self.buf.push_back(f);
    }
    fn fmax(&self) -> f64 {
        self.buf.iter().cloned().fold(f64::NEG_INFINITY, f64::max)
    }
    fn is_empty(&self) -> bool {
        self.buf.is_empty()
    }
    fn set_cap(&mut self, cap: usize) {
        self.cap = cap.max(1);
        while self.buf.len() > self.cap {
            self.buf.pop_front();
        }
    }
}
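
// Small behavioral sketch: the window keeps only the most recent `cap`
// values, so fmax() is the maximum over that sliding window.
#[cfg(test)]
mod gll_window_demo {
    use super::*;

    #[test]
    fn window_evicts_oldest_value() {
        let mut w = GllWindow::new(2);
        assert!(w.is_empty());
        w.push(5.0);
        w.push(1.0);
        w.push(2.0); // evicts 5.0
        assert_eq!(w.fmax(), 2.0);
        w.set_cap(1); // shrinking drops older entries
        assert_eq!(w.fmax(), 2.0);
    }
}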

// Best-seen tracker during line search/zoom (internal only)
#[derive(Clone)]
struct ProbeBest {
    f: f64,
    x: Array1<f64>,
    g: Array1<f64>,
}
impl ProbeBest {
    fn new(x0: &Array1<f64>, f0: f64, g0: &Array1<f64>) -> Self {
        Self {
            x: x0.clone(),
            f: f0,
            g: g0.clone(),
        }
    }
    fn consider(&mut self, x: &Array1<f64>, f: f64, g: &Array1<f64>) {
        if !f.is_finite() || g.iter().any(|v| !v.is_finite()) {
            return;
        }
        if !self.f.is_finite() || f < self.f {
            self.f = f;
            self.x = x.clone();
            self.g = g.clone();
        }
    }
}

struct CgResult {
    x: Array1<f64>,
    rel_resid: f64,
}

// Conjugate gradient solve for (A + ridge*I) x = b; avoids dense factorizations.
fn cg_solve_from(
    a: &Array2<f64>,
    b: &Array1<f64>,
    x0: Array1<f64>,
    max_iter: usize,
    tol: f64,
    ridge: f64,
) -> Option<CgResult> {
    let n = a.nrows();
    if a.ncols() != n || b.len() != n {
        return None;
    }
    let mut x = x0;
    let mut ax = a.dot(&x);
    if ridge > 0.0 {
        for i in 0..n {
            ax[i] += ridge * x[i];
        }
    }
    let mut r = b - &ax;
    let mut p = r.clone();
    let mut rs_old = r.dot(&r);
    if !rs_old.is_finite() {
        return None;
    }
    let b_norm = b.dot(b).sqrt().max(1.0);
    let tol_abs = tol * b_norm;
    if rs_old.sqrt() <= tol_abs {
        return Some(CgResult {
            x,
            rel_resid: rs_old.sqrt() / b_norm,
        });
    }
    for _ in 0..max_iter {
        let mut ap = a.dot(&p);
        if ridge > 0.0 {
            for i in 0..n {
                ap[i] += ridge * p[i];
            }
        }
        let p_ap = p.dot(&ap);
        if !p_ap.is_finite() || p_ap <= 0.0 {
            return None;
        }
        let alpha = rs_old / p_ap;
        if !alpha.is_finite() {
            return None;
        }
        x.scaled_add(alpha, &p);
        r.scaled_add(-alpha, &ap);
        let rs_new = r.dot(&r);
        if !rs_new.is_finite() {
            return None;
        }
        if rs_new.sqrt() <= tol_abs {
            return Some(CgResult {
                x,
                rel_resid: rs_new.sqrt() / b_norm,
            });
        }
        let beta = rs_new / rs_old;
        p *= beta;
        p += &r;
        rs_old = rs_new;
    }
    Some(CgResult {
        x,
        rel_resid: rs_old.sqrt() / b_norm,
    })
}
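
// Minimal check: CG on a small SPD system converges (in at most n steps in
// exact arithmetic) to the direct solution of [[4,1],[1,3]] x = [1,2].
#[cfg(test)]
mod cg_solve_demo {
    use super::*;
    use ndarray::array;

    #[test]
    fn cg_matches_direct_solution() {
        let a = array![[4.0, 1.0], [1.0, 3.0]];
        let b = array![1.0, 2.0];
        let r = cg_solve_from(&a, &b, Array1::zeros(2), 10, 1e-12, 0.0).unwrap();
        assert!((r.x[0] - 1.0 / 11.0).abs() < 1e-8);
        assert!((r.x[1] - 7.0 / 11.0).abs() < 1e-8);
        assert!(r.rel_resid < 1e-10);
    }
}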

// Gaussian elimination with partial pivoting for (A + ridge*I) x = b.
fn dense_solve_shifted(a: &Array2<f64>, b: &Array1<f64>, ridge: f64) -> Option<Array1<f64>> {
    let n = a.nrows();
    if a.ncols() != n || b.len() != n {
        return None;
    }
    let mut mat = a.clone();
    if ridge > 0.0 {
        for i in 0..n {
            mat[[i, i]] += ridge;
        }
    }
    let mut rhs = b.clone();

    for k in 0..n {
        let mut pivot_row = k;
        let mut pivot_abs = mat[[k, k]].abs();
        for i in (k + 1)..n {
            let cand = mat[[i, k]].abs();
            if cand > pivot_abs {
                pivot_abs = cand;
                pivot_row = i;
            }
        }
        if !pivot_abs.is_finite() || pivot_abs <= 1e-14 {
            return None;
        }
        if pivot_row != k {
            for j in k..n {
                let tmp = mat[[k, j]];
                mat[[k, j]] = mat[[pivot_row, j]];
                mat[[pivot_row, j]] = tmp;
            }
            let tmp_rhs = rhs[k];
            rhs[k] = rhs[pivot_row];
            rhs[pivot_row] = tmp_rhs;
        }

        let pivot = mat[[k, k]];
        for i in (k + 1)..n {
            let factor = mat[[i, k]] / pivot;
            mat[[i, k]] = 0.0;
            for j in (k + 1)..n {
                mat[[i, j]] -= factor * mat[[k, j]];
            }
            rhs[i] -= factor * rhs[k];
        }
    }

    let mut x = Array1::<f64>::zeros(n);
    for ii in 0..n {
        let i = n - 1 - ii;
        let mut sum = rhs[i];
        for j in (i + 1)..n {
            sum -= mat[[i, j]] * x[j];
        }
        let diag = mat[[i, i]];
        if !diag.is_finite() || diag.abs() <= 1e-14 {
            return None;
        }
        x[i] = sum / diag;
    }
    if x.iter().all(|v| v.is_finite()) {
        Some(x)
    } else {
        None
    }
}
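
// Tiny check: the ridge term shifts the diagonal, so solving with A = I and
// ridge = 1 solves (2*I) x = b exactly.
#[cfg(test)]
mod dense_solve_demo {
    use super::*;
    use ndarray::array;

    #[test]
    fn ridge_shifts_the_diagonal() {
        let a = Array2::<f64>::eye(2);
        let b = array![2.0, -4.0];
        let x = dense_solve_shifted(&a, &b, 1.0).unwrap();
        assert_eq!(x, array![1.0, -2.0]);
    }
}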

#[inline]
fn prefer_dense_direct(n: usize) -> bool {
    n <= 128
}

fn build_masked_subproblem_system(
    h: &Array2<f64>,
    rhs: &Array1<f64>,
    active: Option<&[bool]>,
) -> (Array2<f64>, Array1<f64>) {
    let mut effective_h = h.clone();
    let mut effective_rhs = rhs.clone();
    if let Some(active) = active
        && !active.is_empty()
    {
        for i in 0..active.len() {
            if active[i] {
                effective_rhs[i] = 0.0;
                for j in 0..active.len() {
                    effective_h[[i, j]] = 0.0;
                    effective_h[[j, i]] = 0.0;
                }
                effective_h[[i, i]] = 1.0;
            }
        }
    }
    (effective_h, effective_rhs)
}

// Levenberg-style shift search: try the unshifted Newton step first, then
// grow and bisect lambda until the step fits the radius with positive
// predicted reduction.
fn dense_trust_region_step(
    h: &Array2<f64>,
    g: &Array1<f64>,
    delta: f64,
    active: Option<&[bool]>,
) -> Option<(Array1<f64>, f64)> {
    let rhs = -g.clone();
    let (effective_h, effective_rhs) = build_masked_subproblem_system(h, &rhs, active);
    let solve_with_shift = |lambda: f64| dense_solve_shifted(&effective_h, &effective_rhs, lambda);
    let predicted = |s: &Array1<f64>| {
        let hs = h.dot(s);
        -(g.dot(s) + 0.5 * s.dot(&hs))
    };

    if let Some(s) = solve_with_shift(0.0) {
        let s_norm = s.dot(&s).sqrt();
        let pred = predicted(&s);
        if s_norm.is_finite() && s_norm <= delta && pred.is_finite() && pred > 0.0 {
            return Some((s, pred));
        }
    }

    let mut lambda_lo = 0.0;
    let mut lambda_hi = 1e-8f64;
    let mut best: Option<(Array1<f64>, f64)> = None;
    for _ in 0..80 {
        if let Some(s) = solve_with_shift(lambda_hi) {
            let s_norm = s.dot(&s).sqrt();
            let pred = predicted(&s);
            if s_norm.is_finite() && s_norm <= delta && pred.is_finite() && pred > 0.0 {
                best = Some((s, pred));
                break;
            }
        }
        lambda_lo = lambda_hi;
        lambda_hi *= 2.0;
    }
    let (mut best_step, mut best_pred) = best?;
    for _ in 0..80 {
        let lambda_mid = 0.5 * (lambda_lo + lambda_hi);
        if !lambda_mid.is_finite() || (lambda_hi - lambda_lo) <= 1e-12 * lambda_hi.max(1.0) {
            break;
        }
        match solve_with_shift(lambda_mid) {
            Some(s) => {
                let s_norm = s.dot(&s).sqrt();
                let pred = predicted(&s);
                if s_norm.is_finite() && s_norm <= delta && pred.is_finite() && pred > 0.0 {
                    lambda_hi = lambda_mid;
                    best_step = s;
                    best_pred = pred;
                } else {
                    lambda_lo = lambda_mid;
                }
            }
            None => {
                lambda_lo = lambda_mid;
            }
        }
    }
    Some((best_step, best_pred))
}
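
// Sketch of the contract: when the Newton step leaves the region, the shift
// search returns a step inside ||s|| <= delta with positive predicted
// reduction. With H = I and g = [1, 0] the unconstrained step is [-1, 0].
#[cfg(test)]
mod trust_region_step_demo {
    use super::*;
    use ndarray::array;

    #[test]
    fn shifted_step_respects_the_radius() {
        let h = Array2::<f64>::eye(2);
        let g = array![1.0, 0.0];
        let (s, pred) = dense_trust_region_step(&h, &g, 0.5, None).unwrap();
        assert!(s.dot(&s).sqrt() <= 0.5 + 1e-9);
        assert!(pred > 0.0);
    }
}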

// Adaptive CG iteration cap: full solve for small n, capped growth for large n.
fn cg_iter_cap(n: usize, base: usize) -> usize {
    let full_solve_n = 128usize;
    let cap = 200usize;
    if n <= full_solve_n {
        n.max(1)
    } else {
        n.min(cap).max(base)
    }
}

// Adaptive CG: retry with a higher cap/tighter tol if residual is too large.
fn cg_solve_adaptive(
    a: &Array2<f64>,
    b: &Array1<f64>,
    base_iter: usize,
    tol: f64,
    ridge: f64,
) -> Option<Array1<f64>> {
    let n = a.nrows();
    if prefer_dense_direct(n) {
        return dense_solve_shifted(a, b, ridge);
    }
    let cap1 = cg_iter_cap(n, base_iter);
    let stage1 = cg_solve_from(a, b, Array1::<f64>::zeros(n), cap1, tol, ridge)?;
    if stage1.rel_resid.is_finite() && stage1.rel_resid <= tol * 10.0 {
        return Some(stage1.x);
    }
    let cap2 = cg_iter_cap(n, base_iter.saturating_mul(2));
    if cap2 <= cap1 {
        return Some(stage1.x);
    }
    let refine_iters = cap2.saturating_sub(cap1).max(1);
    let stage2 = cg_solve_from(a, b, stage1.x, refine_iters, tol * 0.1, ridge)?;
    Some(stage2.x)
}

// Helper: return a scaled identity matrix (lambda * I_n).
fn scaled_identity(n: usize, lambda: f64) -> Array2<f64> {
    Array2::<f64>::eye(n) * lambda
}

fn hessian_is_effectively_symmetric(a: &Array2<f64>) -> bool {
    let n = a.nrows();
    let mut max_skew = 0.0f64;
    let mut scale = 0.0f64;
    for i in 0..n {
        for j in (i + 1)..n {
            let aij = a[[i, j]];
            let aji = a[[j, i]];
            max_skew = max_skew.max((aij - aji).abs());
            scale = scale.max(aij.abs()).max(aji.abs());
        }
    }
    max_skew <= 1e-12 * (1.0 + scale)
}

fn symmetrize_into(workspace: &mut Array2<f64>, a: &Array2<f64>) {
    workspace.assign(a);
    let n = a.nrows();
    for i in 0..n {
        for j in (i + 1)..n {
            let v = 0.5 * (a[[i, j]] + a[[j, i]]);
            workspace[[i, j]] = v;
            workspace[[j, i]] = v;
        }
    }
}

fn has_finite_positive_diagonal(a: &Array2<f64>) -> bool {
    for i in 0..a.nrows() {
        let diag = a[[i, i]];
        if !diag.is_finite() || diag <= 0.0 {
            return false;
        }
    }
    true
}

fn apply_inverse_bfgs_update_in_place(
    h_inv: &mut Array2<f64>,
    s: &Array1<f64>,
    y: &Array1<f64>,
    backup: &mut Array2<f64>,
) -> bool {
    backup.assign(h_inv);
    let rho = 1.0 / s.dot(y);
    let hy = backup.dot(y);
    let yhy = y.dot(&hy);
    let coeff = (1.0 + yhy * rho) * rho;
    let n = h_inv.nrows();
    for i in 0..n {
        for j in i..n {
            let v = backup[[i, j]] + coeff * s[i] * s[j] - rho * (hy[i] * s[j] + s[i] * hy[j]);
            h_inv[[i, j]] = v;
            h_inv[[j, i]] = v;
        }
    }
    has_finite_positive_diagonal(h_inv)
}
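
// The in-place update above is the textbook inverse-BFGS formula
//   H+ = (I - rho*s*y') H (I - rho*y*s') + rho*s*s',  rho = 1 / (s'y),
// expanded to H + (1 + rho*y'Hy)*rho*s*s' - rho*(Hy*s' + s*(Hy)') so one
// symmetric pass suffices. Illustrative check: when the secant condition
// H y = s already holds, the update leaves H unchanged.
#[cfg(test)]
mod bfgs_update_demo {
    use super::*;
    use ndarray::array;

    #[test]
    fn satisfied_secant_leaves_h_unchanged() {
        let mut h_inv = Array2::<f64>::eye(2);
        let mut backup = Array2::<f64>::zeros((2, 2));
        let s = array![1.0, 0.0];
        let y = array![1.0, 0.0];
        assert!(apply_inverse_bfgs_update_in_place(&mut h_inv, &s, &y, &mut backup));
        assert_eq!(h_inv, Array2::<f64>::eye(2));
    }
}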

// Box constraints with projection and active-set tolerance.
#[derive(Clone)]
struct BoxSpec {
    lower: Array1<f64>,
    upper: Array1<f64>,
    tol: f64,
}

impl BoxSpec {
    fn new(lower: Array1<f64>, upper: Array1<f64>, tol: f64) -> Self {
        Self { lower, upper, tol }
    }

    fn project(&self, x: &Array1<f64>) -> Array1<f64> {
        let mut z = x.clone();
        for i in 0..z.len() {
            let lo = self.lower[i];
            let hi = self.upper[i];
            if z[i] < lo {
                z[i] = lo;
            } else if z[i] > hi {
                z[i] = hi;
            }
        }
        z
    }

    fn active_mask(&self, x: &Array1<f64>, g: &Array1<f64>) -> Vec<bool> {
        let mut mask = vec![false; x.len()];
        for i in 0..x.len() {
            let lo = self.lower[i];
            let hi = self.upper[i];
            let tol = self.tol;
            let at_lower = x[i] <= lo + tol;
            let at_upper = x[i] >= hi - tol;
            mask[i] = (at_lower && g[i] >= 0.0) || (at_upper && g[i] <= 0.0);
        }
        mask
    }

    fn projected_gradient(&self, x: &Array1<f64>, g: &Array1<f64>) -> Array1<f64> {
        let mut gp = g.clone();
        for i in 0..x.len() {
            let lo = self.lower[i];
            let hi = self.upper[i];
            let tol = self.tol;
            let at_lower = x[i] <= lo + tol;
            let at_upper = x[i] >= hi - tol;
            if (at_lower && g[i] >= 0.0) || (at_upper && g[i] <= 0.0) {
                gp[i] = 0.0;
            }
        }
        gp
    }
}
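
// Minimal behavioral sketch: projection clamps coordinate-wise, and a bound
// is "active" only when the gradient pushes further into it.
#[cfg(test)]
mod box_spec_demo {
    use super::*;
    use ndarray::array;

    #[test]
    fn projection_and_active_mask() {
        let spec = BoxSpec::new(array![0.0, 0.0], array![1.0, 1.0], 1e-9);
        assert_eq!(spec.project(&array![-0.5, 2.0]), array![0.0, 1.0]);
        // At the lower bound a positive gradient component is active (the
        // descent direction would leave the box); an interior coordinate is free.
        let mask = spec.active_mask(&array![0.0, 0.5], &array![1.0, -1.0]);
        assert_eq!(mask, vec![true, false]);
    }
}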

#[derive(Debug, thiserror::Error)]
pub enum BoundsError {
    #[error("lower/upper lengths differ")]
    DimensionMismatch,
    #[error("lower bound exceeds upper bound at index {index}")]
    InvertedInterval { index: usize },
    #[error("bound tolerance must be finite and >= 0")]
    InvalidTolerance,
}

#[derive(Clone)]
pub struct Bounds {
    spec: BoxSpec,
}

impl Bounds {
    pub fn new(lower: Array1<f64>, upper: Array1<f64>, tol: f64) -> Result<Self, BoundsError> {
        if lower.len() != upper.len() {
            return Err(BoundsError::DimensionMismatch);
        }
        for i in 0..lower.len() {
            if lower[i] > upper[i] {
                return Err(BoundsError::InvertedInterval { index: i });
            }
        }
        if !tol.is_finite() || tol < 0.0 {
            return Err(BoundsError::InvalidTolerance);
        }
        Ok(Self {
            spec: BoxSpec::new(lower, upper, tol),
        })
    }
}

#[derive(Debug, Clone, Copy, PartialEq)]
enum FiniteDiffStencil {
    Central { h: f64 },
    Forward { h: f64 },
    Backward { h: f64 },
    Fixed,
}

fn finite_difference_stencil(
    bounds: Option<&BoxSpec>,
    x: &Array1<f64>,
    i: usize,
    base_h: f64,
) -> FiniteDiffStencil {
    if !base_h.is_finite() || base_h <= 0.0 {
        return FiniteDiffStencil::Fixed;
    }
    if let Some(bounds) = bounds {
        let room_lo = (x[i] - bounds.lower[i]).max(0.0);
        let room_hi = (bounds.upper[i] - x[i]).max(0.0);
        if room_lo >= base_h && room_hi >= base_h {
            FiniteDiffStencil::Central { h: base_h }
        } else if room_hi >= room_lo && room_hi > 0.0 {
            FiniteDiffStencil::Forward {
                h: base_h.min(room_hi),
            }
        } else if room_lo > 0.0 {
            FiniteDiffStencil::Backward {
                h: base_h.min(room_lo),
            }
        } else if room_hi > 0.0 {
            FiniteDiffStencil::Forward {
                h: base_h.min(room_hi),
            }
        } else {
            FiniteDiffStencil::Fixed
        }
    } else {
        FiniteDiffStencil::Central { h: base_h }
    }
}
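
// Quick check: away from the bounds the stencil is central; flush against a
// bound it falls back to the one-sided difference that stays feasible.
#[cfg(test)]
mod stencil_demo {
    use super::*;
    use ndarray::array;

    #[test]
    fn stencil_respects_bounds() {
        let spec = BoxSpec::new(array![0.0], array![1.0], 1e-9);
        assert_eq!(
            finite_difference_stencil(Some(&spec), &array![0.5], 0, 0.1),
            FiniteDiffStencil::Central { h: 0.1 }
        );
        assert_eq!(
            finite_difference_stencil(Some(&spec), &array![0.0], 0, 0.1),
            FiniteDiffStencil::Forward { h: 0.1 }
        );
    }
}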

// An enum to manage the adaptive strategy.
#[derive(Debug, Clone, Copy)]
enum LineSearchStrategy {
    StrongWolfe,
    Backtracking,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FallbackPolicy {
    Never,
    AutoBfgs,
}

#[derive(Debug, Clone, Copy)]
enum FlatStepPolicy {
    Strict,
    MidpointWithJiggle { scale: f64 },
}

#[derive(Debug, Clone, Copy)]
enum RescuePolicy {
    Off,
    CoordinateHybrid { pool_mult: f64, heads: usize },
}

#[derive(Debug, Clone, Copy)]
enum StallPolicy {
    Off,
    On { window: usize },
}

#[derive(Debug, Clone, Copy)]
enum AcceptKind {
    StrongWolfe,
    ApproxWolfe,
    Nonmonotone,
    GradDrop,
    TrustRegion,
    Rescue,
}

#[derive(Debug)]
enum LineSearchError {
    MaxAttempts(usize),
    StepSizeTooSmall,
    ObjectiveFailed(String),
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LineSearchFailureReason {
    MaxAttempts,
    StepSizeTooSmall,
}

type LsResult = Result<(f64, f64, Array1<f64>, usize, usize, AcceptKind), LineSearchError>;
const WOLFE_MAX_ATTEMPTS: usize = 20;
const BACKTRACKING_MAX_ATTEMPTS: usize = 50;

/// An error type for clear diagnostics.
#[derive(Debug, thiserror::Error)]
pub enum BfgsError {
    #[error("Internal invariant violated: {message}")]
    InternalInvariant { message: String },
    #[error("Objective evaluation failed: {message}")]
    ObjectiveFailed { message: String },
    #[error(
        "The line search failed ({failure_reason:?}) after {max_attempts} attempts. The optimization landscape may be pathological."
    )]
    LineSearchFailed {
        /// The best solution found before the line search failed.
        last_solution: Box<Solution>,
        /// The number of attempts the line search made before failing.
        max_attempts: usize,
        /// Why the line search failed.
        failure_reason: LineSearchFailureReason,
    },
    #[error(
        "Maximum number of iterations reached without converging. The best solution found is returned."
    )]
    MaxIterationsReached {
        /// The best solution found before the iteration limit was reached.
        last_solution: Box<Solution>,
    },
    #[error("The gradient norm was NaN or infinity, indicating numerical instability.")]
    GradientIsNaN,
    #[error(
        "The line search step size became smaller than machine epsilon, indicating that the algorithm is stuck."
    )]
    StepSizeTooSmall,
}

#[derive(Debug, thiserror::Error)]
pub enum ConfigError {
    #[error("tolerance must be finite and > 0")]
    InvalidTolerance,
    #[error("max_iterations must be >= 1")]
    InvalidMaxIterations,
}

#[derive(Debug, thiserror::Error)]
pub enum MatrixError {
    #[error("matrix must be square; got {rows}x{cols}")]
    NonSquare { rows: usize, cols: usize },
    #[error("matrix must be symmetric")]
    NotSymmetric,
}

fn ensure_square(a: &Array2<f64>) -> Result<usize, MatrixError> {
    if a.nrows() == a.ncols() {
        Ok(a.nrows())
    } else {
        Err(MatrixError::NonSquare {
            rows: a.nrows(),
            cols: a.ncols(),
        })
    }
}

fn ensure_symmetric(a: &Array2<f64>) -> Result<(), MatrixError> {
    let n = ensure_square(a)?;
    for i in 0..n {
        for j in 0..i {
            if !a[[i, j]].is_finite()
                || !a[[j, i]].is_finite()
                || (a[[i, j]] - a[[j, i]]).abs()
                    > 1e-10 * (1.0 + a[[i, j]].abs().max(a[[j, i]].abs()))
            {
                return Err(MatrixError::NotSymmetric);
            }
        }
    }
    Ok(())
}

#[derive(Debug, Clone)]
struct SymmetricMatrix {
    data: Array2<f64>,
}

impl SymmetricMatrix {
    fn from_verified(data: Array2<f64>) -> Self {
        Self { data }
    }

    fn as_array(&self) -> &Array2<f64> {
        &self.data
    }
}

#[derive(Debug, Clone)]
struct SpdInverseHessian {
    data: SymmetricMatrix,
}

impl SpdInverseHessian {
    fn from_verified(data: Array2<f64>) -> Self {
        Self {
            data: SymmetricMatrix::from_verified(data),
        }
    }

    fn into_inner(self) -> Array2<f64> {
        self.data.data
    }
}

pub struct SymmetricHessianMut<'a> {
    data: &'a mut Array2<f64>,
}

impl<'a> SymmetricHessianMut<'a> {
    pub fn new(data: &'a mut Array2<f64>) -> Result<Self, MatrixError> {
        ensure_square(data)?;
        Ok(Self { data })
    }

    pub fn fill(&mut self, value: f64) {
        self.data.fill(value);
    }

    pub fn set(&mut self, i: usize, j: usize, value: f64) {
        self.data[[i, j]] = value;
        self.data[[j, i]] = value;
    }

    pub fn assign_dense(&mut self, dense: &Array2<f64>) -> Result<(), MatrixError> {
        ensure_symmetric(dense)?;
        if dense.raw_dim() != self.data.raw_dim() {
            return Err(MatrixError::NonSquare {
                rows: dense.nrows(),
                cols: dense.ncols(),
            });
        }
        self.data.assign(dense);
        Ok(())
    }
}

#[derive(Debug, Clone, Copy)]
pub struct Tolerance(f64);

impl Tolerance {
    pub const DEFAULT: Self = Self(1e-5);

    pub fn new(value: f64) -> Result<Self, ConfigError> {
        if value.is_finite() && value > 0.0 {
            Ok(Self(value))
        } else {
            Err(ConfigError::InvalidTolerance)
        }
    }

    fn get(self) -> f64 {
        self.0
    }
}

#[derive(Debug, Clone, Copy)]
pub struct MaxIterations(usize);

impl MaxIterations {
    pub const DEFAULT: Self = Self(100);

    pub fn new(value: usize) -> Result<Self, ConfigError> {
        if value >= 1 {
            Ok(Self(value))
        } else {
            Err(ConfigError::InvalidMaxIterations)
        }
    }

    fn get(self) -> usize {
        self.0
    }
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Profile {
    Robust,
    Deterministic,
    Aggressive,
}

#[derive(Debug, Clone)]
pub struct FirstOrderSample {
    pub value: f64,
    pub gradient: Array1<f64>,
}

#[derive(Debug, Clone)]
pub struct SecondOrderSample {
    pub value: f64,
    pub gradient: Array1<f64>,
    pub hessian: Option<Array2<f64>>,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FixedPointStatus {
    Continue,
    Stop,
}

#[derive(Debug, Clone)]
pub struct FixedPointSample {
    pub value: f64,
    pub step: Array1<f64>,
    pub status: FixedPointStatus,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StationarityKind {
    ProjectedGradient,
    StepNorm,
}

/// A summary of a successful solver run.
///
/// Note that for non-convex functions, convergence to a local minimum is not guaranteed.
#[derive(Debug, Clone)]
pub struct Solution {
    /// The point at which the minimum value was found.
    pub final_point: Array1<f64>,
    /// The minimum value of the objective function.
    pub final_value: f64,
    /// The gradient at the final point when the solver is gradient-based.
    pub final_gradient: Option<Array1<f64>>,
    /// The Hessian at the final point when the solver tracks one.
    pub final_hessian: Option<Array2<f64>>,
    /// The projected gradient norm at the final point when available.
    pub final_gradient_norm: Option<f64>,
    /// The final accepted fixed-point step norm when available.
    pub final_step_norm: Option<f64>,
    /// The meaning of the stationarity metric for this solution.
    pub stationarity_kind: StationarityKind,
    /// The total number of iterations performed.
    pub iterations: usize,
    /// The total number of times the objective function was evaluated.
    pub func_evals: usize,
    /// The total number of times the gradient was evaluated.
    pub grad_evals: usize,
    /// The total number of times a Hessian was supplied directly by the objective.
    pub hess_evals: usize,
}

impl Solution {
    fn gradient_based(
        final_point: Array1<f64>,
        final_value: f64,
        final_gradient: Array1<f64>,
        final_gradient_norm: f64,
        final_hessian: Option<Array2<f64>>,
        iterations: usize,
        func_evals: usize,
        grad_evals: usize,
        hess_evals: usize,
    ) -> Self {
        Self {
            final_point,
            final_value,
            final_gradient: Some(final_gradient),
            final_hessian,
            final_gradient_norm: Some(final_gradient_norm),
            final_step_norm: None,
            stationarity_kind: StationarityKind::ProjectedGradient,
            iterations,
            func_evals,
            grad_evals,
            hess_evals,
        }
    }

    fn fixed_point(
        final_point: Array1<f64>,
        final_value: f64,
        final_step_norm: f64,
        iterations: usize,
        func_evals: usize,
    ) -> Self {
        Self {
            final_point,
            final_value,
            final_gradient: None,
            final_hessian: None,
            final_gradient_norm: None,
            final_step_norm: Some(final_step_norm),
            stationarity_kind: StationarityKind::StepNorm,
            iterations,
            func_evals,
            grad_evals: 0,
            hess_evals: 0,
        }
    }
}

#[derive(Debug, Clone)]
pub enum ObjectiveEvalError {
    Recoverable { message: String },
    Fatal { message: String },
}

impl ObjectiveEvalError {
    pub fn recoverable(message: impl Into<String>) -> Self {
        Self::Recoverable {
            message: message.into(),
        }
    }

    pub fn fatal(message: impl Into<String>) -> Self {
        Self::Fatal {
            message: message.into(),
        }
    }
}

pub trait ZerothOrderObjective {
    fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError>;
}

pub trait FirstOrderObjective: ZerothOrderObjective {
    fn eval_grad(&mut self, x: &Array1<f64>) -> Result<FirstOrderSample, ObjectiveEvalError>;

    fn set_finite_difference_bounds(&mut self, _bounds: Option<&Bounds>) {}
}

pub trait SecondOrderObjective: FirstOrderObjective {
    fn eval_hessian(&mut self, x: &Array1<f64>) -> Result<SecondOrderSample, ObjectiveEvalError>;
}

pub trait FixedPointObjective {
    fn eval_step(&mut self, x: &Array1<f64>) -> Result<FixedPointSample, ObjectiveEvalError>;
}

pub struct FiniteDiffGradient<ObjFn> {
    inner: ObjFn,
    step: f64,
    bounds: Option<Bounds>,
}

impl<ObjFn> FiniteDiffGradient<ObjFn> {
    pub fn new(inner: ObjFn) -> Self {
        Self {
            inner,
            step: 1e-4,
            bounds: None,
        }
    }

    pub fn with_step(mut self, step: f64) -> Self {
        self.step = step;
        self
    }

    pub fn with_bounds(mut self, bounds: Bounds) -> Self {
        self.bounds = Some(bounds);
        self
    }
}

impl<ObjFn> ZerothOrderObjective for FiniteDiffGradient<ObjFn>
where
    ObjFn: ZerothOrderObjective,
{
    fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
        self.inner.eval_cost(x)
    }
}

impl<ObjFn> FirstOrderObjective for FiniteDiffGradient<ObjFn>
where
    ObjFn: ZerothOrderObjective,
{
    fn eval_grad(&mut self, x: &Array1<f64>) -> Result<FirstOrderSample, ObjectiveEvalError> {
        if !self.step.is_finite() || self.step <= 0.0 {
            return Err(ObjectiveEvalError::fatal(
                "finite-difference gradient step must be positive and finite",
            ));
        }
        let value = recover_on_nonfinite_cost(self.inner.eval_cost(x)?)?;
        let mut gradient = Array1::<f64>::zeros(x.len());
        for i in 0..x.len() {
            let h = self.step * (1.0 + x[i].abs());
            match finite_difference_stencil(self.bounds.as_ref().map(|b| &b.spec), x, i, h) {
                FiniteDiffStencil::Central { h } => {
                    let mut xp = x.clone();
                    xp[i] += h;
                    let fp = recover_on_nonfinite_cost(self.inner.eval_cost(&xp)?)?;
                    let mut xm = x.clone();
                    xm[i] -= h;
                    let fm = recover_on_nonfinite_cost(self.inner.eval_cost(&xm)?)?;
                    gradient[i] = (fp - fm) / (2.0 * h);
                }
                FiniteDiffStencil::Forward { h } => {
                    let mut xp = x.clone();
                    xp[i] += h;
                    let fp = recover_on_nonfinite_cost(self.inner.eval_cost(&xp)?)?;
                    gradient[i] = (fp - value) / h;
                }
                FiniteDiffStencil::Backward { h } => {
                    let mut xm = x.clone();
                    xm[i] -= h;
                    let fm = recover_on_nonfinite_cost(self.inner.eval_cost(&xm)?)?;
                    gradient[i] = (value - fm) / h;
                }
                FiniteDiffStencil::Fixed => {
                    gradient[i] = 0.0;
                }
            }
        }
        Ok(FirstOrderSample { value, gradient })
    }

    fn set_finite_difference_bounds(&mut self, bounds: Option<&Bounds>) {
        self.bounds = bounds.map(|bounds| Bounds {
            spec: bounds.spec.clone(),
        });
    }
}
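
// Usage sketch: wrap a cost-only objective to get finite-difference
// gradients; for a quadratic the central stencil is essentially exact.
#[cfg(test)]
mod finite_diff_gradient_demo {
    use super::*;
    use ndarray::array;

    struct Quadratic;
    impl ZerothOrderObjective for Quadratic {
        fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
            Ok(x[0].powi(2) + x[1].powi(2))
        }
    }

    #[test]
    fn gradient_of_quadratic() {
        let mut obj = FiniteDiffGradient::new(Quadratic).with_step(1e-6);
        let s = obj.eval_grad(&array![1.0, 2.0]).unwrap();
        assert!((s.gradient[0] - 2.0).abs() < 1e-6);
        assert!((s.gradient[1] - 4.0).abs() < 1e-6);
    }
}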

pub struct Problem<ObjFn> {
    x0: Array1<f64>,
    objective: ObjFn,
    bounds: Option<Bounds>,
    tolerance: Tolerance,
    max_iterations: MaxIterations,
    profile: Profile,
}

impl<ObjFn> Problem<ObjFn>
where
    ObjFn: FirstOrderObjective,
{
    pub fn new(x0: Array1<f64>, objective: ObjFn) -> Self {
        Self {
            x0,
            objective,
            bounds: None,
            tolerance: Tolerance::DEFAULT,
            max_iterations: MaxIterations::DEFAULT,
            profile: Profile::Robust,
        }
    }

    pub fn with_bounds(mut self, bounds: Bounds) -> Self {
        self.objective.set_finite_difference_bounds(Some(&bounds));
        self.bounds = Some(bounds);
        self
    }

    pub fn with_tolerance(mut self, tolerance: Tolerance) -> Self {
        self.tolerance = tolerance;
        self
    }

    pub fn with_max_iterations(mut self, max_iterations: MaxIterations) -> Self {
        self.max_iterations = max_iterations;
        self
    }

    pub fn with_profile(mut self, profile: Profile) -> Self {
        self.profile = profile;
        self
    }
}

pub struct SecondOrderProblem<ObjFn> {
    x0: Array1<f64>,
    objective: ObjFn,
    bounds: Option<Bounds>,
    tolerance: Tolerance,
    max_iterations: MaxIterations,
    profile: Profile,
    fd_hessian_step: f64,
}

impl<ObjFn> SecondOrderProblem<ObjFn>
where
    ObjFn: SecondOrderObjective,
{
    pub fn new(x0: Array1<f64>, objective: ObjFn) -> Self {
        Self {
            x0,
            objective,
            bounds: None,
            tolerance: Tolerance::DEFAULT,
            max_iterations: MaxIterations::DEFAULT,
            profile: Profile::Robust,
            fd_hessian_step: 1e-4,
        }
    }

    pub fn with_bounds(mut self, bounds: Bounds) -> Self {
        self.objective.set_finite_difference_bounds(Some(&bounds));
        self.bounds = Some(bounds);
        self
    }

    pub fn with_tolerance(mut self, tolerance: Tolerance) -> Self {
        self.tolerance = tolerance;
        self
    }

    pub fn with_max_iterations(mut self, max_iterations: MaxIterations) -> Self {
        self.max_iterations = max_iterations;
        self
    }

    pub fn with_profile(mut self, profile: Profile) -> Self {
        self.profile = profile;
        self
    }

    pub fn with_fd_hessian_step(mut self, fd_hessian_step: f64) -> Self {
        self.fd_hessian_step = fd_hessian_step;
        self
    }
}

pub enum AutoSecondOrderSolver<ObjFn> {
    NewtonTrustRegion(NewtonTrustRegion<ObjFn>),
    Arc(Arc<ObjFn>),
}

impl<ObjFn> AutoSecondOrderSolver<ObjFn>
where
    ObjFn: SecondOrderObjective,
{
    pub fn run(&mut self) -> Result<Solution, AutoSecondOrderError> {
        match self {
            Self::NewtonTrustRegion(solver) => solver
                .run()
                .map_err(AutoSecondOrderError::NewtonTrustRegion),
            Self::Arc(solver) => solver.run().map_err(AutoSecondOrderError::Arc),
        }
    }
}

#[derive(Debug, thiserror::Error)]
pub enum AutoSecondOrderError {
    #[error(transparent)]
    NewtonTrustRegion(#[from] NewtonTrustRegionError),
    #[error(transparent)]
    Arc(#[from] ArcError),
}

#[doc(hidden)]
pub trait IntoAutoSolver {
    type Solver;

    fn into_auto_solver(self) -> Self::Solver;
}

impl<ObjFn> IntoAutoSolver for Problem<ObjFn>
where
    ObjFn: FirstOrderObjective,
{
    type Solver = Bfgs<ObjFn>;

    fn into_auto_solver(self) -> Self::Solver {
        let mut solver = Bfgs::new(self.x0, self.objective)
            .with_tolerance(self.tolerance)
            .with_max_iterations(self.max_iterations)
            .with_profile(self.profile);
        if let Some(bounds) = self.bounds {
            solver = solver.with_bounds(bounds);
        }
        solver
    }
}

impl<ObjFn> IntoAutoSolver for SecondOrderProblem<ObjFn>
where
    ObjFn: SecondOrderObjective,
{
    type Solver = AutoSecondOrderSolver<ObjFn>;

    fn into_auto_solver(self) -> Self::Solver {
        let SecondOrderProblem {
            x0,
            objective,
            bounds,
            tolerance,
            max_iterations,
            profile,
            fd_hessian_step,
        } = self;
        let use_arc = matches!(profile, Profile::Aggressive);
        if use_arc {
            let mut solver = Arc::new(x0, objective)
                .with_tolerance(tolerance)
                .with_max_iterations(max_iterations)
                .with_profile(profile)
                .with_fd_hessian_step(fd_hessian_step);
            if let Some(bounds) = bounds {
                solver = solver.with_bounds(bounds);
            }
            AutoSecondOrderSolver::Arc(solver)
        } else {
            let mut solver = NewtonTrustRegion::new(x0, objective)
                .with_tolerance(tolerance)
                .with_max_iterations(max_iterations)
                .with_profile(profile)
                .with_fd_hessian_step(fd_hessian_step);
            if let Some(bounds) = bounds {
                solver = solver.with_bounds(bounds);
            }
            AutoSecondOrderSolver::NewtonTrustRegion(solver)
        }
    }
}

pub fn optimize<P>(problem: P) -> P::Solver
where
    P: IntoAutoSolver,
{
    problem.into_auto_solver()
}

const CACHE_POINT_EPS: f64 = 1e-14;

#[inline]
fn approx_scalar(lhs: f64, rhs: f64) -> bool {
    (lhs - rhs).abs() <= CACHE_POINT_EPS * (1.0 + lhs.abs().max(rhs.abs()))
}

#[inline]
fn approx_point(lhs: &Array1<f64>, rhs: &Array1<f64>) -> bool {
    lhs.len() == rhs.len()
        && lhs
            .iter()
            .zip(rhs.iter())
            .all(|(&l, &r)| approx_scalar(l, r))
}

fn recover_on_nonfinite_cost(cost: f64) -> Result<f64, ObjectiveEvalError> {
    if cost.is_finite() {
        Ok(cost)
    } else {
        Err(ObjectiveEvalError::recoverable(
            "objective returned a non-finite cost",
        ))
    }
}

fn recover_on_nonfinite_gradient(gradient: &Array1<f64>) -> Result<(), ObjectiveEvalError> {
    if gradient.iter().all(|value| value.is_finite()) {
        Ok(())
    } else {
        Err(ObjectiveEvalError::recoverable(
            "objective returned a non-finite gradient",
        ))
    }
}

fn sanitize_first_order_sample(
    sample: FirstOrderSample,
) -> Result<FirstOrderSample, ObjectiveEvalError> {
    recover_on_nonfinite_cost(sample.value)?;
    recover_on_nonfinite_gradient(&sample.gradient)?;
    Ok(sample)
}

fn sanitize_second_order_sample(
    sample: SecondOrderSample,
) -> Result<SecondOrderSample, ObjectiveEvalError> {
    let value = recover_on_nonfinite_cost(sample.value)?;
    recover_on_nonfinite_gradient(&sample.gradient)?;
    let hessian = sample
        .hessian
        .filter(|h| h.iter().all(|value| value.is_finite()));
    Ok(SecondOrderSample {
        value,
        gradient: sample.gradient,
        hessian,
    })
}

struct BorrowedSecondOrderAsFirstOrder<'a, O> {
    inner: &'a mut O,
}

impl<'a, O> BorrowedSecondOrderAsFirstOrder<'a, O> {
    fn new(inner: &'a mut O) -> Self {
        Self { inner }
    }
}

impl<O> FirstOrderObjective for BorrowedSecondOrderAsFirstOrder<'_, O>
where
    O: SecondOrderObjective,
{
    fn eval_grad(&mut self, x: &Array1<f64>) -> Result<FirstOrderSample, ObjectiveEvalError> {
        self.inner.eval_grad(x)
    }

    fn set_finite_difference_bounds(&mut self, bounds: Option<&Bounds>) {
        self.inner.set_finite_difference_bounds(bounds);
    }
}

impl<O> ZerothOrderObjective for BorrowedSecondOrderAsFirstOrder<'_, O>
where
    O: SecondOrderObjective,
{
    fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
        self.inner.eval_cost(x)
    }
}

struct FirstOrderCache {
    last_x: Option<Array1<f64>>,
    last_cost: Option<f64>,
    last_grad: Array1<f64>,
    have_last_grad: bool,
}

impl FirstOrderCache {
    fn new(n: usize) -> Self {
        Self {
            last_x: None,
            last_cost: None,
            last_grad: Array1::zeros(n),
            have_last_grad: false,
        }
    }

    fn eval_cost<ObjFn>(
        &mut self,
        obj_fn: &mut ObjFn,
        x: &Array1<f64>,
        func_evals: &mut usize,
    ) -> Result<f64, ObjectiveEvalError>
    where
        ObjFn: FirstOrderObjective,
    {
        if let (Some(last_x), Some(last_cost)) = (&self.last_x, self.last_cost)
            && approx_point(last_x, x)
        {
            return Ok(last_cost);
        }
        let cost = recover_on_nonfinite_cost(obj_fn.eval_cost(x)?)?;
        *func_evals += 1;
        self.last_x = Some(x.clone());
        self.last_cost = Some(cost);
        self.have_last_grad = false;
        Ok(cost)
    }

    fn eval_cost_grad<ObjFn>(
        &mut self,
        obj_fn: &mut ObjFn,
        x: &Array1<f64>,
        func_evals: &mut usize,
        grad_evals: &mut usize,
    ) -> Result<(f64, Array1<f64>), ObjectiveEvalError>
    where
        ObjFn: FirstOrderObjective,
    {
        if let (Some(last_x), Some(last_cost)) = (&self.last_x, self.last_cost)
            && self.have_last_grad
            && approx_point(last_x, x)
        {
            return Ok((last_cost, self.last_grad.clone()));
        }
        let sample = sanitize_first_order_sample(obj_fn.eval_grad(x)?)?;
        *func_evals += 1;
        *grad_evals += 1;
        self.last_x = Some(x.clone());
        self.last_cost = Some(sample.value);
        self.last_grad.assign(&sample.gradient);
        self.have_last_grad = true;
        Ok((sample.value, self.last_grad.clone()))
    }
}
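
// Behavioral sketch: repeated evaluations at (numerically) the same point
// are served from the cache, so the objective is only called once.
#[cfg(test)]
mod first_order_cache_demo {
    use super::*;
    use ndarray::array;

    struct Counting {
        calls: usize,
    }
    impl ZerothOrderObjective for Counting {
        fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
            self.calls += 1;
            Ok(x.dot(x))
        }
    }
    impl FirstOrderObjective for Counting {
        fn eval_grad(&mut self, x: &Array1<f64>) -> Result<FirstOrderSample, ObjectiveEvalError> {
            Ok(FirstOrderSample {
                value: x.dot(x),
                gradient: x * 2.0,
            })
        }
    }

    #[test]
    fn cost_is_cached_per_point() {
        let mut cache = FirstOrderCache::new(2);
        let mut obj = Counting { calls: 0 };
        let mut func_evals = 0usize;
        let x = array![1.0, 2.0];
        cache.eval_cost(&mut obj, &x, &mut func_evals).unwrap();
        cache.eval_cost(&mut obj, &x, &mut func_evals).unwrap();
        assert_eq!(obj.calls, 1);
        assert_eq!(func_evals, 1);
    }
}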

struct SecondOrderCache {
    last_x: Option<Array1<f64>>,
    last_cost: Option<f64>,
    last_grad: Array1<f64>,
    last_hessian: SymmetricMatrix,
    have_last_sample: bool,
    fd_hessian_step: f64,
}

impl SecondOrderCache {
    fn new(n: usize, fd_hessian_step: f64) -> Self {
        Self {
            last_x: None,
            last_cost: None,
            last_grad: Array1::zeros(n),
            last_hessian: SymmetricMatrix::from_verified(Array2::zeros((n, n))),
            have_last_sample: false,
            fd_hessian_step,
        }
    }

    fn finite_difference_hessian<ObjFn>(
        &mut self,
        obj_fn: &mut ObjFn,
        x: &Array1<f64>,
        center_gradient: &Array1<f64>,
        bounds: Option<&BoxSpec>,
        func_evals: &mut usize,
        grad_evals: &mut usize,
    ) -> Result<Array2<f64>, ObjectiveEvalError>
    where
        ObjFn: SecondOrderObjective,
    {
        if !self.fd_hessian_step.is_finite() || self.fd_hessian_step <= 0.0 {
            return Err(ObjectiveEvalError::fatal(
                "finite-difference Hessian step must be positive and finite",
            ));
        }
        let n = x.len();
        let mut hessian = Array2::<f64>::zeros((n, n));
        for j in 0..n {
            let h = self.fd_hessian_step * (1.0 + x[j].abs());
            let column = match finite_difference_stencil(bounds, x, j, h) {
                FiniteDiffStencil::Central { h } => {
                    let mut xp = x.clone();
                    xp[j] += h;
                    let gp = sanitize_first_order_sample(obj_fn.eval_grad(&xp)?)?;
                    *func_evals += 1;
                    *grad_evals += 1;

                    let mut xm = x.clone();
                    xm[j] -= h;
                    let gm = sanitize_first_order_sample(obj_fn.eval_grad(&xm)?)?;
                    *func_evals += 1;
                    *grad_evals += 1;

                    (&gp.gradient - &gm.gradient) / (2.0 * h)
                }
                FiniteDiffStencil::Forward { h } => {
                    let mut xp = x.clone();
                    xp[j] += h;
                    let gp = sanitize_first_order_sample(obj_fn.eval_grad(&xp)?)?;
                    *func_evals += 1;
                    *grad_evals += 1;
                    (&gp.gradient - center_gradient) / h
                }
                FiniteDiffStencil::Backward { h } => {
                    let mut xm = x.clone();
                    xm[j] -= h;
                    let gm = sanitize_first_order_sample(obj_fn.eval_grad(&xm)?)?;
                    *func_evals += 1;
                    *grad_evals += 1;
                    (center_gradient - &gm.gradient) / h
                }
                FiniteDiffStencil::Fixed => Array1::zeros(n),
            };
            hessian.column_mut(j).assign(&column);
        }
        Ok(0.5 * (&hessian + &hessian.t().to_owned()))
    }

    fn eval_cost_grad_hessian<ObjFn>(
        &mut self,
        obj_fn: &mut ObjFn,
        x: &Array1<f64>,
        bounds: Option<&BoxSpec>,
        func_evals: &mut usize,
        grad_evals: &mut usize,
        hess_evals: &mut usize,
    ) -> Result<(f64, Array1<f64>, Array2<f64>), ObjectiveEvalError>
    where
        ObjFn: SecondOrderObjective,
    {
        if let (Some(last_x), Some(last_cost)) = (&self.last_x, self.last_cost)
            && self.have_last_sample
            && approx_point(last_x, x)
        {
            return Ok((
                last_cost,
                self.last_grad.clone(),
                self.last_hessian.as_array().clone(),
            ));
        }
        let sample = sanitize_second_order_sample(obj_fn.eval_hessian(x)?)?;
        *func_evals += 1;
        *grad_evals += 1;
        let hessian = match sample.hessian {
            Some(hessian) => {
                *hess_evals += 1;
                hessian
            }
            None => self.finite_difference_hessian(
                obj_fn,
                x,
                &sample.gradient,
                bounds,
                func_evals,
                grad_evals,
            )?,
        };
        self.last_x = Some(x.clone());
        self.last_cost = Some(sample.value);
        self.last_grad.assign(&sample.gradient);
        self.last_hessian = SymmetricMatrix::from_verified(hessian.clone());
        self.have_last_sample = true;
        Ok((sample.value, self.last_grad.clone(), hessian))
    }
}
1794
1795#[derive(Debug, thiserror::Error)]
1796pub enum NewtonTrustRegionError {
1797    #[error(
1798        "Objective returned a Hessian with shape {got_rows}x{got_cols}; expected {expected}x{expected}"
1799    )]
1800    HessianShapeMismatch {
1801        expected: usize,
1802        got_rows: usize,
1803        got_cols: usize,
1804    },
1805    #[error("Objective returned non-finite values.")]
1806    NonFiniteObjective,
1807    #[error("Objective evaluation failed: {message}")]
1808    ObjectiveFailed { message: String },
1809    #[error("Failed to form a positive-definite trust-region model Hessian.")]
1810    ModelHessianNotSpd,
1811    #[error(
1812        "Maximum number of iterations reached without converging. The best solution found is returned."
1813    )]
1814    MaxIterationsReached { last_solution: Box<Solution> },
1815}
1816
1817struct NewtonTrustRegionCore {
1818    x0: Array1<f64>,
1819    tolerance: f64,
1820    max_iterations: usize,
1821    fd_hessian_step: f64,
1822    bounds: Option<BoxSpec>,
1823    trust_radius: f64,
1824    trust_radius_max: f64,
1825    eta_accept: f64,
1826    fallback_policy: FallbackPolicy,
1827    history_cap: usize,
1828}
1829
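/// A configurable Newton trust-region solver using projected Steihaug-Toint steps,
/// with an optional BFGS fallback under `FallbackPolicy::AutoBfgs`.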
1830pub struct NewtonTrustRegion<ObjFn> {
1831    core: NewtonTrustRegionCore,
1832    obj_fn: ObjFn,
1833}
1834
1835#[derive(Debug, thiserror::Error)]
1836pub enum ArcError {
1837    #[error(
1838        "Objective returned a Hessian with shape {got_rows}x{got_cols}; expected {expected}x{expected}"
1839    )]
1840    HessianShapeMismatch {
1841        expected: usize,
1842        got_rows: usize,
1843        got_cols: usize,
1844    },
1845    #[error("Objective returned non-finite values.")]
1846    NonFiniteObjective,
1847    #[error("Objective evaluation failed: {message}")]
1848    ObjectiveFailed { message: String },
1849    #[error("ARC subproblem solver failed to produce a usable step.")]
1850    SubproblemFailed,
1851    #[error(
1852        "Maximum number of iterations reached without converging. The best solution found is returned."
1853    )]
1854    MaxIterationsReached { last_solution: Box<Solution> },
1855}
1856
1857struct ArcCore {
1858    x0: Array1<f64>,
1859    tolerance: f64,
1860    max_iterations: usize,
1861    fd_hessian_step: f64,
1862    bounds: Option<BoxSpec>,
1863    theta: f64,
1864    sigma: f64,
1865    sigma_min: f64,
1866    sigma_max: f64,
1867    eta1: f64,
1868    eta2: f64,
1869    gamma1: f64,
1870    gamma2: f64,
1871    gamma3: f64,
1872    fallback_policy: FallbackPolicy,
1873    history_cap: usize,
1874    subproblem_max_iterations: usize,
1875}
1876
1877/// A configurable Adaptive Regularization with Cubics (ARC) solver.
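///
/// Each iteration approximately minimizes the cubic model
/// m(s) = gᵀs + ½ sᵀHs + (sigma/3)·‖s‖³ and adapts `sigma` from the ratio of
/// actual to predicted decrease: sharp decrease on very successful steps,
/// growth on rejected ones.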
1878pub struct Arc<ObjFn> {
1879    core: ArcCore,
1880    obj_fn: ObjFn,
1881}
1882
1883impl NewtonTrustRegionCore {
1884    fn new(x0: Array1<f64>) -> Self {
1885        Self {
1886            x0,
1887            tolerance: 1e-5,
1888            max_iterations: 100,
1889            fd_hessian_step: 1e-4,
1890            bounds: None,
1891            trust_radius: 1.0,
1892            trust_radius_max: 1e6,
1893            eta_accept: 0.1,
1894            fallback_policy: FallbackPolicy::AutoBfgs,
1895            history_cap: 12,
1896        }
1897    }
1898
1899    fn apply_profile(&mut self, profile: Profile) {
1900        match profile {
1901            Profile::Robust => {
1902                self.eta_accept = 0.1;
1903                self.fallback_policy = FallbackPolicy::AutoBfgs;
1904                self.history_cap = 12;
1905            }
1906            Profile::Deterministic => {
1907                self.eta_accept = 0.1;
1908                self.fallback_policy = FallbackPolicy::Never;
1909                self.history_cap = 2;
1910            }
1911            Profile::Aggressive => {
1912                self.eta_accept = 0.05;
1913                self.fallback_policy = FallbackPolicy::AutoBfgs;
1914                self.history_cap = 20;
1915            }
1916        }
1917    }
1918
1919    #[inline]
1920    fn project_point(&self, x: &Array1<f64>) -> Array1<f64> {
1921        if let Some(bounds) = &self.bounds {
1922            bounds.project(x)
1923        } else {
1924            x.clone()
1925        }
1926    }
1927
1928    #[inline]
1929    fn projected_gradient(&self, x: &Array1<f64>, g: &Array1<f64>) -> Array1<f64> {
1930        if let Some(bounds) = &self.bounds {
1931            bounds.projected_gradient(x, g)
1932        } else {
1933            g.clone()
1934        }
1935    }
1936
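    // Per-coordinate bound-activity mask from `BoxSpec`; all-false when unbounded.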
1937    fn active_mask(&self, x: &Array1<f64>, g: &Array1<f64>) -> Vec<bool> {
1938        if let Some(bounds) = &self.bounds {
1939            bounds.active_mask(x, g)
1940        } else {
1941            vec![false; x.len()]
1942        }
1943    }
1944
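    // Predicted decrease of the quadratic model: m(0) - m(s) = -(g·s + 0.5·s·Hs).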
1945    fn predicted_decrease(h_model: &Array2<f64>, g_proj: &Array1<f64>, step: &Array1<f64>) -> f64 {
1946        let hs = h_model.dot(step);
1947        -(g_proj.dot(step) + 0.5 * step.dot(&hs))
1948    }
1949
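    // Smallest tau >= 0 with ||p + tau·d|| = delta, i.e. the least nonnegative root of
    // (d·d)·tau^2 + 2(p·d)·tau + (p·p - delta^2) = 0.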
1950    fn boundary_tau(p: &Array1<f64>, d: &Array1<f64>, delta: f64) -> Option<f64> {
1951        let a = d.dot(d);
1952        if !a.is_finite() || a <= 0.0 {
1953            return None;
1954        }
1955        let b = 2.0 * p.dot(d);
1956        let c = p.dot(p) - delta * delta;
1957        let disc = b * b - 4.0 * a * c;
1958        if !disc.is_finite() || disc < 0.0 {
1959            return None;
1960        }
1961        let sqrt_disc = disc.sqrt();
1962        let t1 = (-b - sqrt_disc) / (2.0 * a);
1963        let t2 = (-b + sqrt_disc) / (2.0 * a);
1964        let mut tau = None;
1965        if t1.is_finite() && t1 >= 0.0 {
1966            tau = Some(t1);
1967        }
1968        if t2.is_finite() && t2 >= 0.0 {
1969            tau = Some(tau.map(|v| v.min(t2)).unwrap_or(t2));
1970        }
1971        tau
1972    }
1973
1974    fn steihaug_toint_step(
1975        &self,
1976        h_model: &Array2<f64>,
1977        g_proj: &Array1<f64>,
1978        trust_radius: f64,
1979        active: Option<&[bool]>,
1980    ) -> Option<(Array1<f64>, f64)> {
1981        let n = g_proj.len();
1982        let g_norm = g_proj.dot(g_proj).sqrt();
1983        if !g_norm.is_finite() || g_norm <= 0.0 {
1984            return None;
1985        }
1986        let active = active.unwrap_or(&[]);
1987        let use_mask = !active.is_empty();
1988        if use_mask && !any_free_variables(active) {
1989            return None;
1990        }
1991        if prefer_dense_direct(n) {
1992            return dense_trust_region_step(
1993                h_model,
1994                g_proj,
1995                trust_radius,
1996                if use_mask { Some(active) } else { None },
1997            );
1998        }
1999
2000        let mut p = Array1::<f64>::zeros(n);
2001        let mut r = g_proj.clone();
2002        if use_mask {
2003            mask_vector_inplace(&mut r, active);
2004        }
2005        let mut d = r.mapv(|v| -v);
2006        if use_mask {
2007            mask_vector_inplace(&mut d, active);
2008        }
2009        let mut rtr = r.dot(&r);
2010        let cg_tol = (1e-6 * g_norm).max(1e-12);
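        // In exact arithmetic CG terminates within n steps; 2n leaves slack for rounding.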
2011        let max_iter = (2 * n).max(10);
2012        let mut bd = Array1::<f64>::zeros(n);
2013
2014        for _ in 0..max_iter {
2015            if use_mask {
2016                masked_hv_inplace(h_model, &d, active, &mut bd);
2017            } else {
2018                bd.assign(&h_model.dot(&d));
2019            }
2020            let d_bd = d.dot(&bd);
2021
2022            // Negative/near-zero curvature: move to trust-region boundary along d.
2023            if !d_bd.is_finite() || d_bd <= 1e-14 * d.dot(&d).max(1.0) {
2024                let tau = Self::boundary_tau(&p, &d, trust_radius)?;
2025                let mut p_nc = p.clone();
2026                p_nc.scaled_add(tau, &d);
2027                let pred = Self::predicted_decrease(h_model, g_proj, &p_nc);
2028                if pred.is_finite() && pred > 0.0 {
2029                    return Some((p_nc, pred));
2030                }
2031                break;
2032            }
2033
2034            let alpha = rtr / d_bd;
2035            if !alpha.is_finite() || alpha <= 0.0 {
2036                break;
2037            }
2038
2039            let mut p_next = p.clone();
2040            p_next.scaled_add(alpha, &d);
2041            let p_next_norm = p_next.dot(&p_next).sqrt();
2042            if p_next_norm >= trust_radius {
2043                let tau = Self::boundary_tau(&p, &d, trust_radius)?;
2044                let mut p_b = p.clone();
2045                p_b.scaled_add(tau, &d);
2046                let pred = Self::predicted_decrease(h_model, g_proj, &p_b);
2047                if pred.is_finite() && pred > 0.0 {
2048                    return Some((p_b, pred));
2049                }
2050                break;
2051            }
2052
2053            r.scaled_add(alpha, &bd);
2054            let r_next_norm = r.dot(&r).sqrt();
2055            if !r_next_norm.is_finite() {
2056                break;
2057            }
2058
2059            p = p_next;
2060            if r_next_norm <= cg_tol {
2061                let pred = Self::predicted_decrease(h_model, g_proj, &p);
2062                if pred.is_finite() && pred > 0.0 {
2063                    return Some((p, pred));
2064                }
2065                break;
2066            }
2067
2068            let rtr_next = r.dot(&r);
2069            let beta = rtr_next / rtr;
2070            if !beta.is_finite() || beta < 0.0 {
2071                break;
2072            }
2073            d *= beta;
2074            d -= &r;
2075            if use_mask {
2076                mask_vector_inplace(&mut d, active);
2077            }
2078            rtr = rtr_next;
2079        }
2080
2081        // Conservative fallback: steepest-descent boundary step.
2082        let g_norm2 = g_proj.dot(g_proj);
2083        if g_norm2.is_finite() && g_norm2 > 0.0 {
2084            let mut p_sd = g_proj.clone();
2085            p_sd *= -(trust_radius / g_norm2.sqrt());
2086            let pred = Self::predicted_decrease(h_model, g_proj, &p_sd);
2087            if pred.is_finite() && pred > 0.0 {
2088                return Some((p_sd, pred));
2089            }
2090        }
2091        None
2092    }
2093
2094    fn warm_inverse_from_history(
2095        &self,
2096        n: usize,
2097        history: &VecDeque<(Array1<f64>, Array1<f64>)>,
2098    ) -> Array2<f64> {
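        // Warm-start H⁻¹: scale the identity by gamma = s·y / y·y from the newest pair
        // (the standard initial inverse-Hessian scaling), then replay the stored
        // curvature pairs as inverse BFGS updates, skipping non-positive-curvature pairs.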
2099        let mut h_inv = Array2::<f64>::eye(n);
2100        let mut backup = Array2::<f64>::zeros((n, n));
2101        if let Some((s_last, y_last)) = history.back() {
2102            let sy = s_last.dot(y_last);
2103            let yy = y_last.dot(y_last);
2104            if sy.is_finite() && yy.is_finite() && sy > 1e-16 && yy > 1e-16 {
2105                let gamma = (sy / yy).clamp(1e-8, 1e8);
2106                h_inv = scaled_identity(n, gamma);
2107            }
2108        }
2109        for (s, y) in history {
2110            let sty = s.dot(y);
2111            if !sty.is_finite() || sty <= 1e-12 {
2112                continue;
2113            }
2114            if !apply_inverse_bfgs_update_in_place(&mut h_inv, s, y, &mut backup) {
2115                h_inv.assign(&backup);
2116            }
2117        }
2118        h_inv
2119    }
2120
2121    fn run_bfgs_fallback<ObjFn>(
2122        &self,
2123        obj_fn: &mut ObjFn,
2124        x_start: Array1<f64>,
2125        history: &VecDeque<(Array1<f64>, Array1<f64>)>,
2126        iter_used: usize,
2127        mut func_evals: usize,
2128        mut grad_evals: usize,
2129    ) -> Result<Solution, NewtonTrustRegionError>
2130    where
2131        ObjFn: SecondOrderObjective,
2132    {
2133        eprintln!(
2134            "[OPT-TRACE] NewtonTrustRegion -> BFGS fallback (iter_used={}, dim={})",
2135            iter_used,
2136            x_start.len()
2137        );
2138        let n = x_start.len();
2139        let h0_inv = self.warm_inverse_from_history(n, history);
2140        let bounds = self.bounds.as_ref().map(|b| Bounds { spec: b.clone() });
2141
2142        let mut bfgs = Bfgs::new(x_start, BorrowedSecondOrderAsFirstOrder::new(obj_fn))
2143            .with_tolerance(Tolerance::new(self.tolerance).expect("core tolerance must be valid"))
2144            .with_max_iterations(
2145                MaxIterations::new(self.max_iterations.saturating_sub(iter_used).max(1))
2146                    .expect("core max_iterations must be valid"),
2147            );
2148        bfgs.core.initial_b_inv = Some(SpdInverseHessian::from_verified(h0_inv).into_inner());
2149
2150        if let Some(bounds) = bounds {
2151            bfgs = bfgs.with_bounds(bounds);
2152        }
2153
2154        let fallback_sol = match bfgs.run() {
2155            Ok(sol) => sol,
2156            Err(BfgsError::LineSearchFailed { last_solution, .. }) => *last_solution,
2157            Err(BfgsError::MaxIterationsReached { last_solution }) => *last_solution,
2158            Err(BfgsError::ObjectiveFailed { message }) => {
2159                return Err(NewtonTrustRegionError::ObjectiveFailed { message });
2160            }
2161            Err(_) => return Err(NewtonTrustRegionError::ModelHessianNotSpd),
2162        };
2163        func_evals += fallback_sol.func_evals;
2164        grad_evals += fallback_sol.grad_evals;
2165        Ok(Solution {
2166            iterations: iter_used + fallback_sol.iterations,
2167            func_evals,
2168            grad_evals,
2169            ..fallback_sol
2170        })
2171    }
2172
2173    fn run<ObjFn>(&mut self, obj_fn: &mut ObjFn) -> Result<Solution, NewtonTrustRegionError>
2174    where
2175        ObjFn: SecondOrderObjective,
2176    {
2177        let n = self.x0.len();
2178        let mut x_k = self.project_point(&self.x0);
2179        let mut func_evals = 0usize;
2180        let mut grad_evals = 0usize;
2181        let mut hess_evals = 0usize;
2182        let mut oracle = SecondOrderCache::new(n, self.fd_hessian_step);
2183        let initial = oracle.eval_cost_grad_hessian(
2184            obj_fn,
2185            &x_k,
2186            self.bounds.as_ref(),
2187            &mut func_evals,
2188            &mut grad_evals,
2189            &mut hess_evals,
2190        );
2191        let mut history: VecDeque<(Array1<f64>, Array1<f64>)> =
2192            VecDeque::with_capacity(self.history_cap.max(2));
2193        let (mut f_k, mut g_k, mut h_k) = match initial {
2194            Ok(sample) => sample,
2195            Err(ObjectiveEvalError::Recoverable { .. }) => {
2196                if matches!(self.fallback_policy, FallbackPolicy::AutoBfgs) {
2197                    return self.run_bfgs_fallback(
2198                        obj_fn,
2199                        x_k.clone(),
2200                        &history,
2201                        0,
2202                        func_evals,
2203                        grad_evals,
2204                    );
2205                }
2206                return Err(NewtonTrustRegionError::NonFiniteObjective);
2207            }
2208            Err(ObjectiveEvalError::Fatal { message }) => {
2209                return Err(NewtonTrustRegionError::ObjectiveFailed { message });
2210            }
2211        };
2212        if h_k.nrows() != n || h_k.ncols() != n {
2213            return Err(NewtonTrustRegionError::HessianShapeMismatch {
2214                expected: n,
2215                got_rows: h_k.nrows(),
2216                got_cols: h_k.ncols(),
2217            });
2218        }
2219        let mut trust_radius = self.trust_radius.max(1e-8);
2220        let mut g_proj_k = self.projected_gradient(&x_k, &g_k);
2221        let mut h_model_workspace = Array2::<f64>::zeros((n, n));
2222
2223        for k in 0..self.max_iterations {
2224            let g_norm = g_proj_k.dot(&g_proj_k).sqrt();
2225            if g_norm.is_finite() && g_norm <= self.tolerance {
2226                return Ok(Solution::gradient_based(
2227                    x_k,
2228                    f_k,
2229                    g_k,
2230                    g_norm,
2231                    Some(h_k),
2232                    k,
2233                    func_evals,
2234                    grad_evals,
2235                    hess_evals,
2236                ));
2237            }
2238
2239            let h_model = if hessian_is_effectively_symmetric(&h_k) {
2240                &h_k
2241            } else {
2242                symmetrize_into(&mut h_model_workspace, &h_k);
2243                &h_model_workspace
2244            };
2245            let active = self.active_mask(&x_k, &g_k);
2246            let any_active = active.iter().copied().any(|v| v);
2247            let (trial_step, pred_dec_free) = if any_active {
2248                if !any_free_variables(&active) {
2249                    trust_radius = (trust_radius * 0.5).max(1e-12);
2250                    continue;
2251                }
2252                match self.steihaug_toint_step(h_model, &g_proj_k, trust_radius, Some(&active)) {
2253                    Some(v) => v,
2254                    None => {
2255                        trust_radius = (trust_radius * 0.5).max(1e-12);
2256                        continue;
2257                    }
2258                }
2259            } else {
2260                match self.steihaug_toint_step(h_model, &g_proj_k, trust_radius, None) {
2261                    Some(v) => v,
2262                    None => {
2263                        trust_radius = (trust_radius * 0.5).max(1e-12);
2264                        continue;
2265                    }
2266                }
2267            };
2268
2269            let x_trial_raw = &x_k + &trial_step;
2270            let x_trial = self.project_point(&x_trial_raw);
2271            let s_trial = &x_trial - &x_k;
2272            let s_norm = s_trial.dot(&s_trial).sqrt();
2273            if !s_norm.is_finite() || s_norm <= 1e-16 {
2274                trust_radius = (trust_radius * 0.5).max(1e-12);
2275                continue;
2276            }
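            // If projection bent the step, score the model decrease on the realized
            // step `s_trial` rather than on the free-space step.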
2277            let step_distortion = (&s_trial - &trial_step)
2278                .dot(&(&s_trial - &trial_step))
2279                .sqrt();
2280            let proj_changed = step_distortion > 1e-8 * (1.0 + trial_step.dot(&trial_step).sqrt());
2281            let pred_dec = if proj_changed {
2282                Self::predicted_decrease(h_model, &g_proj_k, &s_trial)
2283            } else {
2284                pred_dec_free
2285            };
2286            if !pred_dec.is_finite() || pred_dec <= 0.0 {
2287                trust_radius = (trust_radius * 0.5).max(1e-12);
2288                continue;
2289            }
2290
2291            let (f_trial, g_trial, h_trial) = match oracle.eval_cost_grad_hessian(
2292                obj_fn,
2293                &x_trial,
2294                self.bounds.as_ref(),
2295                &mut func_evals,
2296                &mut grad_evals,
2297                &mut hess_evals,
2298            ) {
2299                Ok(sample) => sample,
2300                Err(ObjectiveEvalError::Recoverable { .. }) => {
2301                    trust_radius = (trust_radius * 0.2).max(1e-12);
2302                    continue;
2303                }
2304                Err(ObjectiveEvalError::Fatal { message }) => {
2305                    return Err(NewtonTrustRegionError::ObjectiveFailed { message });
2306                }
2307            };
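            // Classic trust-region radius update driven by rho = actual / predicted decrease.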
2308            let act_dec = f_k - f_trial;
2309            let rho = act_dec / pred_dec;
2310            if rho > 0.75 && s_norm > 0.99 * trust_radius {
2311                trust_radius = (trust_radius * 2.0).min(self.trust_radius_max.max(1.0));
2312            } else if rho < 0.25 {
2313                trust_radius = (trust_radius * 0.5).max(1e-12);
2314            }
2315
2316            if rho > self.eta_accept {
2317                if h_trial.nrows() != n || h_trial.ncols() != n {
2318                    return Err(NewtonTrustRegionError::HessianShapeMismatch {
2319                        expected: n,
2320                        got_rows: h_trial.nrows(),
2321                        got_cols: h_trial.ncols(),
2322                    });
2323                }
2324                x_k = x_trial;
2325                f_k = f_trial;
2326                let y_k = &g_trial - &g_k;
2327                if s_trial.dot(&s_trial).sqrt() > 1e-14 && y_k.dot(&y_k).sqrt() > 1e-14 {
2328                    if history.len() == self.history_cap.max(2) {
2329                        history.pop_front();
2330                    }
2331                    history.push_back((s_trial.clone(), y_k));
2332                }
2333                g_k = g_trial;
2334                h_k = h_trial;
2335                g_proj_k = self.projected_gradient(&x_k, &g_k);
2336            }
2337        }
2338
2339        let g_norm = g_proj_k.dot(&g_proj_k).sqrt();
2340        Err(NewtonTrustRegionError::MaxIterationsReached {
2341            last_solution: Box::new(Solution::gradient_based(
2342                x_k,
2343                f_k,
2344                g_k,
2345                g_norm,
2346                Some(h_k),
2347                self.max_iterations,
2348                func_evals,
2349                grad_evals,
2350                hess_evals,
2351            )),
2352        })
2353    }
2354}
2355
2356impl ArcCore {
2357    fn new(x0: Array1<f64>) -> Self {
2358        Self {
2359            x0,
2360            tolerance: 1e-5,
2361            max_iterations: 100,
2362            fd_hessian_step: 1e-4,
2363            bounds: None,
2364            theta: 1.0,
2365            sigma: 1.0,
2366            sigma_min: 1e-10,
2367            sigma_max: 1e12,
2368            eta1: 0.1,
2369            eta2: 0.9,
2370            // ARC defaults: shrink regularization sharply on very successful
2371            // iterations, but grow it only conservatively on unsuccessful ones.
2372            gamma1: 0.1,
2373            gamma2: 2.0,
2374            gamma3: 2.0,
2375            fallback_policy: FallbackPolicy::AutoBfgs,
2376            history_cap: 12,
2377            subproblem_max_iterations: 80,
2378        }
2379    }
2380
2381    fn apply_profile(&mut self, profile: Profile) {
2382        match profile {
2383            Profile::Robust => {
2384                self.theta = 1.0;
2385                self.eta1 = 0.1;
2386                self.eta2 = 0.9;
2387                self.gamma1 = 0.1;
2388                self.gamma2 = 2.0;
2389                self.gamma3 = 2.0;
2390                self.fallback_policy = FallbackPolicy::AutoBfgs;
2391                self.history_cap = 12;
2392                self.subproblem_max_iterations = 80;
2393            }
2394            Profile::Deterministic => {
2395                self.theta = 1.0;
2396                self.eta1 = 0.1;
2397                self.eta2 = 0.9;
2398                self.gamma1 = 0.1;
2399                self.gamma2 = 2.0;
2400                self.gamma3 = 2.0;
2401                self.fallback_policy = FallbackPolicy::Never;
2402                self.history_cap = 2;
2403                self.subproblem_max_iterations = 80;
2404            }
2405            Profile::Aggressive => {
2406                self.theta = 1.25;
2407                self.eta1 = 0.05;
2408                self.eta2 = 0.8;
2409                self.gamma1 = 0.2;
2410                self.gamma2 = 1.5;
2411                self.gamma3 = 2.5;
2412                self.fallback_policy = FallbackPolicy::AutoBfgs;
2413                self.history_cap = 20;
2414                self.subproblem_max_iterations = 120;
2415            }
2416        }
2417    }
2418
2419    #[inline]
2420    fn project_point(&self, x: &Array1<f64>) -> Array1<f64> {
2421        if let Some(bounds) = &self.bounds {
2422            bounds.project(x)
2423        } else {
2424            x.clone()
2425        }
2426    }
2427
2428    #[inline]
2429    fn projected_gradient(&self, x: &Array1<f64>, g: &Array1<f64>) -> Array1<f64> {
2430        if let Some(bounds) = &self.bounds {
2431            bounds.projected_gradient(x, g)
2432        } else {
2433            g.clone()
2434        }
2435    }
2436
2437    fn active_mask(&self, x: &Array1<f64>, g: &Array1<f64>) -> Vec<bool> {
2438        if let Some(bounds) = &self.bounds {
2439            bounds.active_mask(x, g)
2440        } else {
2441            vec![false; x.len()]
2442        }
2443    }
2444
2445    fn warm_inverse_from_history(
2446        &self,
2447        n: usize,
2448        history: &VecDeque<(Array1<f64>, Array1<f64>)>,
2449    ) -> Array2<f64> {
2450        let mut h_inv = Array2::<f64>::eye(n);
2451        let mut backup = Array2::<f64>::zeros((n, n));
2452        if let Some((s_last, y_last)) = history.back() {
2453            let sy = s_last.dot(y_last);
2454            let yy = y_last.dot(y_last);
2455            if sy.is_finite() && yy.is_finite() && sy > 1e-16 && yy > 1e-16 {
2456                let gamma = (sy / yy).clamp(1e-8, 1e8);
2457                h_inv = scaled_identity(n, gamma);
2458            }
2459        }
2460        for (s, y) in history {
2461            let sty = s.dot(y);
2462            if !sty.is_finite() || sty <= 1e-12 {
2463                continue;
2464            }
2465            if !apply_inverse_bfgs_update_in_place(&mut h_inv, s, y, &mut backup) {
2466                h_inv.assign(&backup);
2467            }
2468        }
2469        h_inv
2470    }
2471
2472    fn run_bfgs_fallback<ObjFn>(
2473        &self,
2474        obj_fn: &mut ObjFn,
2475        x_start: Array1<f64>,
2476        history: &VecDeque<(Array1<f64>, Array1<f64>)>,
2477        iter_used: usize,
2478        mut func_evals: usize,
2479        mut grad_evals: usize,
2480    ) -> Result<Solution, ArcError>
2481    where
2482        ObjFn: SecondOrderObjective,
2483    {
2484        eprintln!(
2485            "[OPT-TRACE] ARC -> BFGS fallback (iter_used={}, dim={})",
2486            iter_used,
2487            x_start.len()
2488        );
2489        let n = x_start.len();
2490        let h0_inv = self.warm_inverse_from_history(n, history);
2491        let bounds = self.bounds.as_ref().map(|b| Bounds { spec: b.clone() });
2492
2493        let mut bfgs = Bfgs::new(x_start, BorrowedSecondOrderAsFirstOrder::new(obj_fn))
2494            .with_tolerance(Tolerance::new(self.tolerance).expect("core tolerance must be valid"))
2495            .with_max_iterations(
2496                MaxIterations::new(self.max_iterations.saturating_sub(iter_used).max(1))
2497                    .expect("core max_iterations must be valid"),
2498            );
2499        bfgs.core.initial_b_inv = Some(SpdInverseHessian::from_verified(h0_inv).into_inner());
2500
2501        if let Some(bounds) = bounds {
2502            bfgs = bfgs.with_bounds(bounds);
2503        }
2504
2505        let fallback_sol = match bfgs.run() {
2506            Ok(sol) => sol,
2507            Err(BfgsError::LineSearchFailed { last_solution, .. }) => *last_solution,
2508            Err(BfgsError::MaxIterationsReached { last_solution }) => *last_solution,
2509            Err(BfgsError::ObjectiveFailed { message }) => {
2510                return Err(ArcError::ObjectiveFailed { message });
2511            }
2512            Err(_) => return Err(ArcError::SubproblemFailed),
2513        };
2514        func_evals += fallback_sol.func_evals;
2515        grad_evals += fallback_sol.grad_evals;
2516        Ok(Solution {
2517            iterations: iter_used + fallback_sol.iterations,
2518            func_evals,
2519            grad_evals,
2520            ..fallback_sol
2521        })
2522    }
2523
2524    fn arc_model_value(
2525        &self,
2526        g: &Array1<f64>,
2527        h: &Array2<f64>,
2528        sigma: f64,
2529        s: &Array1<f64>,
2530        active: Option<&[bool]>,
2531    ) -> (f64, f64, Array1<f64>) {
2532        // Cubic model:
2533        // m(s) = g^T s + (1/2) s^T H s + (sigma/3) ||s||^3
2534        // and gradient:
2535        // ∇m(s) = g + Hs + sigma ||s|| s.
2536        let mut hs = Array1::<f64>::zeros(s.len());
2537        if let Some(active) = active {
2538            masked_hv_inplace(h, s, active, &mut hs);
2539        } else {
2540            hs.assign(&h.dot(s));
2541        }
2542        let s_norm = s.dot(s).sqrt();
2543        let cubic = (sigma / 3.0) * s_norm.powi(3);
2544        let model_delta = g.dot(s) + 0.5 * s.dot(&hs) + cubic;
2545        let mut grad_m = g + &hs + &(s * (sigma * s_norm));
2546        if let Some(active) = active {
2547            mask_vector_inplace(&mut grad_m, active);
2548        }
2549        (model_delta, s_norm, grad_m)
2550    }
2551
2552    fn cauchy_arc_step(
2553        &self,
2554        g: &Array1<f64>,
2555        h: &Array2<f64>,
2556        sigma: f64,
2557        active: Option<&[bool]>,
2558    ) -> Option<Array1<f64>> {
2559        let g_norm = g.dot(g).sqrt();
2560        if !g_norm.is_finite() || g_norm <= 0.0 {
2561            return Some(Array1::<f64>::zeros(g.len()));
2562        }
2563        let mut d = -g.clone();
2564        if let Some(active) = active {
2565            mask_vector_inplace(&mut d, active);
2566        }
2567        let g2 = g.dot(g);
2568        let mut hd = Array1::<f64>::zeros(d.len());
2569        if let Some(active) = active {
2570            masked_hv_inplace(h, &d, active, &mut hd);
2571        } else {
2572            hd.assign(&h.dot(&d));
2573        }
2574        let d_hd = d.dot(&hd);
2575        let c = sigma * g_norm.powi(3);
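        // Exact minimizer of the cubic model along d = -g:
        // c·alpha^2 + d_hd·alpha - g·g = 0; take the positive root.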
2576        let mut alpha = if c > 1e-16 {
2577            let disc = d_hd * d_hd + 4.0 * c * g2;
2578            let sqrt_disc = disc.max(0.0).sqrt();
2579            (-d_hd + sqrt_disc) / (2.0 * c)
2580        } else if d_hd > 1e-16 {
2581            g2 / d_hd
2582        } else {
2583            1.0 / g_norm.max(1.0)
2584        };
2585        if !alpha.is_finite() || alpha <= 0.0 {
2586            alpha = 1.0 / g_norm.max(1.0);
2587        }
2588        let mut s = d * alpha;
2589        let mut m = self.arc_model_value(g, h, sigma, &s, active).0;
2590        for _ in 0..8 {
2591            if m <= 0.0 {
2592                return Some(s);
2593            }
2594            s *= 0.5;
2595            m = self.arc_model_value(g, h, sigma, &s, active).0;
2596        }
2597        if m <= 0.0 { Some(s) } else { None }
2598    }
2599
2600    #[inline]
2601    fn escalate_sigma_on_failure(&mut self, failure_streak: &mut usize) {
2602        // Two-stage escalation:
2603        // - early failures: use gamma2 to avoid overreacting to transient noise,
2604        // - repeated failures: switch to gamma3 for stronger regularization.
2605        *failure_streak += 1;
2606        let growth = if *failure_streak >= 3 {
2607            self.gamma3
2608        } else {
2609            self.gamma2
2610        };
2611        self.sigma = (self.sigma * growth).min(self.sigma_max);
2612    }
2613
2614    fn solve_arc_subproblem(
2615        &self,
2616        h: &Array2<f64>,
2617        g: &Array1<f64>,
2618        sigma: f64,
2619        active: Option<&[bool]>,
2620    ) -> Option<Array1<f64>> {
2621        let g_norm = g.dot(g).sqrt();
2622        if !g_norm.is_finite() {
2623            return None;
2624        }
2625        if g_norm <= 1e-16 {
2626            return Some(Array1::<f64>::zeros(g.len()));
2627        }
2628
2629        let rhs = -g.clone();
2630        let n = g.len();
2631        let cg_base_iter = (n / 2).clamp(25, 120);
2632        let active_opt = active;
2633        let active = active.unwrap_or(&[]);
2634        let use_mask = !active.is_empty();
2635        if use_mask && !any_free_variables(active) {
2636            return Some(Array1::<f64>::zeros(g.len()));
2637        }
2638        let direct_small_dense = prefer_dense_direct(n);
2639        let (effective_h, effective_rhs) = if direct_small_dense {
2640            build_masked_subproblem_system(h, &rhs, if use_mask { Some(active) } else { None })
2641        } else {
2642            (Array2::<f64>::zeros((0, 0)), Array1::<f64>::zeros(0))
2643        };
2644        // Solve (H + lambda I)s = -g while steering lambda toward sigma*||s||.
2645        // This tracks the cubic first-order stationarity condition.
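        // At a cubic stationary point, ∇m(s) = g + Hs + sigma·‖s‖·s = 0, i.e.
        // (H + sigma·‖s‖·I)s = -g, so the consistent shift is lambda* = sigma·‖s*‖.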
2646        let mut lambda = (sigma * g_norm.sqrt()).max(1e-8);
2647        let mut best: Option<(f64, Array1<f64>)> = None;
2648        let mut hs = Array1::<f64>::zeros(n);
2649
2650        for _ in 0..self.subproblem_max_iterations {
2651            let mut s = if direct_small_dense {
2652                match dense_solve_shifted(&effective_h, &effective_rhs, lambda) {
2653                    Some(v) => v,
2654                    None => {
2655                        lambda = (2.0 * lambda).max(1e-8);
2656                        continue;
2657                    }
2658                }
2659            } else if use_mask {
2660                let mut s = Array1::<f64>::zeros(n);
2661                let mut r = rhs.clone();
2662                mask_vector_inplace(&mut r, active);
2663                let mut p = r.clone();
2664                let mut rtr = r.dot(&r);
2665                if !rtr.is_finite() {
2666                    return None;
2667                }
2668                for _ in 0..cg_base_iter {
2669                    masked_hv_inplace(h, &p, active, &mut hs);
2670                    hs.scaled_add(lambda, &p);
2671                    let denom = p.dot(&hs);
2672                    if !denom.is_finite() || denom <= 1e-14 * p.dot(&p).max(1.0) {
2673                        s.fill(f64::NAN);
2674                        break;
2675                    }
2676                    let alpha = rtr / denom;
2677                    if !alpha.is_finite() || alpha <= 0.0 {
2678                        s.fill(f64::NAN);
2679                        break;
2680                    }
2681                    s.scaled_add(alpha, &p);
2682                    r.scaled_add(-alpha, &hs);
2683                    mask_vector_inplace(&mut s, active);
2684                    mask_vector_inplace(&mut r, active);
2685                    let rtr_next = r.dot(&r);
2686                    if !rtr_next.is_finite() {
2687                        s.fill(f64::NAN);
2688                        break;
2689                    }
2690                    if rtr_next.sqrt() <= 1e-10 * g_norm.max(1.0) {
2691                        break;
2692                    }
2693                    let beta = rtr_next / rtr.max(1e-32);
2694                    if !beta.is_finite() || beta < 0.0 {
2695                        s.fill(f64::NAN);
2696                        break;
2697                    }
2698                    p *= beta;
2699                    p += &r;
2700                    mask_vector_inplace(&mut p, active);
2701                    rtr = rtr_next;
2702                }
2703                s
2704            } else {
2705                match cg_solve_adaptive(h, &rhs, cg_base_iter, 1e-10, lambda) {
2706                    Some(v) => v,
2707                    None => {
2708                        lambda = (2.0 * lambda).max(1e-8);
2709                        continue;
2710                    }
2711                }
2712            };
2713            if use_mask {
2714                mask_vector_inplace(&mut s, active);
2715            }
2716            if s.iter().any(|v| !v.is_finite()) {
2717                lambda = (2.0 * lambda).max(1e-8);
2718                continue;
2719            }
2720
2721            let (m_delta, s_norm, grad_m) =
2722                self.arc_model_value(g, h, sigma, &s, if use_mask { Some(active) } else { None });
2723            if !m_delta.is_finite() || !s_norm.is_finite() {
2724                lambda = (2.0 * lambda).max(1e-8);
2725                continue;
2726            }
2727            let grad_norm = grad_m.dot(&grad_m).sqrt();
2728            let target = self.theta * s_norm * s_norm;
2729            let merit = if target > 0.0 {
2730                grad_norm / target
2731            } else {
2732                grad_norm
2733            };
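            // Keep the best candidate by model-gradient merit so a near-miss step
            // can still be salvaged after the lambda loop ends.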
2734            if best.as_ref().map(|(bm, _)| merit < *bm).unwrap_or(true) {
2735                best = Some((merit, s.clone()));
2736            }
2737
2738            // ARC first-order progress:
2739            // m(s) <= m(0) and ||∇m(s)|| <= theta ||s||^2.
2740            // Also require near-consistency with lambda = sigma||s|| used by the
2741            // cubic first-order optimality system.
2742            let lambda_target = (sigma * s_norm).max(1e-12);
2743            let rel_lam_gap = (lambda - lambda_target).abs() / lambda.max(1.0);
2744            if m_delta <= 0.0 && grad_norm <= target.max(1e-14) && rel_lam_gap <= 0.25 {
2745                return Some(s);
2746            }
2747
2748            if m_delta > 0.0 {
2749                lambda = (2.0 * lambda.max(lambda_target)).max(1e-8);
2750            } else {
2751                // Damped fixed-point tracking of lambda = sigma||s||.
2752                // Restrict per-iteration movement to keep the sequence stable.
2753                let ratio = (lambda_target / lambda.max(1e-16)).clamp(0.25, 4.0);
2754                let lambda_next = lambda * ratio;
2755                let mixed = 0.5 * lambda + 0.5 * lambda_next;
2756                lambda = mixed.max(1e-12);
2757            }
2758        }
2759
2760        if let Some((_, s)) = best {
2761            let (m_delta, s_norm, grad_m) =
2762                self.arc_model_value(g, h, sigma, &s, if use_mask { Some(active) } else { None });
2763            let grad_norm = grad_m.dot(&grad_m).sqrt();
2764            let target = self.theta * s_norm * s_norm;
2765            if m_delta <= 0.0 && grad_norm <= target.max(1e-14) {
2766                return Some(s);
2767            }
2768        }
2769        self.cauchy_arc_step(
2770            g,
2771            h,
2772            sigma,
2773            if use_mask { Some(active) } else { active_opt },
2774        )
2775    }
2776
2777    fn run<ObjFn>(&mut self, obj_fn: &mut ObjFn) -> Result<Solution, ArcError>
2778    where
2779        ObjFn: SecondOrderObjective,
2780    {
2781        let n = self.x0.len();
2782        let mut x_k = self.project_point(&self.x0);
2783        let mut func_evals = 0usize;
2784        let mut grad_evals = 0usize;
2785        let mut hess_evals = 0usize;
2786        let mut oracle = SecondOrderCache::new(n, self.fd_hessian_step);
2787        let initial = oracle.eval_cost_grad_hessian(
2788            obj_fn,
2789            &x_k,
2790            self.bounds.as_ref(),
2791            &mut func_evals,
2792            &mut grad_evals,
2793            &mut hess_evals,
2794        );
2795        let mut history: VecDeque<(Array1<f64>, Array1<f64>)> =
2796            VecDeque::with_capacity(self.history_cap.max(2));
2797        let (mut f_k, mut g_k, mut h_k) = match initial {
2798            Ok(sample) => sample,
2799            Err(ObjectiveEvalError::Recoverable { .. }) => {
2800                if matches!(self.fallback_policy, FallbackPolicy::AutoBfgs) {
2801                    return self.run_bfgs_fallback(
2802                        obj_fn,
2803                        x_k.clone(),
2804                        &history,
2805                        0,
2806                        func_evals,
2807                        grad_evals,
2808                    );
2809                }
2810                return Err(ArcError::NonFiniteObjective);
2811            }
2812            Err(ObjectiveEvalError::Fatal { message }) => {
2813                return Err(ArcError::ObjectiveFailed { message });
2814            }
2815        };
2816        if h_k.nrows() != n || h_k.ncols() != n {
2817            return Err(ArcError::HessianShapeMismatch {
2818                expected: n,
2819                got_rows: h_k.nrows(),
2820                got_cols: h_k.ncols(),
2821            });
2822        }
2823        let mut model_failure_streak = 0usize;
2824        let mut h_model_workspace = Array2::<f64>::zeros((n, n));
2825
2826        for k in 0..self.max_iterations {
2827            let g_proj_k = self.projected_gradient(&x_k, &g_k);
2828            let g_norm = g_proj_k.dot(&g_proj_k).sqrt();
2829            if g_norm.is_finite() && g_norm <= self.tolerance {
2830                return Ok(Solution::gradient_based(
2831                    x_k,
2832                    f_k,
2833                    g_k,
2834                    g_norm,
2835                    Some(h_k),
2836                    k,
2837                    func_evals,
2838                    grad_evals,
2839                    hess_evals,
2840                ));
2841            }
2842
2843            let h_model = if hessian_is_effectively_symmetric(&h_k) {
2844                &h_k
2845            } else {
2846                symmetrize_into(&mut h_model_workspace, &h_k);
2847                &h_model_workspace
2848            };
2849            let active = self.active_mask(&x_k, &g_k);
2850            let any_active = active.iter().copied().any(|v| v);
2851            // Solve the cubic model in the full space while masking bound-active
2852            // coordinates instead of materializing reduced subspaces.
2853            let step = if any_active {
2854                if !any_free_variables(&active) {
2855                    // All coordinates are active at their bounds: increase sigma and retry.
2856                    self.escalate_sigma_on_failure(&mut model_failure_streak);
2857                    continue;
2858                }
2859                match self.solve_arc_subproblem(h_model, &g_proj_k, self.sigma, Some(&active)) {
2860                    Some(s) => s,
2861                    None => {
2862                        // Failed subproblem solve: moderate growth first, stronger only
2863                        // after repeated failures.
2864                        self.escalate_sigma_on_failure(&mut model_failure_streak);
2865                        continue;
2866                    }
2867                }
2868            } else {
2869                match self.solve_arc_subproblem(h_model, &g_proj_k, self.sigma, None) {
2870                    Some(s) => s,
2871                    None => {
2872                        // Failed subproblem solve: moderate growth first, stronger only
2873                        // after repeated failures.
2874                        self.escalate_sigma_on_failure(&mut model_failure_streak);
2875                        continue;
2876                    }
2877                }
2878            };
2879
2880            let x_trial_raw = &x_k + &step;
2881            let x_trial = self.project_point(&x_trial_raw);
2882            let s_trial = &x_trial - &x_k;
2883            let s_norm = s_trial.dot(&s_trial).sqrt();
2884            if !s_norm.is_finite() || s_norm <= 1e-16 {
2885                self.escalate_sigma_on_failure(&mut model_failure_streak);
2886                continue;
2887            }
2888            let step_distortion = (&s_trial - &step).dot(&(&s_trial - &step)).sqrt();
2889            let step_norm_ref = step.dot(&step).sqrt();
2890            let proj_changed = step_distortion > 1e-8 * (1.0 + step_norm_ref);
2891            if proj_changed {
2892                // The unconstrained cubic model was solved for `step`, not the clipped
2893                // projected step `s_trial`. Do not use ARC's rho/sigma update on the
2894                // distorted step. Instead, refresh a coherent sample at the projected
2895                // point and accept it only as a bound-activation progress step.
2896                let projected = oracle.eval_cost_grad_hessian(
2897                    obj_fn,
2898                    &x_trial,
2899                    self.bounds.as_ref(),
2900                    &mut func_evals,
2901                    &mut grad_evals,
2902                    &mut hess_evals,
2903                );
2904                let (f_trial, g_trial, h_trial) = match projected {
2905                    Ok(sample) => sample,
2906                    Err(ObjectiveEvalError::Recoverable { .. }) => {
2907                        self.escalate_sigma_on_failure(&mut model_failure_streak);
2908                        continue;
2909                    }
2910                    Err(ObjectiveEvalError::Fatal { message }) => {
2911                        return Err(ArcError::ObjectiveFailed { message });
2912                    }
2913                };
2914                if h_trial.nrows() != n || h_trial.ncols() != n {
2915                    return Err(ArcError::HessianShapeMismatch {
2916                        expected: n,
2917                        got_rows: h_trial.nrows(),
2918                        got_cols: h_trial.ncols(),
2919                    });
2920                }
2921                let g_proj_trial = self.projected_gradient(&x_trial, &g_trial);
2922                let g_proj_trial_norm = g_proj_trial.dot(&g_proj_trial).sqrt();
2923                if f_trial <= f_k
2924                    && (g_proj_trial_norm <= g_norm || g_proj_trial_norm <= self.tolerance)
2925                {
2926                    let y_k = &g_trial - &g_k;
2927                    if s_norm > 1e-14 && y_k.dot(&y_k).sqrt() > 1e-14 {
2928                        if history.len() == self.history_cap.max(2) {
2929                            history.pop_front();
2930                        }
2931                        history.push_back((s_trial.clone(), y_k));
2932                    }
2933                    x_k = x_trial;
2934                    f_k = f_trial;
2935                    g_k = g_trial;
2936                    h_k = h_trial;
2937                    model_failure_streak = 0;
2938                    // Bias the next cubic solve toward smaller feasible steps after
2939                    // a bound-clipped move.
2940                    self.sigma = (self.sigma * self.gamma2).min(self.sigma_max);
2941                } else {
2942                    self.escalate_sigma_on_failure(&mut model_failure_streak);
2943                }
2944                continue;
2945            }
2946            let (m_delta_trial, _, grad_m_trial) =
2947                self.arc_model_value(&g_proj_k, h_model, self.sigma, &s_trial, Some(&active));
2948
2949            // Enforce ARC first-order subproblem progress on the actual trial step
2950            // (after possible box projection):
2951            // m(s) <= m(0) and ||∇m(s)|| <= theta ||s||^2.
2952            let grad_m_norm = grad_m_trial.dot(&grad_m_trial).sqrt();
2953            let target_m = self.theta * s_norm * s_norm;
2954            if !m_delta_trial.is_finite()
2955                || !grad_m_norm.is_finite()
2956                || m_delta_trial > 0.0
2957                || grad_m_norm > target_m.max(1e-14)
2958            {
2959                self.escalate_sigma_on_failure(&mut model_failure_streak);
2960                continue;
2961            }
2962
2963            // Standard ARC predicted reduction is m(0) - m(s) = -m(s),
2964            // where `m_delta_trial` already includes the cubic term.
2965            let denom = -m_delta_trial;
2966            if !denom.is_finite() || denom <= 0.0 {
2967                self.escalate_sigma_on_failure(&mut model_failure_streak);
2968                continue;
2969            }
2970
2971            let (f_trial, g_trial, h_trial) = match oracle.eval_cost_grad_hessian(
2972                obj_fn,
2973                &x_trial,
2974                self.bounds.as_ref(),
2975                &mut func_evals,
2976                &mut grad_evals,
2977                &mut hess_evals,
2978            ) {
2979                Ok(sample) => sample,
2980                Err(ObjectiveEvalError::Recoverable { .. }) => {
2981                    self.escalate_sigma_on_failure(&mut model_failure_streak);
2982                    continue;
2983                }
2984                Err(ObjectiveEvalError::Fatal { message }) => {
2985                    return Err(ArcError::ObjectiveFailed { message });
2986                }
2987            };
2988            let rho = (f_k - f_trial) / denom;
2989            model_failure_streak = 0;
2990            // ARC accept/reject decision:
2991            // accept trial point iff rho >= eta1.
2992            if rho >= self.eta1 {
2993                if h_trial.nrows() != n || h_trial.ncols() != n {
2994                    return Err(ArcError::HessianShapeMismatch {
2995                        expected: n,
2996                        got_rows: h_trial.nrows(),
2997                        got_cols: h_trial.ncols(),
2998                    });
2999                }
3000                let y_k = &g_trial - &g_k;
3001                if s_norm > 1e-14 && y_k.dot(&y_k).sqrt() > 1e-14 {
3002                    if history.len() == self.history_cap.max(2) {
3003                        history.pop_front();
3004                    }
3005                    history.push_back((s_trial.clone(), y_k));
3006                }
3007                x_k = x_trial;
3008                f_k = f_trial;
3009                g_k = g_trial;
3010                h_k = h_trial;
3011            }
3012
3013            // Canonical ARC sigma update:
3014            // very successful -> decrease; successful -> keep; unsuccessful -> increase.
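            // E.g. with the Robust profile (eta1=0.1, eta2=0.9, gamma1=0.1, gamma2=2.0):
            // rho=0.95 -> sigma*0.1; rho=0.3 -> sigma unchanged; rho=-0.2 -> sigma*2.0
            // (all subject to the sigma_min/sigma_max caps).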
3015            if rho >= self.eta2 {
3016                self.sigma = (self.sigma * self.gamma1).max(self.sigma_min);
3017            } else if rho >= self.eta1 {
3018                self.sigma = self.sigma.max(self.sigma_min);
3019            } else if rho.is_finite() {
3020                self.sigma = (self.sigma * self.gamma2).min(self.sigma_max);
3021            } else {
3022                // Numerically pathological ratio: use the stronger growth factor.
3023                self.sigma = (self.sigma * self.gamma3).min(self.sigma_max);
3024            }
3025        }
3026
3027        let g_proj_k = self.projected_gradient(&x_k, &g_k);
3028        let g_norm = g_proj_k.dot(&g_proj_k).sqrt();
3029        Err(ArcError::MaxIterationsReached {
3030            last_solution: Box::new(Solution::gradient_based(
3031                x_k,
3032                f_k,
3033                g_k,
3034                g_norm,
3035                Some(h_k),
3036                self.max_iterations,
3037                func_evals,
3038                grad_evals,
3039                hess_evals,
3040            )),
3041        })
3042    }
3043}
3044
3045/// Core configuration and adaptive state for the BFGS solver.
3046struct BfgsCore {
3047    x0: Array1<f64>,
3048    // --- Configuration ---
3049    tolerance: f64,
3050    max_iterations: usize,
3051    c1: f64,
3052    c2: f64,
3053    tau_f: f64,
3054    tau_g: f64,
3055    bounds: Option<BoxSpec>,
3056    flat_step_policy: FlatStepPolicy,
3057    rng_state: u64,
3058    flat_accept_streak: usize,
3059    rescue_policy: RescuePolicy,
3060    stall_policy: StallPolicy,
3061    stall_noimprove_streak: usize,
3062    // Curvature slack scaling under noise
3063    curv_slack_scale: f64,
3064    // Gradient drop factor (adapts after flats)
3065    grad_drop_factor: f64,
3066    // No-improvement termination guard
3067    tol_f_rel: f64,
3068    max_no_improve: usize,
3069    no_improve_streak: usize,
3070    // --- Private adaptive state (no API change) ---
3071    gll: GllWindow,
3072    c1_adapt: f64,
3073    c2_adapt: f64,
3074    wolfe_fail_streak: usize,
3075    primary_strategy: LineSearchStrategy,
3076    trust_radius: f64,
3077    global_best: Option<ProbeBest>,
3078    // Diagnostics counters
3079    nonfinite_seen: bool,
3080    wolfe_clean_successes: usize,
3081    bt_clean_successes: usize,
3082    ls_failures_in_row: usize,
3083    chol_fail_iters: usize,
3084    spd_fail_seen: bool,
3085    initial_b_inv: Option<Array2<f64>>,
3086    initial_grad_norm: f64,
3087    local_mode: bool,
3088}
3089
3090/// A configurable BFGS solver.
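///
/// A minimal usage sketch (marked `ignore`, so it is not compiled as a doctest;
/// `MyObjective` is a placeholder for any `FirstOrderObjective` implementation):
///
/// ```ignore
/// let solution = Bfgs::new(x0, MyObjective)
///     .with_tolerance(Tolerance::new(1e-6).expect("valid tolerance"))
///     .with_max_iterations(MaxIterations::new(200).expect("valid iteration cap"))
///     .run()?;
/// ```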
3091pub struct Bfgs<ObjFn> {
3092    core: BfgsCore,
3093    obj_fn: ObjFn,
3094}
3095
3096impl BfgsCore {
3097    const FALLBACK_THRESHOLD: usize = 3;
3098
3099    fn projected_gradient(&self, x: &Array1<f64>, g: &Array1<f64>) -> Array1<f64> {
3100        if let Some(bounds) = &self.bounds {
3101            bounds.projected_gradient(x, g)
3102        } else {
3103            g.clone()
3104        }
3105    }
3106
3107    fn active_mask(&self, x: &Array1<f64>, g: &Array1<f64>) -> Vec<bool> {
3108        if let Some(bounds) = &self.bounds {
3109            bounds.active_mask(x, g)
3110        } else {
3111            vec![false; x.len()]
3112        }
3113    }
3114
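    // Take the trial point x + alpha·d, project it into the box, and report whether
    // projection bent the step ("kinked").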
3115    fn project_with_step(
3116        &self,
3117        x: &Array1<f64>,
3118        d: &Array1<f64>,
3119        alpha: f64,
3120    ) -> (Array1<f64>, Array1<f64>, bool) {
3121        let trial = x + alpha * d;
3122        let x_new = self.project_point(&trial);
3123        let kinked = (&x_new - &trial)
3124            .iter()
3125            .zip(trial.iter())
3126            .any(|(dv, tv)| dv.abs() > 1e-12 * (1.0 + tv.abs()));
3127        let s = &x_new - x;
3128        (x_new, s, kinked)
3129    }
3130
3131    #[inline]
3132    fn step_tolerance(&self, x: &Array1<f64>) -> f64 {
3133        1e-12 * (1.0 + x.dot(x).sqrt()) + 1e-16
3134    }
3135
3136    #[inline]
3137    fn feasible_step_small(&self, x_prev: &Array1<f64>, x_next: &Array1<f64>) -> bool {
3138        let s = x_next - x_prev;
3139        self.projected_step_small(x_prev, &s)
3140    }
3141
3142    #[inline]
3143    fn projected_step_small(&self, x_prev: &Array1<f64>, s: &Array1<f64>) -> bool {
3144        s.dot(s).sqrt() <= self.step_tolerance(x_prev)
3145    }
3146
3147    #[inline]
3148    fn stagnation_converged(
3149        &self,
3150        x_prev: &Array1<f64>,
3151        x_next: &Array1<f64>,
3152        g_proj_next: &Array1<f64>,
3153    ) -> bool {
3154        let gnorm = g_proj_next.dot(g_proj_next).sqrt();
3155        gnorm < self.tolerance || self.feasible_step_small(x_prev, x_next)
3156    }
3157
3158    #[inline]
3159    fn update_no_improve_streak(&mut self, rel_impr: f64) -> bool {
3160        if rel_impr <= self.tol_f_rel {
3161            self.no_improve_streak += 1;
3162        } else {
3163            self.no_improve_streak = 0;
3164        }
3165        self.no_improve_streak >= self.max_no_improve
3166    }
3167
3168    // Attempt one trust-region dogleg step. Updates trust radius and, on success,
3169    // returns new (x, f, g) and updates `b_inv` cautiously. On failure, may shrink Δ.
3170    fn try_trust_region_step<ObjFn>(
3171        &mut self,
3172        obj_fn: &mut ObjFn,
3173        oracle: &mut FirstOrderCache,
3174        b_inv: &mut Array2<f64>,
3175        x_k: &Array1<f64>,
3176        f_k: f64,
3177        g_k: &Array1<f64>,
3178        func_evals: &mut usize,
3179        grad_evals: &mut usize,
3180    ) -> Option<(Array1<f64>, f64, Array1<f64>)>
3181    where
3182        ObjFn: FirstOrderObjective,
3183    {
3184        let n = b_inv.nrows();
3185        let mut b_inv_backup = Array2::<f64>::zeros((n, n));
3186        let delta = self.trust_radius;
3187        let g_proj_k = self.projected_gradient(x_k, g_k);
3188        let active = self.active_mask(x_k, g_k);
3189        let active_before = active.clone();
3190        let active_opt = if active.iter().copied().any(|v| v) {
3191            if !any_free_variables(&active) {
3192                self.trust_radius = (delta * 0.5).max(1e-12);
3193                return None;
3194            }
3195            Some(active.as_slice())
3196        } else {
3197            None
3198        };
3199        let (p_tr, pred_dec_tr) = self.trust_region_dogleg(b_inv, &g_proj_k, delta, active_opt)?;
3200        let raw_try = x_k + &p_tr;
3201        let x_try = self.project_point(&raw_try);
3202        let s_tr = &x_try - x_k;
3203        let g_old = g_k.clone();
3204        let (f_try, g_try) =
3205            bfgs_eval_cost_grad(oracle, obj_fn, &x_try, func_evals, grad_evals).ok()?;
3206        let act_dec = f_k - f_try;
3207        let p_diff = &s_tr - &p_tr;
3208        let p_diff_norm = p_diff.dot(&p_diff).sqrt();
3209        let p_norm = p_tr.dot(&p_tr).sqrt();
3210        let proj_changed = p_diff_norm > 1e-6 * (1.0 + p_norm);
3211        if proj_changed {
3212            // If projection materially changes the step, require descent at x_k.
3213            let descent_ok = g_proj_k.dot(&s_tr) <= -eps_g(&g_proj_k, &s_tr, self.tau_g);
3214            if !descent_ok {
3215                self.trust_radius = (delta * 0.5).max(1e-12);
3216                return None;
3217            }
3218        }
3219        let pred_dec = if proj_changed {
3220            self.trust_region_predicted_decrease(b_inv, &g_proj_k, &s_tr, active_opt)?
3221        } else {
3222            pred_dec_tr
3223        };
3224        if !pred_dec.is_finite() || pred_dec <= 0.0 {
3225            self.trust_radius = (delta * 0.5).max(1e-12);
3226            return None;
3227        }
3228        let rho = act_dec / pred_dec;
3229        if rho > 0.75 && s_tr.dot(&s_tr).sqrt() > 0.99 * delta {
3230            self.trust_radius = (delta * 2.0).min(1e6);
3231        } else if rho < 0.25 {
3232            self.trust_radius = (delta * 0.5).max(1e-12);
3233        }
3234        if rho <= 0.1 || !f_try.is_finite() || g_try.iter().any(|v| !v.is_finite()) {
3235            return None;
3236        }
3237        // Accept TR step
3238        // Update GLL window and global best
3239        self.gll.push(f_try);
3240        let maybe_f = self.global_best.as_ref().map(|b| b.f);
3241        if let Some(bf) = maybe_f {
3242            if f_try < bf - eps_f(bf, self.tau_f) {
3243                self.global_best = Some(ProbeBest {
3244                    f: f_try,
3245                    x: x_try.clone(),
3246                    g: g_try.clone(),
3247                });
3248            }
3249        } else {
3250            self.global_best = Some(ProbeBest::new(&x_try, f_try, &g_try));
3251        }
3252
3253        // Inverse update: skip on poor model; otherwise cautious Powell-damped.
3254        let poor_model = rho <= 0.25;
3255        let mut s_update = s_tr.clone();
3256        let mut y_update = &g_try - &g_old;
3257        if let Some(bounds) = &self.bounds {
3258            let active_after = bounds.active_mask(&x_try, &g_try);
3259            for i in 0..n {
3260                let tiny_step = s_update[i].abs() <= 1e-14 * (1.0 + x_k[i].abs());
3261                if (active_before[i] && active_after[i]) || tiny_step {
3262                    s_update[i] = 0.0;
3263                    y_update[i] = 0.0;
3264                }
3265            }
3266        }
3267        let s_norm_tr = s_update.dot(&s_update).sqrt();
3268        let mut update_status = "applied";
3269        if !poor_model && s_norm_tr > 1e-14 {
3270            let mean_diag = (0..n).map(|i| b_inv[[i, i]].abs()).sum::<f64>() / (n as f64);
3271            let ridge = (1e-10 * mean_diag).max(1e-16);
3272            // Compute B s via CG on H (since H = B^{-1}) for Powell damping.
3273            if let Some(h_s) = cg_solve_adaptive(b_inv, &s_update, 25, 1e-10, ridge) {
3274                let s_h_s = s_update.dot(&h_s);
3275                let sy_tr = s_update.dot(&y_update);
3276                let denom_raw = s_h_s - sy_tr;
3277                let denom = if denom_raw <= 0.0 { 1e-16 } else { denom_raw };
3278                let theta_raw = if sy_tr < 0.2 * s_h_s {
3279                    (0.8 * s_h_s) / denom
3280                } else {
3281                    1.0
3282                };
3283                let theta = theta_raw.clamp(0.0, 1.0);
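                // Powell damping identity behind the θ above: with S = sᵀBs and
                // Y = sᵀy, the damped pair uses ỹ = θy + (1 - θ)Bs, so
                //     sᵀỹ = θY + (1 - θ)S = S + θ(Y - S) = 0.2 S
                // when θ = 0.8 S / (S - Y), flooring the curvature at 20% of sᵀBs.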
3284                let mut y_tilde = &y_update * theta + &h_s * (1.0 - theta);
3285                let mut sty = s_update.dot(&y_tilde);
3286                let mut y_norm = y_tilde.dot(&y_tilde).sqrt();
3287                let kappa = 1e-4;
3288                let min_curv = kappa * s_norm_tr * y_norm;
3289                if sty < min_curv {
3290                    let beta = (min_curv - sty) / (s_norm_tr * s_norm_tr);
3291                    y_tilde = &y_tilde + &s_update * beta;
3292                    sty = s_update.dot(&y_tilde);
3293                    y_norm = y_tilde.dot(&y_tilde).sqrt();
3294                }
3295                let rel = if s_norm_tr > 0.0 && y_norm > 0.0 {
3296                    sty / (s_norm_tr * y_norm)
3297                } else {
3298                    0.0
3299                };
3300                if !sty.is_finite() || rel < 1e-8 {
3301                    update_status = "skipped";
3302                    for i in 0..n {
3303                        b_inv[[i, i]] *= 1.0 + 1e-3;
3304                    }
3305                } else {
3306                    if !apply_inverse_bfgs_update_in_place(
3307                        b_inv,
3308                        &s_update,
3309                        &y_tilde,
3310                        &mut b_inv_backup,
3311                    ) {
3312                        b_inv.assign(&b_inv_backup);
3313                        for i in 0..n {
3314                            b_inv[[i, i]] += 1e-6;
3315                        }
3316                        update_status = "reverted";
3317                    }
3318                }
3319                if !has_finite_positive_diagonal(b_inv) {
3320                    for i in 0..n {
3321                        b_inv[[i, i]] += 1e-12;
3322                    }
3323                }
3324            } else {
3325                self.spd_fail_seen = true;
3326                self.chol_fail_iters += 1;
3327                update_status = "skipped";
3328            }
3329            if self.spd_fail_seen && self.chol_fail_iters >= 2 {
3330                let sy = s_update.dot(&y_update);
3331                let yy = y_update.dot(&y_update);
3332                let mut lambda = if yy > 0.0 { (sy / yy).abs() } else { 1.0 };
3333                lambda = lambda.clamp(1e-6, 1e6);
3334                *b_inv = scaled_identity(n, lambda);
3335                self.chol_fail_iters = 0;
3336                update_status = "reverted";
3337            }
3338        } else {
3339            update_status = "skipped";
3340        }
3341        log::info!(
3342            "[BFGS] step accepted via {:?}; inverse update {}",
3343            AcceptKind::TrustRegion,
3344            update_status
3345        );
3346        Some((x_try, f_try, g_try))
3347    }
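    // Radius and acceptance policy of `try_trust_region_step`, restated compactly:
    //     ρ = (f_k - f_try) / pred_dec;
    //     ρ > 0.75 and ||s|| > 0.99 Δ  =>  Δ <- min(2Δ, 1e6);
    //     ρ < 0.25                     =>  Δ <- max(Δ/2, 1e-12);
    //     ρ ≤ 0.1, or non-finite f/g   =>  the step is rejected (None).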
3348
3349    /// Creates a new BFGS core configuration.
3350    fn new(x0: Array1<f64>) -> Self {
3351        Self {
3352            x0,
3353            tolerance: 1e-5,
3354            max_iterations: 100,
3355            c1: 1e-4, // Standard value for sufficient decrease
3356            c2: 0.9,  // Standard value for curvature condition
3357            tau_f: 1e3,
3358            tau_g: 1e2,
3359            bounds: None,
3360            flat_step_policy: FlatStepPolicy::MidpointWithJiggle { scale: 1e-3 },
3361            rng_state: 0xB5F0_D00D_1234_5678u64,
3362            flat_accept_streak: 0,
3363            rescue_policy: RescuePolicy::CoordinateHybrid {
3364                pool_mult: 4.0,
3365                heads: 2,
3366            },
3367            stall_policy: StallPolicy::On { window: 3 },
3368            stall_noimprove_streak: 0,
3369            curv_slack_scale: 1.0,
3370            grad_drop_factor: 0.9,
3371            tol_f_rel: 1e-8,
3372            max_no_improve: 5,
3373            no_improve_streak: 0,
3374            gll: GllWindow::new(8),
3375            c1_adapt: 1e-4,
3376            c2_adapt: 0.9,
3377            wolfe_fail_streak: 0,
3378            primary_strategy: LineSearchStrategy::StrongWolfe,
3379            trust_radius: 1.0,
3380            global_best: None,
3381            nonfinite_seen: false,
3382            wolfe_clean_successes: 0,
3383            bt_clean_successes: 0,
3384            ls_failures_in_row: 0,
3385            chol_fail_iters: 0,
3386            spd_fail_seen: false,
3387            initial_b_inv: None,
3388            initial_grad_norm: 0.0,
3389            local_mode: false,
3390        }
3391    }
3392
3393    fn apply_profile(&mut self, profile: Profile) {
3394        match profile {
3395            Profile::Robust => {
3396                self.tau_f = 1e3;
3397                self.tau_g = 1e2;
3398                self.flat_step_policy = FlatStepPolicy::MidpointWithJiggle { scale: 1e-3 };
3399                self.rescue_policy = RescuePolicy::CoordinateHybrid {
3400                    pool_mult: 4.0,
3401                    heads: 2,
3402                };
3403                self.stall_policy = StallPolicy::On { window: 3 };
3404                self.curv_slack_scale = 1.0;
3405                self.tol_f_rel = 1e-8;
3406                self.max_no_improve = 5;
3407            }
3408            Profile::Deterministic => {
3409                self.tau_f = 1e2;
3410                self.tau_g = 1e2;
3411                self.flat_step_policy = FlatStepPolicy::Strict;
3412                self.rescue_policy = RescuePolicy::Off;
3413                self.stall_policy = StallPolicy::On { window: 3 };
3414                self.curv_slack_scale = 1.0;
3415                self.tol_f_rel = 1e-8;
3416                self.max_no_improve = 5;
3417            }
3418            Profile::Aggressive => {
3419                self.tau_f = 1e4;
3420                self.tau_g = 1e3;
3421                self.flat_step_policy = FlatStepPolicy::MidpointWithJiggle { scale: 1e-3 };
3422                self.rescue_policy = RescuePolicy::CoordinateHybrid {
3423                    pool_mult: 6.0,
3424                    heads: 4,
3425                };
3426                self.stall_policy = StallPolicy::Off;
3427                self.curv_slack_scale = 2.0;
3428                self.tol_f_rel = 1e-10;
3429                self.max_no_improve = 10;
3430            }
3431        }
3432    }
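    // Profile settings at a glance (values from the match arms above):
    //     Robust:        tau_f=1e3, tau_g=1e2, jiggle 1e-3, rescue (pool 4.0, heads 2), stall window 3, curv slack 1.0, tol_f_rel=1e-8,  max_no_improve=5
    //     Deterministic: tau_f=1e2, tau_g=1e2, strict flats, rescue off,                stall window 3, curv slack 1.0, tol_f_rel=1e-8,  max_no_improve=5
    //     Aggressive:    tau_f=1e4, tau_g=1e3, jiggle 1e-3, rescue (pool 6.0, heads 4), stall off,      curv slack 2.0, tol_f_rel=1e-10, max_no_improve=10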
3433
3434    #[inline]
3435    fn accept_armijo(&self, f_k: f64, gk_ts: f64, f_i: f64) -> bool {
3436        let c1 = self.c1_adapt;
3437        let epsf_k = eps_f(f_k, self.tau_f);
3438        f_i <= f_k + c1 * gk_ts + epsf_k
3439    }
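    // `accept_armijo` in equation form, assuming the caller passes the directional
    // term gk_ts = gᵀ(αd) and with ε_f = eps_f(f_k, tau_f):
    //     f(x_k + αd) ≤ f(x_k) + c1·gᵀ(αd) + ε_f.
    // The ε_f slack keeps small objective noise from rejecting genuinely flat steps.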
3440
3441    #[inline]
3442    fn accept_gll_nonmonotone(&self, fmax: f64, gk_ts: f64, f_i: f64) -> bool {
3443        !self.local_mode && {
3444            let c1 = self.c1_adapt;
3445            let epsf_max = eps_f(fmax, self.tau_f);
3446            f_i <= fmax + c1 * gk_ts + epsf_max
3447        }
3448    }
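    // `accept_gll_nonmonotone` is the GLL (Grippo-Lampariello-Lucidi) variant of the
    // same test: the reference value is fmax, the largest f in the recent GLL window,
    // rather than f_k, so short uphill moves are tolerated while the window trends down:
    //     f_i ≤ fmax + c1·gk_ts + ε_f(fmax).
    // In local mode this acceptor is disabled and always returns false.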
3449
3450    #[inline]
3451    fn relaxed_acceptors_enabled(&self) -> bool {
3452        !self.local_mode
3453    }
3454
3455    #[inline]
3456    fn jiggle_enabled(&self) -> bool {
3457        matches!(
3458            self.flat_step_policy,
3459            FlatStepPolicy::MidpointWithJiggle { .. }
3460        ) && !self.local_mode
3461    }
3462
3463    #[inline]
3464    fn jiggle_scale(&self) -> f64 {
3465        match self.flat_step_policy {
3466            FlatStepPolicy::MidpointWithJiggle { scale } => scale,
3467            FlatStepPolicy::Strict => 0.0,
3468        }
3469    }
3470
3471    #[inline]
3472    fn rescue_enabled(&self) -> bool {
3473        !matches!(self.rescue_policy, RescuePolicy::Off) && !self.local_mode
3474    }
3475
3476    #[inline]
3477    fn refresh_local_mode(&mut self, g_norm: f64) {
3478        let baseline = self.initial_grad_norm.max(self.tolerance).max(1e-16);
3479        let gradient_small = g_norm <= 1e-2 * baseline;
3480        let clean_successes = self.wolfe_clean_successes + self.bt_clean_successes;
3481        self.local_mode = gradient_small || clean_successes >= 5;
3482        if self.local_mode {
3483            self.primary_strategy = LineSearchStrategy::StrongWolfe;
3484            self.c1_adapt = self.c1;
3485            self.c2_adapt = self.c2;
3486            self.flat_accept_streak = 0;
3487            self.curv_slack_scale = 1.0;
3488            self.grad_drop_factor = 0.9;
3489            self.gll.set_cap(1);
3490        }
3491    }
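    // Example of the `refresh_local_mode` trigger: with initial_grad_norm = 1.0 and
    // tolerance = 1e-5 the baseline is 1.0, so local mode engages once ||g|| ≤ 1e-2,
    // or earlier after five combined clean line-search successes; it then restores
    // the canonical Wolfe constants and shrinks the GLL window to 1 (monotone).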
3492
3493    fn trust_region_dogleg(
3494        &self,
3495        b_inv: &Array2<f64>,
3496        g: &Array1<f64>,
3497        delta: f64,
3498        active: Option<&[bool]>,
3499    ) -> Option<(Array1<f64>, f64)> {
3500        // Solve H z = g without full factorization (H = B_inv).
3501        let n = b_inv.nrows();
3502        let active = active.unwrap_or(&[]);
3503        let use_mask = !active.is_empty();
3504        if use_mask && !any_free_variables(active) {
3505            return None;
3506        }
3507        let mean_diag = (0..n).map(|i| b_inv[[i, i]].abs()).sum::<f64>() / (n as f64);
3508        let ridge = (1e-10 * mean_diag).max(1e-16);
3509        let z = if use_mask {
3510            cg_solve_masked_adaptive(b_inv, g, active, 50, 1e-10, ridge)?
3511        } else {
3512            cg_solve_adaptive(b_inv, g, 50, 1e-10, ridge)?
3513        };
3514        let gnorm2 = g.dot(g);
3515        if !gnorm2.is_finite() || gnorm2 <= 0.0 {
3516            return None;
3517        }
3518        let gHg = g.dot(&z).max(1e-16);
3519        // Cauchy step
3520        let tau = gnorm2 / gHg;
3521        let p_u = -&(g * tau);
3522        // Newton/BFGS step
3523        let mut h_g = Array1::<f64>::zeros(n);
3524        if use_mask {
3525            masked_hv_inplace(b_inv, g, active, &mut h_g);
3526        } else {
3527            h_g.assign(&b_inv.dot(g));
3528        }
3529        let p_b = -h_g;
3530        let p_b_norm = p_b.dot(&p_b).sqrt();
3531        if p_b_norm <= delta {
3532            let pred_dec = self.trust_region_predicted_decrease(
3533                b_inv,
3534                g,
3535                &p_b,
3536                if use_mask { Some(active) } else { None },
3537            )?;
3538            return Some((p_b, pred_dec));
3539        }
3540        let p_u_norm = p_u.dot(&p_u).sqrt();
3541        if p_u_norm >= delta {
3542            let p = -g * (delta / gnorm2.sqrt());
3543            let pred_dec = self.trust_region_predicted_decrease(
3544                b_inv,
3545                g,
3546                &p,
3547                if use_mask { Some(active) } else { None },
3548            )?;
3549            return Some((p, pred_dec));
3550        }
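        // Boundary intersection in closed form: with p(t) = p_u + t(p_b - p_u),
        // ||p(t)||² = Δ² expands to the quadratic a·t² + b·t + c = 0 with
        //     a = ||p_b - p_u||²,  b = 2·p_uᵀ(p_b - p_u),  c = ||p_u||² - Δ²,
        // which is exactly what the coefficients below compute.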
3551        // Dogleg: follow the segment from p_u toward p_b until it crosses the boundary.
3552        let s = &p_b - &p_u;
3553        let a = s.dot(&s);
3554        let b = 2.0 * p_u.dot(&s);
3555        let c = p_u.dot(&p_u) - delta * delta;
3556        let disc = b * b - 4.0 * a * c;
3557        if !disc.is_finite() || disc < 0.0 {
3558            return None;
3559        }
3560        let sqrt_disc = disc.sqrt();
3561        let t1 = (-b - sqrt_disc) / (2.0 * a);
3562        let t2 = (-b + sqrt_disc) / (2.0 * a);
3563        // Pick a valid root in (0, 1); if both qualify, choose the smaller (more conservative).
3564        let mut candidates: Vec<f64> = vec![];
3565        if t1.is_finite() && t1 > 0.0 && t1 < 1.0 {
3566            candidates.push(t1);
3567        }
3568        if t2.is_finite() && t2 > 0.0 && t2 < 1.0 {
3569            candidates.push(t2);
3570        }
3571        let t: f64 = if !candidates.is_empty() {
3572            candidates.into_iter().fold(1.0, f64::min)
3573        } else {
3574            0.5
3575        };
3576        let mut p = &p_u + &(s * t);
3577        let p_norm = p.dot(&p).sqrt();
3578        if p_norm.is_finite() && p_norm > delta && delta.is_finite() && delta > 0.0 {
3579            p = p * (delta / p_norm);
3580        }
3581        let pred_dec = self.trust_region_predicted_decrease(
3582            b_inv,
3583            g,
3584            &p,
3585            if use_mask { Some(active) } else { None },
3586        )?;
3587        Some((p, pred_dec))
3588    }
3589
3590    fn trust_region_predicted_decrease(
3591        &self,
3592        b_inv: &Array2<f64>,
3593        g: &Array1<f64>,
3594        s: &Array1<f64>,
3595        active: Option<&[bool]>,
3596    ) -> Option<f64> {
3597        let n = b_inv.nrows();
3598        let mean_diag = (0..n).map(|i| b_inv[[i, i]].abs()).sum::<f64>() / (n as f64);
3599        let ridge = (1e-10 * mean_diag).max(1e-16);
3600        let hs = if let Some(active) = active {
3601            cg_solve_masked_adaptive(b_inv, s, active, 50, 1e-10, ridge)?
3602        } else {
3603            cg_solve_adaptive(b_inv, s, 50, 1e-10, ridge)?
3604        };
3605        let pred = g.dot(s) + 0.5 * s.dot(&hs);
3606        let pred_dec = -pred;
3607        if pred_dec.is_finite() && pred_dec > 0.0 {
3608            Some(pred_dec)
3609        } else {
3610            None
3611        }
3612    }
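    // Model decrease in equation form: for the local quadratic model
    //     m(s) = f + gᵀs + ½·sᵀBs,
    // the predicted decrease is m(0) - m(s) = -(gᵀs + ½·sᵀBs); Bs is recovered by
    // CG-solving H·(Bs) = s, since only H = B⁻¹ is stored.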
3613
3614    fn project_point(&self, x: &Array1<f64>) -> Array1<f64> {
3615        if let Some(bounds) = &self.bounds {
3616            bounds.project(x)
3617        } else {
3618            x.clone()
3619        }
3620    }
3621
3622    // Tiny xorshift64* RNG for jiggling without external deps. Returns in [-1, 1).
3623    fn next_rand_sym(&mut self) -> f64 {
3624        let mut x = self.rng_state;
3625        // xorshift64*
3626        x ^= x >> 12;
3627        x ^= x << 25;
3628        x ^= x >> 27;
3629        x = x.wrapping_mul(0x2545F4914F6CDD1Du64);
3630        self.rng_state = x;
3631        // Map to [0, 1): use the upper 53 bits as the f64 fraction
3632        let u = ((x >> 11) as f64) * (1.0 / (1u64 << 53) as f64);
3633        2.0 * u - 1.0
3634    }
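    // Output mapping of `next_rand_sym` as an equation: u = (x >> 11) / 2⁵³ ∈ [0, 1),
    // so the returned 2u - 1 lies in [-1, 1). The stream depends only on `rng_state`,
    // so a fixed seed (0xB5F0_D00D_1234_5678 by default) reproduces identical jiggles.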
3635
3636    fn run<ObjFn>(&mut self, obj_fn: &mut ObjFn) -> Result<Solution, BfgsError>
3637    where
3638        ObjFn: FirstOrderObjective,
3639    {
3640        let n = self.x0.len();
3641        let mut x_k = self.project_point(&self.x0);
3642        let mut oracle = FirstOrderCache::new(x_k.len());
3643        let mut func_evals = 0;
3644        let mut grad_evals = 0;
3645        let mut b_inv_backup = Array2::<f64>::zeros((n, n));
3646        let initial = oracle
3647            .eval_cost_grad(obj_fn, &x_k, &mut func_evals, &mut grad_evals)
3648            .map_err(|err| match err {
3649                ObjectiveEvalError::Recoverable { message }
3650                | ObjectiveEvalError::Fatal { message } => BfgsError::ObjectiveFailed { message },
3651            })?;
3652        let (mut f_k, mut g_k) = initial;
3653        if !f_k.is_finite() || g_k.iter().any(|v| !v.is_finite()) {
3654            return Err(BfgsError::GradientIsNaN);
3655        }
3656        let mut g_proj_k = self.projected_gradient(&x_k, &g_k);
3657        let mut active_mask = if let Some(bounds) = &self.bounds {
3658            bounds.active_mask(&x_k, &g_k)
3659        } else {
3660            vec![false; n]
3661        };
3662
3663        if !matches!(self.primary_strategy, LineSearchStrategy::StrongWolfe)
3664            && self.wolfe_fail_streak != 0
3665        {
3666            return Err(BfgsError::InternalInvariant {
3667                message: "primary strategy mismatch with fail streak".to_string(),
3668            });
3669        }
3670        if self.gll.buf.len() > self.gll.cap {
3671            return Err(BfgsError::InternalInvariant {
3672                message: "GLL window exceeded capacity".to_string(),
3673            });
3674        }
3675        if !self.trust_radius.is_finite() {
3676            return Err(BfgsError::InternalInvariant {
3677                message: "trust radius is non-finite".to_string(),
3678            });
3679        }
3680        self.wolfe_fail_streak = 0;
3681        self.wolfe_clean_successes = 0;
3682        self.bt_clean_successes = 0;
3683        self.ls_failures_in_row = 0;
3684        self.nonfinite_seen = false;
3685        self.chol_fail_iters = 0;
3686        self.spd_fail_seen = false;
3687        self.flat_accept_streak = 0;
3688
3689        let mut b_inv = if let Some(h0) = self.initial_b_inv.clone() {
3690            if h0.nrows() == n && h0.ncols() == n && h0.iter().all(|v| v.is_finite()) {
3691                h0
3692            } else {
3693                Array2::<f64>::eye(n)
3694            }
3695        } else {
3696            Array2::<f64>::eye(n)
3697        };
3698
3699        // Initialize adaptive state
3700        self.gll.clear();
3701        self.gll.push(f_k);
3702        self.global_best = Some(ProbeBest::new(&x_k, f_k, &g_k));
3703        self.c1_adapt = self.c1;
3704        self.c2_adapt = self.c2;
3705        self.primary_strategy = LineSearchStrategy::StrongWolfe;
3706        self.wolfe_fail_streak = 0;
3707        // Initialize trust radius from gradient scale
3708        let g0_norm = g_proj_k.dot(&g_proj_k).sqrt();
3709        self.initial_grad_norm = g0_norm;
3710        self.local_mode = false;
3711        let delta0 = if g0_norm.is_finite() && g0_norm > 0.0 {
3712            (10.0 / g0_norm).min(1.0)
3713        } else {
3714            1.0
3715        };
3716        self.trust_radius = delta0;
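        // This matches the documented default Δ₀ = min(1, 10/||g₀||): e.g. ||g₀|| = 500
        // gives Δ₀ = 0.02, while any ||g₀|| ≤ 10 starts at the cap Δ₀ = 1.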
3717
3718        let mut f_last_accepted = f_k;
3719        for k in 0..self.max_iterations {
3720            // reset per-iteration state
3721            self.nonfinite_seen = false;
3722            self.chol_fail_iters = 0;
3723            self.spd_fail_seen = false;
3724            g_proj_k = self.projected_gradient(&x_k, &g_k);
3725            let g_norm = g_proj_k.dot(&g_proj_k).sqrt();
3726            if !g_norm.is_finite() {
3727                log::warn!(
3728                    "[BFGS] Non-finite gradient norm at iter {}: g_norm={:?}",
3729                    k,
3730                    g_norm
3731                );
3732                return Err(BfgsError::GradientIsNaN);
3733            }
3734            self.refresh_local_mode(g_norm);
3735            if g_norm < self.tolerance {
3736                let sol = Solution::gradient_based(
3737                    x_k, f_k, g_k, g_norm, None, k, func_evals, grad_evals, 0,
3738                );
3739                log::info!(
3740                    "[BFGS] Converged by gradient: iters={}, f={:.6e}, ||g||={:.3e}, fe={}, ge={}, Δ={:.3e}",
3741                    k,
3742                    sol.final_value,
3743                    sol.final_gradient_norm
3744                        .expect("gradient-based solution must report gradient norm"),
3745                    sol.func_evals,
3746                    sol.grad_evals,
3747                    self.trust_radius
3748                );
3749                return Ok(sol);
3750            }
3751
3752            let mut present_d_k = -b_inv.dot(&g_proj_k);
3753            if let Some(bounds) = &self.bounds {
3754                for (i, &active) in active_mask.iter().enumerate() {
3755                    if active {
3756                        present_d_k[i] = 0.0;
3757                    }
3758                }
3759                // prevent stepping outside bounds directly from the current point
3760                for i in 0..present_d_k.len() {
3761                    if present_d_k[i] < 0.0 && x_k[i] <= bounds.lower[i] + bounds.tol {
3762                        present_d_k[i] = 0.0;
3763                    }
3764                    if present_d_k[i] > 0.0 && x_k[i] >= bounds.upper[i] - bounds.tol {
3765                        present_d_k[i] = 0.0;
3766                    }
3767                }
3768            }
3769            // Enforce descent direction; reset if needed
3770            let gdotd = g_proj_k.dot(&present_d_k);
3771            let dnorm = present_d_k.dot(&present_d_k).sqrt();
3772            let tiny_d = dnorm <= 1e-14 * (1.0 + x_k.dot(&x_k).sqrt());
3773            let eps_dir = eps_g(&g_proj_k, &present_d_k, self.tau_g);
3774            if gdotd >= -eps_dir || tiny_d {
3775                log::warn!("[BFGS] Non-descent direction; resetting to -g and B_inv=I.");
3776                b_inv = Array2::eye(n);
3777                present_d_k = -g_proj_k.clone();
3778                if let Some(bounds) = &self.bounds {
3779                    for (i, &active) in active_mask.iter().enumerate() {
3780                        if active {
3781                            present_d_k[i] = 0.0;
3782                        }
3783                    }
3784                    for i in 0..present_d_k.len() {
3785                        if present_d_k[i] < 0.0 && x_k[i] <= bounds.lower[i] + bounds.tol {
3786                            present_d_k[i] = 0.0;
3787                        }
3788                        if present_d_k[i] > 0.0 && x_k[i] >= bounds.upper[i] - bounds.tol {
3789                            present_d_k[i] = 0.0;
3790                        }
3791                    }
3792                }
3793            }
3794
3795            // --- Adaptive Hybrid Line Search Execution ---
3796            let active_before = active_mask.clone();
3797            let (alpha_k, mut f_next, mut g_next, f_evals, g_evals, mut accept_kind) = {
3798                let search_result = match self.primary_strategy {
3799                    LineSearchStrategy::StrongWolfe => line_search(
3800                        self,
3801                        obj_fn,
3802                        &mut oracle,
3803                        &x_k,
3804                        &present_d_k,
3805                        f_k,
3806                        &g_k,
3807                        self.c1_adapt,
3808                        self.c2_adapt,
3809                    ),
3810                    LineSearchStrategy::Backtracking => backtracking_line_search(
3811                        self,
3812                        obj_fn,
3813                        &mut oracle,
3814                        &x_k,
3815                        &present_d_k,
3816                        f_k,
3817                        &g_k,
3818                    ),
3819                };
3820
3821                match search_result {
3822                    Ok(result) => {
3823                        // Reset failure streak and relax toward canonical constants
3824                        self.wolfe_fail_streak = 0;
3825                        self.ls_failures_in_row = 0;
3826                        // Drift c1/c2 back toward canonical quickly on success
3827                        if self.wolfe_clean_successes >= 2 || self.bt_clean_successes >= 2 {
3828                            self.c1_adapt = self.c1;
3829                            self.c2_adapt = self.c2;
3830                        } else {
3831                            self.c1_adapt = (self.c1_adapt * 0.9).max(self.c1);
3832                            self.c2_adapt = (self.c2_adapt * 1.1).min(self.c2);
3833                        }
3834                        match self.primary_strategy {
3835                            LineSearchStrategy::StrongWolfe => {
3836                                self.wolfe_clean_successes += 1;
3837                                self.bt_clean_successes = 0;
3838                                if self.wolfe_clean_successes >= 3 {
3839                                    self.gll.set_cap(8);
3840                                }
3841                            }
3842                            LineSearchStrategy::Backtracking => {
3843                                self.bt_clean_successes += 1;
3844                                self.wolfe_clean_successes = 0;
3845                            }
3846                        }
3847                        result
3848                    }
3849                    Err(e) => {
3850                        // The primary strategy failed.
3851                        match e {
3852                            LineSearchError::StepSizeTooSmall => {
3853                                log::debug!("[BFGS] Line search failed: step size too small.");
3854                            }
3855                            LineSearchError::MaxAttempts(attempts) => {
3856                                log::debug!(
3857                                    "[BFGS] Line search failed: max attempts reached ({attempts})."
3858                                );
3859                            }
3860                            LineSearchError::ObjectiveFailed(message) => {
3861                                return Err(BfgsError::ObjectiveFailed { message });
3862                            }
3863                        }
3864                        // Attempt fallback if the primary strategy was StrongWolfe.
3865                        if matches!(self.primary_strategy, LineSearchStrategy::StrongWolfe) {
3866                            let streak = self.wolfe_fail_streak + 1;
3867                            self.wolfe_fail_streak = streak;
3868                            log::warn!(
3869                                "[BFGS Adaptive] Strong Wolfe failed at iter {}. Falling back to Backtracking.",
3870                                k
3871                            );
3872                            // Adapt c1/c2 on failures
3873                            if streak == 1 {
3874                                self.c2_adapt = 0.5;
3875                            }
3876                            if streak >= 2 {
3877                                self.c2_adapt = 0.1;
3878                                self.c1_adapt = 1e-3;
3879                            }
3880                            self.ls_failures_in_row += 1;
3881                            if self.ls_failures_in_row >= 2 {
3882                                self.gll.set_cap(10);
3883                            }
3884                            let fallback_result = backtracking_line_search(
3885                                self,
3886                                obj_fn,
3887                                &mut oracle,
3888                                &x_k,
3889                                &present_d_k,
3890                                f_k,
3891                                &g_k,
3892                            );
3893                            if let Ok(result) = fallback_result {
3894                                // Fallback succeeded.
3895                                result
3896                            } else {
3897                                // The fallback also failed. Terminate with the informative error.
3898                                let (max_attempts, failure_reason) = match fallback_result {
3899                                    Err(LineSearchError::MaxAttempts(attempts)) => {
3900                                        (attempts, LineSearchFailureReason::MaxAttempts)
3901                                    }
3902                                    Err(LineSearchError::StepSizeTooSmall) => (
3903                                        BACKTRACKING_MAX_ATTEMPTS,
3904                                        LineSearchFailureReason::StepSizeTooSmall,
3905                                    ),
3906                                    Err(LineSearchError::ObjectiveFailed(message)) => {
3907                                        return Err(BfgsError::ObjectiveFailed { message });
3908                                    }
3909                                    Ok(_) => unreachable!(
3910                                        "entered fallback failure branch with Ok line-search result"
3911                                    ),
3912                                };
3913                                // Salvage best point seen during line search if any
3914                                if let Some(b) = self.global_best.clone() {
3915                                    let epsF = eps_f(f_k, self.tau_f);
3916                                    let gk_norm = g_proj_k.dot(&g_proj_k).sqrt();
3917                                    let gb_proj = self.projected_gradient(&b.x, &b.g);
3918                                    let gb_norm = gb_proj.dot(&gb_proj).sqrt();
3919                                    let drop_factor = self.grad_drop_factor;
3920                                    if (b.f <= f_k + epsF && gb_norm <= drop_factor * gk_norm)
3921                                        || (b.f < f_k - epsF)
3922                                    {
3923                                        let rel_impr = (f_k - b.f).abs() / (1.0 + f_k.abs());
3924                                        if self.update_no_improve_streak(rel_impr)
3925                                            && self.stagnation_converged(&x_k, &b.x, &gb_proj)
3926                                        {
3927                                            return Ok(Solution::gradient_based(
3928                                                b.x.clone(),
3929                                                b.f,
3930                                                b.g.clone(),
3931                                                gb_norm,
3932                                                None,
3933                                                k,
3934                                                func_evals,
3935                                                grad_evals,
3936                                                0,
3937                                            ));
3938                                        }
3939                                        x_k = self.project_point(&b.x);
3940                                        f_k = b.f;
3941                                        g_k = b.g.clone();
3942                                        g_proj_k = gb_proj;
3943                                        if let Some(bounds) = &self.bounds {
3944                                            active_mask = bounds.active_mask(&x_k, &g_k);
3945                                        }
3946                                        for i in 0..n {
3947                                            b_inv[[i, i]] *= 1.0 + 1e-3;
3948                                        }
3949                                        continue;
3950                                    }
3951                                }
3952                                // Try full trust-region dogleg fallback before giving up
3953                                if let Some((x_new, f_new, g_new)) = self.try_trust_region_step(
3954                                    obj_fn,
3955                                    &mut oracle,
3956                                    &mut b_inv,
3957                                    &x_k,
3958                                    f_k,
3959                                    &g_k,
3960                                    &mut func_evals,
3961                                    &mut grad_evals,
3962                                ) {
3963                                    let g_proj_new = self.projected_gradient(&x_new, &g_new);
3964                                    let rel_impr = (f_k - f_new).abs() / (1.0 + f_k.abs());
3965                                    if self.update_no_improve_streak(rel_impr)
3966                                        && self.stagnation_converged(&x_k, &x_new, &g_proj_new)
3967                                    {
3968                                        return Ok(Solution::gradient_based(
3969                                            x_new,
3970                                            f_new,
3971                                            g_new,
3972                                            g_proj_new.dot(&g_proj_new).sqrt(),
3973                                            None,
3974                                            k + 1,
3975                                            func_evals,
3976                                            grad_evals,
3977                                            0,
3978                                        ));
3979                                    }
3980                                    x_k = x_new;
3981                                    f_k = f_new;
3982                                    g_k = g_new;
3983                                    g_proj_k = g_proj_new;
3984                                    if let Some(bounds) = &self.bounds {
3985                                        active_mask = bounds.active_mask(&x_k, &g_k);
3986                                    }
3987                                    self.ls_failures_in_row = 0;
3988                                    continue;
3989                                }
3990                                self.trust_radius = (self.trust_radius * 0.7).max(1e-12);
3991                                if self.nonfinite_seen {
3992                                    let mut ls = Solution::gradient_based(
3993                                        x_k.clone(),
3994                                        f_k,
3995                                        g_k.clone(),
3996                                        g_norm,
3997                                        None,
3998                                        k,
3999                                        func_evals,
4000                                        grad_evals,
4001                                        0,
4002                                    );
4003                                    if let Some(b) = self.global_best.as_ref()
4004                                        && b.f < f_k - eps_f(f_k, self.tau_f)
4005                                    {
4006                                        let gb_proj = self.projected_gradient(&b.x, &b.g);
4007                                        ls = Solution::gradient_based(
4008                                            b.x.clone(),
4009                                            b.f,
4010                                            b.g.clone(),
4011                                            gb_proj.dot(&gb_proj).sqrt(),
4012                                            None,
4013                                            k,
4014                                            func_evals,
4015                                            grad_evals,
4016                                            0,
4017                                        );
4018                                    }
4019                                    log::warn!(
4020                                        "[BFGS] Line search failed at iter {} (nonfinite seen), fe={}, ge={}, Δ={:.3e}",
4021                                        k,
4022                                        func_evals,
4023                                        grad_evals,
4024                                        self.trust_radius
4025                                    );
4026                                    return Err(BfgsError::LineSearchFailed {
4027                                        last_solution: Box::new(ls),
4028                                        max_attempts,
4029                                        failure_reason,
4030                                    });
4031                                }
4032                                if self.ls_failures_in_row >= 2 {
4033                                    let ls = Solution::gradient_based(
4034                                        x_k.clone(),
4035                                        f_k,
4036                                        g_k.clone(),
4037                                        g_norm,
4038                                        None,
4039                                        k,
4040                                        func_evals,
4041                                        grad_evals,
4042                                        0,
4043                                    );
4044                                    return Err(BfgsError::LineSearchFailed {
4045                                        last_solution: Box::new(ls),
4046                                        max_attempts,
4047                                        failure_reason,
4048                                    });
4049                                }
4050                                continue;
4051                            }
4052                        } else {
4053                            // The robust Backtracking strategy has failed. This is a critical problem.
4054                            // Reset the Hessian and try one last time with a steepest descent direction.
4055                            self.ls_failures_in_row += 1;
4056                            log::error!(
4057                                "[BFGS Adaptive] CRITICAL: Backtracking failed at iter {}. Resetting Hessian.",
4058                                k
4059                            );
4060                            b_inv = Array2::<f64>::eye(n);
4061                            present_d_k = -g_k.clone();
4062                            let fallback_result = backtracking_line_search(
4063                                self,
4064                                obj_fn,
4065                                &mut oracle,
4066                                &x_k,
4067                                &present_d_k,
4068                                f_k,
4069                                &g_k,
4070                            );
4071                            if let Ok(result) = fallback_result {
4072                                result
4073                            } else {
4074                                let (max_attempts, failure_reason) = match fallback_result {
4075                                    Err(LineSearchError::MaxAttempts(attempts)) => {
4076                                        (attempts, LineSearchFailureReason::MaxAttempts)
4077                                    }
4078                                    Err(LineSearchError::StepSizeTooSmall) => (
4079                                        BACKTRACKING_MAX_ATTEMPTS,
4080                                        LineSearchFailureReason::StepSizeTooSmall,
4081                                    ),
4082                                    Err(LineSearchError::ObjectiveFailed(message)) => {
4083                                        return Err(BfgsError::ObjectiveFailed { message });
4084                                    }
4085                                    Ok(_) => unreachable!(
4086                                        "entered fallback failure branch with Ok line-search result"
4087                                    ),
4088                                };
4089                                // Full trust-region dogleg fallback
4090                                if let Some((x_new, f_new, g_new)) = self.try_trust_region_step(
4091                                    obj_fn,
4092                                    &mut oracle,
4093                                    &mut b_inv,
4094                                    &x_k,
4095                                    f_k,
4096                                    &g_k,
4097                                    &mut func_evals,
4098                                    &mut grad_evals,
4099                                ) {
4100                                    let g_proj_new = self.projected_gradient(&x_new, &g_new);
4101                                    let rel_impr = (f_k - f_new).abs() / (1.0 + f_k.abs());
4102                                    if self.update_no_improve_streak(rel_impr)
4103                                        && self.stagnation_converged(&x_k, &x_new, &g_proj_new)
4104                                    {
4105                                        return Ok(Solution::gradient_based(
4106                                            x_new,
4107                                            f_new,
4108                                            g_new,
4109                                            g_proj_new.dot(&g_proj_new).sqrt(),
4110                                            None,
4111                                            k + 1,
4112                                            func_evals,
4113                                            grad_evals,
4114                                            0,
4115                                        ));
4116                                    }
4117                                    x_k = x_new;
4118                                    f_k = f_new;
4119                                    g_k = g_new;
4120                                    g_proj_k = g_proj_new;
4121                                    if let Some(bounds) = &self.bounds {
4122                                        active_mask = bounds.active_mask(&x_k, &g_k);
4123                                    }
4124                                    self.ls_failures_in_row = 0;
4125                                    continue;
4126                                }
4127                                if let Some(b) = self.global_best.clone() {
4128                                    let epsF = eps_f(f_k, self.tau_f);
4129                                    let gk_norm = g_proj_k.dot(&g_proj_k).sqrt();
4130                                    let gb_proj = self.projected_gradient(&b.x, &b.g);
4131                                    let gb_norm = gb_proj.dot(&gb_proj).sqrt();
4132                                    let drop_factor = self.grad_drop_factor;
4133                                    if (b.f <= f_k + epsF && gb_norm <= drop_factor * gk_norm)
4134                                        || (b.f < f_k - epsF)
4135                                    {
4136                                        let rel_impr = (f_k - b.f).abs() / (1.0 + f_k.abs());
4137                                        if self.update_no_improve_streak(rel_impr)
4138                                            && self.stagnation_converged(&x_k, &b.x, &gb_proj)
4139                                        {
4140                                            return Ok(Solution::gradient_based(
4141                                                b.x.clone(),
4142                                                b.f,
4143                                                b.g.clone(),
4144                                                gb_norm,
4145                                                None,
4146                                                k,
4147                                                func_evals,
4148                                                grad_evals,
4149                                                0,
4150                                            ));
4151                                        }
4152                                        x_k = self.project_point(&b.x);
4153                                        f_k = b.f;
4154                                        g_k = b.g.clone();
4155                                        g_proj_k = gb_proj;
4156                                        if let Some(bounds) = &self.bounds {
4157                                            active_mask = bounds.active_mask(&x_k, &g_k);
4158                                        }
4159                                        for i in 0..n {
4160                                            b_inv[[i, i]] *= 1.0 + 1e-3;
4161                                        }
4162                                        continue;
4163                                    }
4164                                }
4165                                self.trust_radius = (self.trust_radius * 0.7).max(1e-12);
4166                                if self.nonfinite_seen {
4167                                    let mut ls = Solution::gradient_based(
4168                                        x_k.clone(),
4169                                        f_k,
4170                                        g_k.clone(),
4171                                        g_norm,
4172                                        None,
4173                                        k,
4174                                        func_evals,
4175                                        grad_evals,
4176                                        0,
4177                                    );
4178                                    if let Some(b) = self.global_best.as_ref()
4179                                        && b.f < f_k - eps_f(f_k, self.tau_f)
4180                                    {
4181                                        let b_proj = self.projected_gradient(&b.x, &b.g);
4182                                        ls = Solution::gradient_based(
4183                                            b.x.clone(),
4184                                            b.f,
4185                                            b.g.clone(),
4186                                            b_proj.dot(&b_proj).sqrt(),
4187                                            None,
4188                                            k,
4189                                            func_evals,
4190                                            grad_evals,
4191                                            0,
4192                                        );
4193                                    }
4194                                    log::warn!(
4195                                        "[BFGS] Line search failed at iter {} (nonfinite seen), fe={}, ge={}, Δ={:.3e}",
4196                                        k,
4197                                        func_evals,
4198                                        grad_evals,
4199                                        self.trust_radius
4200                                    );
4201                                    return Err(BfgsError::LineSearchFailed {
4202                                        last_solution: Box::new(ls),
4203                                        max_attempts,
4204                                        failure_reason,
4205                                    });
4206                                }
4207                                if self.ls_failures_in_row >= 2 {
4208                                    let ls = Solution::gradient_based(
4209                                        x_k.clone(),
4210                                        f_k,
4211                                        g_k.clone(),
4212                                        g_norm,
4213                                        None,
4214                                        k,
4215                                        func_evals,
4216                                        grad_evals,
4217                                        0,
4218                                    );
4219                                    return Err(BfgsError::LineSearchFailed {
4220                                        last_solution: Box::new(ls),
4221                                        max_attempts,
4222                                        failure_reason,
4223                                    });
4224                                }
4225                                continue;
4226                            }
4227                        }
4228                    }
4229                }
4230            };
4231
4232            // Optional coordinate rescue after consecutive flat accepts
4233            let mut s_override: Option<Array1<f64>> = None;
4234            let mut rescued = false;
4235            if self.rescue_enabled() {
4236                let epsF_iter = eps_f(f_k, self.tau_f);
4237                let flat_now = (f_next - f_k).abs() <= epsF_iter;
4238                if flat_now && self.flat_accept_streak >= 2 {
4239                    let x_base = self.project_point(&(&x_k + &(alpha_k * &present_d_k)));
4240                    let g_proj_base = self.projected_gradient(&x_base, &g_next);
4241                    let gnext_norm0 = g_proj_base.dot(&g_proj_base).sqrt();
4242                    let delta = self.trust_radius;
4243                    let eta = (0.2 * delta).min(1.0 / (1.0 + gnext_norm0));
4244                    if eta.is_finite() && eta > 0.0 {
4245                        let n = x_k.len();
4246                        let mut best_x = None;
4247                        let mut best_f = f_next;
4248                        let mut best_g = g_next.clone();
4249                        // Budgeted coordinate subset selection
4250                        let k = n.min(8);
4251                        let mut idx: Vec<usize> = (0..n).collect();
4252                        idx.sort_by(|&i, &j| {
4253                            g_next[i]
4254                                .abs()
4255                                .partial_cmp(&g_next[j].abs())
4256                                .unwrap_or(std::cmp::Ordering::Equal)
4257                                .reverse()
4258                        });
4259                        let (use_hybrid, pool_mult, rescue_heads) = match self.rescue_policy {
4260                            RescuePolicy::Off => (false, 1.0, 0),
4261                            RescuePolicy::CoordinateHybrid { pool_mult, heads } => {
4262                                (true, pool_mult, heads)
4263                            }
4264                        };
4265                        let m = (pool_mult * (k as f64)).round() as usize;
4266                        let m = m.clamp(k, n);
4267                        let heads = rescue_heads.min(k).min(m);
4268                        let mut chosen: Vec<usize> = Vec::new();
4269                        // Always include the `heads` largest-|g| coordinates
4270                        for &i in idx.iter().take(heads) {
4271                            chosen.push(i);
4272                        }
4273                        if use_hybrid {
4274                            // Sample the remaining slots from the next pool (ranks heads..m)
4275                            let mut pool: Vec<usize> =
4276                                idx.iter().cloned().skip(heads).take(m - heads).collect();
4277                            while chosen.len() < k && !pool.is_empty() {
4278                                // xorshift-based index
4279                                let r = (self.rng_state >> 1) as usize;
4280                                let t = r % pool.len();
4281                                let pick = pool.swap_remove(t);
4282                                chosen.push(pick);
4283                                // advance rng
4284                                let _ = self.next_rand_sym();
4285                            }
4286                        } else {
4287                            for &i in idx.iter().skip(heads).take(k - heads) {
4288                                chosen.push(i);
4289                            }
4290                        }
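                        // Example with the Robust defaults on n = 100: k = 8 probes,
                        // pool m = round(4.0 · 8) = 32, heads = 2, so the two
                        // largest-|g| coordinates are always probed and the remaining
                        // six are sampled pseudo-randomly from |g|-ranks 3..=32.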
4291                        for &i in &chosen {
4292                            for &sgn in &[-1.0, 1.0] {
4293                                let mut x_try = x_base.clone();
4294                                x_try[i] += sgn * eta; // coordinate poke away from the accepted point x_base
4295                                x_try = self.project_point(&x_try);
4296                                let (f_try, g_try) = match bfgs_eval_cost_grad(
4297                                    &mut oracle,
4298                                    obj_fn,
4299                                    &x_try,
4300                                    &mut func_evals,
4301                                    &mut grad_evals,
4302                                ) {
4303                                    Ok(sample) => sample,
4304                                    Err(ObjectiveEvalError::Recoverable { .. }) => continue,
4305                                    Err(ObjectiveEvalError::Fatal { message }) => {
4306                                        return Err(BfgsError::ObjectiveFailed { message });
4307                                    }
4308                                };
4309                                if !f_try.is_finite() || g_try.iter().any(|v| !v.is_finite()) {
4310                                    continue;
4311                                }
4312                                let g_proj_try = self.projected_gradient(&x_try, &g_try);
4313                                let g_try_norm = g_proj_try.dot(&g_proj_try).sqrt();
4314                                let f_thresh = f_k.min(f_next) + epsF_iter;
4315                                let s_trial = &x_try - &x_k;
4316                                let descent_ok = g_proj_k.dot(&s_trial)
4317                                    <= -eps_g(&g_proj_k, &s_trial, self.tau_g);
4318                                let f_ok = f_try <= f_thresh;
4319                                let g_ok = g_try_norm <= self.grad_drop_factor * gnext_norm0;
4320                                if (f_ok || g_ok) && descent_ok && f_try <= best_f {
4321                                    best_f = f_try;
4322                                    best_x = Some(x_try.clone());
4323                                    best_g = g_try.clone();
4324                                }
4325                            }
4326                        }
4327                        if let Some(xb) = best_x {
4328                            // Enforce trust radius on the rescue step
4329                            let mut s_tmp = &xb - &x_k;
4330                            let s_norm = s_tmp.dot(&s_tmp).sqrt();
4331                            let delta = self.trust_radius;
4332                            if s_norm.is_finite()
4333                                && s_norm > delta
4334                                && delta.is_finite()
4335                                && delta > 0.0
4336                            {
4337                                let scale = delta / s_norm;
4338                                let x_scaled = &x_k + &(s_tmp.mapv(|v| v * scale));
4339                                let x_scaled = self.project_point(&x_scaled);
4340                                let (f_s, g_s) = match bfgs_eval_cost_grad(
4341                                    &mut oracle,
4342                                    obj_fn,
4343                                    &x_scaled,
4344                                    &mut func_evals,
4345                                    &mut grad_evals,
4346                                ) {
4347                                    Ok(sample) => sample,
4348                                    Err(ObjectiveEvalError::Recoverable { .. }) => {
4349                                        (f64::NAN, Array1::zeros(x_scaled.len()))
4350                                    }
4351                                    Err(ObjectiveEvalError::Fatal { message }) => {
4352                                        return Err(BfgsError::ObjectiveFailed { message });
4353                                    }
4354                                };
4355                                if f_s.is_finite() && g_s.iter().all(|v| v.is_finite()) {
4356                                    s_tmp = &x_scaled - &x_k;
4357                                    f_next = f_s;
4358                                    g_next = g_s;
4359                                } else {
4360                                    // fall back to original xb
4361                                    f_next = best_f;
4362                                    g_next = best_g.clone();
4363                                }
4364                            } else {
4365                                f_next = best_f;
4366                                g_next = best_g.clone();
4367                            }
4368                            s_override = Some(s_tmp);
4369                            rescued = true;
4370                            accept_kind = AcceptKind::Rescue;
4371                            self.flat_accept_streak = 0;
4372                        }
4373                    }
4374                }
4375            }
4376
4377            // The "Learner" part: promote Backtracking if Wolfe keeps failing.
4378            if self.wolfe_fail_streak >= Self::FALLBACK_THRESHOLD {
4379                log::warn!(
4380                    "[BFGS Adaptive] Fallback streak ({}) reached. Switching primary to Backtracking.",
4381                    self.wolfe_fail_streak
4382                );
4383                self.primary_strategy = LineSearchStrategy::Backtracking;
4384                self.wolfe_fail_streak = 0;
4385            }
4386            // Switch back to StrongWolfe after a run of clean backtracking successes
4387            if matches!(self.primary_strategy, LineSearchStrategy::Backtracking)
4388                && self.bt_clean_successes >= 3
4389                && self.wolfe_fail_streak == 0
4390            {
4391                log::info!(
4392                    "[BFGS Adaptive] Backtracking succeeded cleanly ({} iters); switching back to StrongWolfe.",
4393                    self.bt_clean_successes
4394                );
4395                self.primary_strategy = LineSearchStrategy::StrongWolfe;
4396                self.bt_clean_successes = 0;
4397                self.gll.set_cap(8);
4398            }
4399
4400            func_evals += f_evals;
4401            grad_evals += g_evals;
4402
4403            let mut s_k = if let Some(ref s) = s_override {
4404                s.clone()
4405            } else {
4406                alpha_k * &present_d_k
4407            };
4408            let x_next = self.project_point(&(x_k.clone() + &s_k));
4409            s_k = &x_next - &x_k;
4410            let g_proj_next = self.projected_gradient(&x_next, &g_next);
4411            let active_after = if let Some(bounds) = &self.bounds {
4412                bounds.active_mask(&x_next, &g_next)
4413            } else {
4414                vec![false; n]
4415            };
4416            let step_len = s_k.dot(&s_k).sqrt();
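            // Grow the trust radius after an accepted step: aggressively (x1.5)
            // when the step used at least 90% of the radius, gently (x1.1)
            // otherwise, always capped at 1e6.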
            if step_len.is_finite() && step_len > 0.0 {
                if step_len >= 0.9 * self.trust_radius {
                    self.trust_radius = (self.trust_radius * 1.5).min(1e6);
                } else {
                    self.trust_radius = (self.trust_radius * 1.1).min(1e6);
                }
            }

            let rel_impr = (f_last_accepted - f_next).abs() / (1.0 + f_last_accepted.abs());
            if self.update_no_improve_streak(rel_impr)
                && self.stagnation_converged(&x_k, &x_next, &g_proj_next)
            {
                return Ok(Solution::gradient_based(
                    x_next.clone(),
                    f_next,
                    g_next.clone(),
                    g_proj_next.dot(&g_proj_next).sqrt(),
                    None,
                    k + 1,
                    func_evals,
                    grad_evals,
                    0,
                ));
            }

            // Update adaptive curvature slack scale and gradient drop factor based on flats
            let f_ok_flat = (f_next - f_k).abs() <= eps_f(f_k, self.tau_f)
                || (f_next - f_k).abs() <= self.tol_f_rel * (1.0 + f_k.abs());
            if f_ok_flat {
                self.flat_accept_streak += 1;
            } else {
                self.flat_accept_streak = 0;
            }
            if self.flat_accept_streak >= 2 {
                self.curv_slack_scale = (self.curv_slack_scale * 0.5).max(0.1);
                self.grad_drop_factor = 0.95;
            } else {
                self.curv_slack_scale = 1.0;
                self.grad_drop_factor = 0.9;
            }

            let mut y_k = &g_next - &g_k;

            if self.bounds.is_some() {
                for i in 0..n {
                    let tiny_step = s_k[i].abs() <= 1e-14 * (1.0 + x_k[i].abs());
                    if (active_before[i] && active_after[i]) || tiny_step {
                        s_k[i] = 0.0;
                        y_k[i] = 0.0;
                    }
                }
            }

            // --- Cautious Hessian Update ---
            let sy = s_k.dot(&y_k);
            let mut update_status = "applied";

            if k == 0 {
                // Improved first-step scaling
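                // Seed H_0 with the standard inverse-Hessian scaling
                // gamma = s^T y / y^T y (clamped below) so the first
                // quasi-Newton step has a reasonable magnitude before any
                // curvature information has accumulated.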
                let yy = y_k.dot(&y_k);
                let mut scale = if sy > 1e-12 && yy > 0.0 { sy / yy } else { 1.0 };
                if !scale.is_finite() {
                    scale = 1.0;
                }
                scale = scale.clamp(1e-3, 1e3);
                b_inv = Array2::eye(n) * scale;
            }

            // Powell-damped inverse BFGS update (keep SPD).
            let s_norm = s_k.dot(&s_k).sqrt();
            if s_norm > 1e-14 {
                if !rescued {
                    // Compute B s via CG on H (since H = B^{-1}) for Powell damping.
                    let mean_diag = (0..n).map(|i| b_inv[[i, i]].abs()).sum::<f64>() / (n as f64);
                    let ridge = (1e-10 * mean_diag).max(1e-16);
                    if let Some(h_s) = cg_solve_adaptive(&b_inv, &s_k, 25, 1e-10, ridge) {
                        let s_h_s = s_k.dot(&h_s);
                        let denom_raw = s_h_s - sy;
                        let denom = if denom_raw <= 0.0 { 1e-16 } else { denom_raw };
                        // Powell damping: blend y and B s so that s^T y_tilde is sufficiently positive.
                        let theta_raw = if sy < 0.2 * s_h_s {
                            (0.8 * s_h_s) / denom
                        } else {
                            1.0
                        };
                        let theta = theta_raw.clamp(0.0, 1.0);
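                        // With this theta, s^T y_tilde = s_h_s - theta * (s_h_s - sy),
                        // which evaluates to 0.2 * s_h_s in the damped branch, so the
                        // curvature pair stays safely positive for the SPD update below.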
                        let mut y_tilde = &y_k * theta + &h_s * (1.0 - theta);
                        let mut sty = s_k.dot(&y_tilde);
                        let mut y_norm = y_tilde.dot(&y_tilde).sqrt();
                        let s_norm2 = s_norm * s_norm;
                        let kappa = 1e-4;
                        let min_curv = kappa * s_norm * y_norm;
                        if sty < min_curv {
                            let beta = (min_curv - sty) / s_norm2;
                            y_tilde = &y_tilde + &s_k * beta;
                            sty = s_k.dot(&y_tilde);
                            y_norm = y_tilde.dot(&y_tilde).sqrt();
                        }
                        let rel = if s_norm > 0.0 && y_norm > 0.0 {
                            sty / (s_norm * y_norm)
                        } else {
                            0.0
                        };
                        if !sty.is_finite() || rel < 1e-8 {
                            log::warn!(
                                "[BFGS] s^T y_tilde non-positive/tiny; skipping update and inflating diag."
                            );
                            update_status = "skipped";
                            self.chol_fail_iters += 1;
                            for i in 0..n {
                                b_inv[[i, i]] *= 1.0 + 1e-3;
                            }
                        } else if !apply_inverse_bfgs_update_in_place(
                            &mut b_inv,
                            &s_k,
                            &y_tilde,
                            &mut b_inv_backup,
                        ) {
                            b_inv.assign(&b_inv_backup);
                            for i in 0..n {
                                b_inv[[i, i]] += 1e-6;
                            }
                            update_status = "reverted";
                        }
                    } else {
                        self.chol_fail_iters += 1;
                        self.spd_fail_seen = true;
                        log::warn!("[BFGS] B_inv not SPD after ridge; skipping update this iter.");
                        update_status = "skipped";
                    }
                } else {
                    log::info!("[BFGS] Coordinate rescue used; skipping inverse update this iter.");
                    update_status = "skipped";
                }

                // Enforce symmetry and gentle regularization
                for i in 0..n {
                    for j in (i + 1)..n {
                        let a = b_inv[[i, j]];
                        let b = b_inv[[j, i]];
                        let v = 0.5 * (a + b);
                        b_inv[[i, j]] = v;
                        b_inv[[j, i]] = v;
                    }
                }
                let mut diag_min = f64::INFINITY;
                for i in 0..n {
                    diag_min = diag_min.min(b_inv[[i, i]]);
                }
                if !diag_min.is_finite() || diag_min <= 0.0 {
                    let mut trace = 0.0;
                    for i in 0..n {
                        trace += b_inv[[i, i]].abs();
                    }
                    let delta = 1e-12 * trace.max(1.0);
                    for i in 0..n {
                        b_inv[[i, i]] += delta;
                    }
                }

                if self.spd_fail_seen && self.chol_fail_iters >= 2 {
                    let sy = s_k.dot(&y_k);
                    let yy = y_k.dot(&y_k);
                    let mut lambda = if yy > 0.0 { (sy / yy).abs() } else { 1.0 };
                    lambda = lambda.clamp(1e-6, 1e6);
                    b_inv = scaled_identity(n, lambda);
                    self.chol_fail_iters = 0;
                    update_status = "reverted";
                }
            } else {
                update_status = "skipped";
            }

            log::info!(
                "[BFGS] step accepted via {:?}; inverse update {}",
                accept_kind,
                update_status
            );

            // Stopping tests: small step and flat f
            let step_ok = self.feasible_step_small(&x_k, &x_next);
            let f_ok = (f_next - f_k).abs() <= eps_f(f_k, self.tau_f);
            let gnext_finite = f_next.is_finite() && g_next.iter().all(|v| v.is_finite());
            let gnext_norm = g_proj_next.dot(&g_proj_next).sqrt();
            if step_ok && f_ok && gnext_finite && gnext_norm < self.tolerance {
                let sol = Solution::gradient_based(
                    x_next.clone(),
                    f_next,
                    g_next.clone(),
                    gnext_norm,
                    None,
                    k + 1,
                    func_evals,
                    grad_evals,
                    0,
                );
                log::info!(
                    "[BFGS] Converged by small step/flat f: iters={}, f={:.6e}, ||g||={:.3e}, fe={}, ge={}, Δ={:.3e}",
                    sol.iterations,
                    sol.final_value,
                    sol.final_gradient_norm
                        .expect("gradient-based solution must report gradient norm"),
                    sol.func_evals,
                    sol.grad_evals,
                    self.trust_radius
                );
                return Ok(sol);
            }

            // Optional stall/flat exit (relative stationarity)
            if let StallPolicy::On { window } = self.stall_policy {
                let g_inf = g_proj_k.iter().fold(0.0, |acc, &v| f64::max(acc, v.abs()));
                let x_inf = x_k.iter().fold(0.0, |acc, &v| f64::max(acc, v.abs()));
                let rel_g_ok = g_inf <= self.tolerance * (1.0 + x_inf);
                // f_k equals f_last_accepted by the time this check runs, so the
                // flatness test has to look at the newly accepted value instead.
                let rel_f_ok =
                    (f_next - f_last_accepted).abs() <= eps_f(f_last_accepted, self.tau_f);
                if rel_g_ok && rel_f_ok {
                    self.stall_noimprove_streak += 1;
                } else {
                    self.stall_noimprove_streak = 0;
                }
                if self.stall_noimprove_streak >= window {
                    let sol = Solution::gradient_based(
                        x_k.clone(),
                        f_k,
                        g_k.clone(),
                        g_inf,
                        None,
                        k + 1,
                        func_evals,
                        grad_evals,
                        0,
                    );
                    log::info!(
                        "[BFGS] Converged (flat/stalled): iters={}, f={:.6e}, ||g||={:.3e}",
                        sol.iterations,
                        sol.final_value,
                        sol.final_gradient_norm
                            .expect("gradient-based solution must report gradient norm")
                    );
                    return Ok(sol);
                }
            }

            x_k = x_next;
            f_k = f_next;
            g_k = g_next;
            g_proj_k = g_proj_next;
            active_mask = active_after;
            // Update GLL window and global best
            self.gll.push(f_k);
            f_last_accepted = f_k;
            let maybe_f = self.global_best.as_ref().map(|b| b.f);
            match maybe_f {
                Some(bf) => {
                    if f_k < bf - eps_f(bf, self.tau_f) {
                        self.global_best = Some(ProbeBest {
                            f: f_k,
                            x: x_k.clone(),
                            g: g_k.clone(),
                        });
                    }
                }
                None => {
                    self.global_best = Some(ProbeBest::new(&x_k, f_k, &g_k));
                }
            }

            // Strategy stickiness: we return to StrongWolfe only after a run of
            // clean backtracking successes (handled above via `bt_clean_successes`).
        }

        // The loop finished. Construct a solution from the final state.
        let final_g_norm = g_proj_k.dot(&g_proj_k).sqrt();
        let last_solution = Box::new(Solution::gradient_based(
            x_k,
            f_k,
            g_k,
            final_g_norm,
            None,
            self.max_iterations,
            func_evals,
            grad_evals,
            0,
        ));
        log::warn!(
            "[BFGS] Max iterations reached: iters={}, f={:.6e}, ||g||={:.3e}, fe={}, ge={}, Δ={:.3e}",
            self.max_iterations,
            last_solution.final_value,
            last_solution
                .final_gradient_norm
                .expect("gradient-based solution must report gradient norm"),
            last_solution.func_evals,
            last_solution.grad_evals,
            self.trust_radius
        );
        Err(BfgsError::MaxIterationsReached { last_solution })
    }
}

impl<ObjFn> Bfgs<ObjFn>
where
    ObjFn: FirstOrderObjective,
{
    /// Creates a new BFGS solver.
    ///
    /// # Arguments
    /// * `x0` - The initial guess for the minimum.
    /// * `obj_fn` - First-order objective.
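    ///
    /// # Example
    ///
    /// A minimal builder sketch. `MyObjective` stands in for any
    /// `FirstOrderObjective` implementor, and the `Tolerance`/`MaxIterations`
    /// constructors shown are assumed, so the block is not compiled:
    ///
    /// ```ignore
    /// use ndarray::array;
    ///
    /// let mut solver = Bfgs::new(array![0.0, 0.0], MyObjective)
    ///     .with_tolerance(Tolerance::new(1e-6))
    ///     .with_max_iterations(MaxIterations::new(200));
    /// let solution = solver.run()?;
    /// println!("f* = {:.3e}", solution.final_value);
    /// ```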
    pub fn new(x0: Array1<f64>, obj_fn: ObjFn) -> Self {
        Self {
            core: BfgsCore::new(x0),
            obj_fn,
        }
    }

    /// Sets the convergence tolerance (default: 1e-5).
    pub fn with_tolerance(mut self, tolerance: Tolerance) -> Self {
        self.core.tolerance = tolerance.get();
        self
    }

    /// Sets the maximum number of iterations (default: 100).
    pub fn with_max_iterations(mut self, max_iterations: MaxIterations) -> Self {
        self.core.max_iterations = max_iterations.get();
        self
    }

    /// Provides simple box bounds for each coordinate (lower <= x <= upper).
    /// Points are projected by coordinate clamping, and the gradient is projected
    /// by zeroing active constraints during direction updates.
    pub fn with_bounds(mut self, bounds: Bounds) -> Self {
        self.obj_fn.set_finite_difference_bounds(Some(&bounds));
        self.core.bounds = Some(bounds.spec);
        self
    }

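    /// Applies a heuristic policy preset (`Profile`) tuned for the given
    /// objective landscape.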
    pub fn with_profile(mut self, profile: Profile) -> Self {
        self.core.apply_profile(profile);
        self
    }

    /// Executes the BFGS algorithm with the adaptive hybrid line search.
    /// Requires `&mut self` to support stateful `FnMut` objectives.
    pub fn run(&mut self) -> Result<Solution, BfgsError> {
        self.core.run(&mut self.obj_fn)
    }

    #[cfg(test)]
    fn next_rand_sym(&mut self) -> f64 {
        self.core.next_rand_sym()
    }
}

impl<ObjFn> NewtonTrustRegion<ObjFn>
where
    ObjFn: SecondOrderObjective,
{
    /// Creates a new Newton trust-region solver.
    ///
    /// # Arguments
    /// * `x0` - The initial guess for the minimum.
    /// * `obj_fn` - Second-order objective.
    pub fn new(x0: Array1<f64>, obj_fn: ObjFn) -> Self {
        Self {
            core: NewtonTrustRegionCore::new(x0),
            obj_fn,
        }
    }

    /// Sets the convergence tolerance on projected gradient norm (default: 1e-5).
    pub fn with_tolerance(mut self, tolerance: Tolerance) -> Self {
        self.core.tolerance = tolerance.get();
        self
    }

    /// Sets the maximum number of iterations (default: 100).
    pub fn with_max_iterations(mut self, max_iterations: MaxIterations) -> Self {
        self.core.max_iterations = max_iterations.get();
        self
    }

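    /// Sets the step size used for finite-difference Hessian approximations.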
    pub fn with_fd_hessian_step(mut self, fd_hessian_step: f64) -> Self {
        self.core.fd_hessian_step = fd_hessian_step;
        self
    }

    /// Provides simple box bounds for each coordinate (lower <= x <= upper).
    pub fn with_bounds(mut self, bounds: Bounds) -> Self {
        self.obj_fn.set_finite_difference_bounds(Some(&bounds));
        self.core.bounds = Some(bounds.spec);
        self
    }

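    /// Applies a heuristic policy preset (`Profile`) tuned for the given
    /// objective landscape.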
    pub fn with_profile(mut self, profile: Profile) -> Self {
        self.core.apply_profile(profile);
        self
    }

    /// Executes the Newton trust-region optimization.
    pub fn run(&mut self) -> Result<Solution, NewtonTrustRegionError> {
        self.core.run(&mut self.obj_fn)
    }
}

impl<ObjFn> Arc<ObjFn>
where
    ObjFn: SecondOrderObjective,
{
    /// Creates a new ARC solver.
    ///
    /// # Arguments
    /// * `x0` - The initial guess for the minimum.
    /// * `obj_fn` - Second-order objective.
    pub fn new(x0: Array1<f64>, obj_fn: ObjFn) -> Self {
        Self {
            core: ArcCore::new(x0),
            obj_fn,
        }
    }

    /// Sets the convergence tolerance on projected gradient norm (default: 1e-5).
    pub fn with_tolerance(mut self, tolerance: Tolerance) -> Self {
        self.core.tolerance = tolerance.get();
        self
    }

    /// Sets the maximum number of iterations (default: 100).
    pub fn with_max_iterations(mut self, max_iterations: MaxIterations) -> Self {
        self.core.max_iterations = max_iterations.get();
        self
    }

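    /// Sets the step size used for finite-difference Hessian approximations.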
    pub fn with_fd_hessian_step(mut self, fd_hessian_step: f64) -> Self {
        self.core.fd_hessian_step = fd_hessian_step;
        self
    }

    /// Provides simple box bounds for each coordinate (lower <= x <= upper).
    pub fn with_bounds(mut self, bounds: Bounds) -> Self {
        self.obj_fn.set_finite_difference_bounds(Some(&bounds));
        self.core.bounds = Some(bounds.spec);
        self
    }

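    /// Applies a heuristic policy preset (`Profile`) tuned for the given
    /// objective landscape.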
    pub fn with_profile(mut self, profile: Profile) -> Self {
        self.core.apply_profile(profile);
        self
    }

    /// Executes ARC optimization.
    ///
    /// This implementation follows the practical ARC template in Euclidean spaces.
    /// Under standard assumptions (for example, a lower-bounded objective and a
    /// Lipschitz-continuous Hessian), ARC theory gives an `O(eps^-1.5)` bound on
    /// first-order iterations; this API does not encode those assumptions, but it
    /// mirrors that algorithmic structure.
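    ///
    /// # Example
    ///
    /// A minimal builder sketch; the objective is a placeholder for any
    /// `SecondOrderObjective` implementor, so the block is not compiled:
    ///
    /// ```ignore
    /// let mut solver = Arc::new(x0, my_second_order_objective);
    /// let solution = solver.run()?;
    /// println!("f* = {:.3e}", solution.final_value);
    /// ```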
    pub fn run(&mut self) -> Result<Solution, ArcError> {
        self.core.run(&mut self.obj_fn)
    }
}

#[derive(Debug, thiserror::Error)]
pub enum FixedPointError {
    #[error("Objective evaluation failed: {message}")]
    ObjectiveFailed { message: String },
    #[error("Fixed-point objective returned a step of dimension {got}; expected {expected}")]
    StepDimensionMismatch { expected: usize, got: usize },
    #[error("Fixed-point objective returned a non-finite step")]
    NonFiniteStep,
    #[error(
        "Maximum number of iterations reached without converging. The best solution found is returned."
    )]
    MaxIterationsReached { last_solution: Box<Solution> },
}

struct FixedPointCore {
    x0: Array1<f64>,
    tolerance: f64,
    max_iterations: usize,
    bounds: Option<BoxSpec>,
}

impl FixedPointCore {
    fn new(x0: Array1<f64>) -> Self {
        Self {
            x0,
            tolerance: 1e-5,
            max_iterations: 100,
            bounds: None,
        }
    }

    fn project_point(&self, x: &Array1<f64>) -> Array1<f64> {
        if let Some(bounds) = &self.bounds {
            bounds.project(x)
        } else {
            x.clone()
        }
    }

    fn run<ObjFn>(&mut self, obj_fn: &mut ObjFn) -> Result<Solution, FixedPointError>
    where
        ObjFn: FixedPointObjective,
    {
        let mut x_k = self.project_point(&self.x0);
        let mut func_evals = 0usize;
        let mut last_value = f64::INFINITY;
        let mut last_step_norm = 0.0;
        for k in 0..self.max_iterations {
            let sample = match obj_fn.eval_step(&x_k) {
                Ok(sample) => sample,
                Err(ObjectiveEvalError::Recoverable { message })
                | Err(ObjectiveEvalError::Fatal { message }) => {
                    return Err(FixedPointError::ObjectiveFailed { message });
                }
            };
            func_evals += 1;
            let value = recover_on_nonfinite_cost(sample.value).map_err(|err| match err {
                ObjectiveEvalError::Recoverable { message }
                | ObjectiveEvalError::Fatal { message } => {
                    FixedPointError::ObjectiveFailed { message }
                }
            })?;
            if sample.step.len() != x_k.len() {
                return Err(FixedPointError::StepDimensionMismatch {
                    expected: x_k.len(),
                    got: sample.step.len(),
                });
            }
            if sample.step.iter().any(|value| !value.is_finite()) {
                return Err(FixedPointError::NonFiniteStep);
            }
            if matches!(sample.status, FixedPointStatus::Stop) {
                return Ok(Solution::fixed_point(x_k, value, 0.0, k, func_evals));
            }
            let x_next = self.project_point(&(&x_k + &sample.step));
            let applied_step = &x_next - &x_k;
            let step_norm = applied_step.dot(&applied_step).sqrt();
            if !step_norm.is_finite() {
                return Err(FixedPointError::NonFiniteStep);
            }
            last_value = value;
            last_step_norm = step_norm;
            x_k = x_next;
            if step_norm <= self.tolerance {
                return Ok(Solution::fixed_point(
                    x_k,
                    value,
                    step_norm,
                    k + 1,
                    func_evals,
                ));
            }
        }
        Err(FixedPointError::MaxIterationsReached {
            last_solution: Box::new(Solution::fixed_point(
                x_k,
                last_value,
                last_step_norm,
                self.max_iterations,
                func_evals,
            )),
        })
    }
}

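/// Projected fixed-point iteration.
///
/// Each step asks the objective for a proposed step `T(x_k)`, applies
/// `x_{k+1} = P(x_k + T(x_k))` (where `P` is the box projection when bounds are
/// set), and stops once the applied step norm falls to the tolerance or the
/// objective reports `FixedPointStatus::Stop`.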
pub struct FixedPoint<ObjFn> {
    core: FixedPointCore,
    obj_fn: ObjFn,
}

impl<ObjFn> FixedPoint<ObjFn>
where
    ObjFn: FixedPointObjective,
{
    pub fn new(x0: Array1<f64>, obj_fn: ObjFn) -> Self {
        Self {
            core: FixedPointCore::new(x0),
            obj_fn,
        }
    }

    pub fn with_tolerance(mut self, tolerance: Tolerance) -> Self {
        self.core.tolerance = tolerance.get();
        self
    }

    pub fn with_max_iterations(mut self, max_iterations: MaxIterations) -> Self {
        self.core.max_iterations = max_iterations.get();
        self
    }

    pub fn with_bounds(mut self, bounds: Bounds) -> Self {
        self.core.bounds = Some(bounds.spec);
        self
    }

    pub fn run(&mut self) -> Result<Solution, FixedPointError> {
        self.core.run(&mut self.obj_fn)
    }
}

/// A line search algorithm that finds a step size satisfying the Strong Wolfe conditions.
///
/// Bracketing + zoom with safeguards and efficient state-passing to avoid re-computation.
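///
/// Acceptance targets the Strong Wolfe pair: sufficient decrease
/// `f(x + a*d) <= f(x) + c1 * a * g'd` and curvature
/// `|g(x + a*d)'d| <= c2 * |g'd|`, with the relaxed acceptors in
/// `classify_line_search_accept` available as fallbacks.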
#[allow(clippy::too_many_arguments)]
fn line_search<ObjFn>(
    core: &mut BfgsCore,
    obj_fn: &mut ObjFn,
    oracle: &mut FirstOrderCache,
    x_k: &Array1<f64>,
    d_k: &Array1<f64>,
    f_k: f64,
    g_k: &Array1<f64>,
    c1: f64,
    c2: f64,
) -> LsResult
where
    ObjFn: FirstOrderObjective,
{
    let mut alpha_i: f64 = 1.0; // Start with a unit step.
    let mut alpha_prev = 0.0;

    let mut f_prev = f_k;
    let g_proj_k = core.projected_gradient(x_k, g_k);
    let g_k_dot_d = g_proj_k.dot(d_k); // Initial derivative along the search direction.
    if g_k_dot_d >= -eps_g(&g_proj_k, d_k, core.tau_g) {
        log::warn!(
            "[BFGS Wolfe] Non-descent direction detected (gᵀd = {:.2e} >= 0).",
            g_k_dot_d
        );
    }
    let mut g_prev_dot_d = g_k_dot_d;

    let max_attempts = WOLFE_MAX_ATTEMPTS;
    let mut func_evals = 0;
    let mut grad_evals = 0;
    let epsF = eps_f(f_k, core.tau_f);
    let mut best = ProbeBest::new(x_k, f_k, g_k);
    for _ in 0..max_attempts {
        let (x_new, s, kinked) = core.project_with_step(x_k, d_k, alpha_i);
        let step_ok = !core.projected_step_small(x_k, &s);
        if !step_ok {
            return Err(LineSearchError::StepSizeTooSmall);
        }
        let mut f_i = match bfgs_eval_cost(oracle, obj_fn, &x_new, &mut func_evals) {
            Ok(f) => f,
            Err(ObjectiveEvalError::Recoverable { .. }) => f64::NAN,
            Err(ObjectiveEvalError::Fatal { message }) => {
                return Err(LineSearchError::ObjectiveFailed(message));
            }
        };

        // Handle any non-finite value early
        if !f_i.is_finite() {
            core.nonfinite_seen = true;
            if alpha_prev == 0.0 {
                alpha_i *= 0.5;
            } else {
                alpha_i = 0.5 * (alpha_prev + alpha_i);
            }
            if alpha_i <= 1e-18 {
                if let Some((a, f, g, kind)) = probe_alphas(
                    core,
                    obj_fn,
                    oracle,
                    x_k,
                    d_k,
                    f_k,
                    g_k,
                    0.0,
                    alpha_i.max(f64::EPSILON),
                    core.tau_g,
                    core.grad_drop_factor,
                    &mut func_evals,
                    &mut grad_evals,
                ) {
                    return Ok((a, f, g, func_evals, grad_evals, kind));
                }
                return Err(LineSearchError::StepSizeTooSmall);
            }
            // Back-off attempts when stuck in non-finite region
            if func_evals >= 3 {
                return Err(LineSearchError::MaxAttempts(max_attempts));
            }
            continue;
        }

        // Classic Armijo + previous worsening for bracketing (Strong-Wolfe)
        let gkTs = g_proj_k.dot(&s);
        let armijo_strict = f_i > f_k + c1 * gkTs + epsF;
        let prev_worse = func_evals > 1 && f_i >= f_prev - epsF;
        if armijo_strict || prev_worse {
            let kink_lo = if alpha_prev > 0.0 {
                let (_, _, kink_prev) = core.project_with_step(x_k, d_k, alpha_prev);
                kink_prev
            } else {
                false
            };
            if kink_lo || kinked {
                let fallback = backtracking_line_search(core, obj_fn, oracle, x_k, d_k, f_k, g_k);
                return fallback.map(|(a, f, g, fe, ge, kind)| {
                    (a, f, g, fe + func_evals, ge + grad_evals, kind)
                });
            }
            let r = zoom(
                core,
                obj_fn,
                oracle,
                x_k,
                d_k,
                f_k,
                g_k,
                &g_proj_k,
                g_k_dot_d,
                c1,
                c2,
                alpha_prev,
                alpha_i,
                f_prev,
                f_i,
                g_prev_dot_d,
                f64::NAN,
                func_evals,
                grad_evals,
            );
            if r.is_err() && best.f.is_finite() {
                core.global_best = Some(best.clone());
            }
            return r;
        }

        let (f_full, g_i) =
            match bfgs_eval_cost_grad(oracle, obj_fn, &x_new, &mut func_evals, &mut grad_evals) {
                Ok(sample) => sample,
                Err(ObjectiveEvalError::Recoverable { .. }) => {
                    core.nonfinite_seen = true;
                    if alpha_prev == 0.0 {
                        alpha_i *= 0.5;
                    } else {
                        alpha_i = 0.5 * (alpha_prev + alpha_i);
                    }
                    if alpha_i <= 1e-18 {
                        return Err(LineSearchError::StepSizeTooSmall);
                    }
                    continue;
                }
                Err(ObjectiveEvalError::Fatal { message }) => {
                    return Err(LineSearchError::ObjectiveFailed(message));
                }
            };
        f_i = f_full;
        if !f_i.is_finite() || g_i.iter().any(|v| !v.is_finite()) {
            core.nonfinite_seen = true;
            if alpha_prev == 0.0 {
                alpha_i *= 0.5;
            } else {
                alpha_i = 0.5 * (alpha_prev + alpha_i);
            }
            if alpha_i <= 1e-18 {
                return Err(LineSearchError::StepSizeTooSmall);
            }
            continue;
        }
        best.consider(&x_new, f_i, &g_i);

        let armijo_strict = f_i > f_k + c1 * gkTs + epsF;
        let prev_worse = func_evals > 1 && f_i >= f_prev - epsF;
        if armijo_strict || prev_worse {
            let kink_lo = if alpha_prev > 0.0 {
                let (_, _, kink_prev) = core.project_with_step(x_k, d_k, alpha_prev);
                kink_prev
            } else {
                false
            };
            if kink_lo || kinked {
                let fallback = backtracking_line_search(core, obj_fn, oracle, x_k, d_k, f_k, g_k);
                return fallback.map(|(a, f, g, fe, ge, kind)| {
                    (a, f, g, fe + func_evals, ge + grad_evals, kind)
                });
            }
            let g_proj_i = core.projected_gradient(&x_new, &g_i);
            let g_i_dot_d = directional_derivative(&g_proj_i, &s, alpha_i, d_k);
            let r = zoom(
                core,
                obj_fn,
                oracle,
                x_k,
                d_k,
                f_k,
                g_k,
                &g_proj_k,
                g_k_dot_d,
                c1,
                c2,
                alpha_prev,
                alpha_i,
                f_prev,
                f_i,
                g_prev_dot_d,
                g_i_dot_d,
                func_evals,
                grad_evals,
            );
            if r.is_err() && best.f.is_finite() {
                core.global_best = Some(best.clone());
            }
            return r;
        }

        let g_proj_i = core.projected_gradient(&x_new, &g_i);
        let g_i_dot_d = directional_derivative(&g_proj_i, &s, alpha_i, d_k);
        let g_k_dot_eff = directional_derivative(&g_proj_k, &s, alpha_i, d_k);
        let gi_norm = g_proj_i.dot(&g_proj_i).sqrt();
        let gk_norm = g_proj_k.dot(&g_proj_k).sqrt();
        let drop_factor = core.grad_drop_factor;
        let fmax = if core.gll.is_empty() {
            f_k
        } else {
            core.gll.fmax()
        };
        let epsG = eps_g(&g_proj_k, d_k, core.tau_g);
        if let Some(kind) = classify_line_search_accept(
            core,
            step_ok,
            f_k,
            fmax,
            f_i,
            gkTs,
            g_i_dot_d,
            g_k_dot_eff,
            gi_norm,
            gk_norm,
            drop_factor,
            epsF,
            epsG,
            c2,
        ) {
            if matches!(kind, AcceptKind::StrongWolfe) {
                let delta_now = core.trust_radius;
                core.trust_radius = (delta_now * 1.25).min(1e6);
            }
            return Ok((alpha_i, f_i, g_i, func_evals, grad_evals, kind));
        }

        if g_i_dot_d >= -eps_g(&g_proj_k, d_k, core.tau_g) {
            // The minimum is bracketed between alpha_i and alpha_prev.
            // The current point is the best (low) endpoint.
            let r = zoom(
                core,
                obj_fn,
                oracle,
                x_k,
                d_k,
                f_k,
                g_k,
                &g_proj_k,
                g_k_dot_d,
                c1,
                c2,
                alpha_i,
                alpha_prev,
                f_i,
                f_prev,
                g_i_dot_d,
                g_prev_dot_d,
                func_evals,
                grad_evals,
            );
            if r.is_err() && best.f.is_finite() {
                core.global_best = Some(best.clone());
            }
            return r;
        }

        // The step is too short; cache the current state and expand the interval.
        alpha_prev = alpha_i;
        f_prev = f_i;
        g_prev_dot_d = g_i_dot_d;
        // Double alpha geometrically; projection clamps any overshoot at the bounds.
        alpha_i *= 2.0;
    }

    if best.f.is_finite() {
        core.global_best = Some(best);
    }
    // Probing grid before declaring failure
    if alpha_i > 0.0
        && let Some((a, f, g, kind)) = probe_alphas(
            core,
            obj_fn,
            oracle,
            x_k,
            d_k,
            f_k,
            g_k,
            0.0,
            alpha_i,
            core.tau_g,
            core.grad_drop_factor,
            &mut func_evals,
            &mut grad_evals,
        )
    {
        return Ok((a, f, g, func_evals, grad_evals, kind));
    }
    Err(LineSearchError::MaxAttempts(max_attempts))
}

/// A backtracking line search that shrinks trial steps until a Wolfe-safe acceptance fires.
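///
/// Shrinks `alpha` by `rho = 0.5` (relaxed to `0.8` after repeated flat
/// evaluations), allows a one-time expansion to hop flat plateaus, optionally
/// jiggles the step size, and falls back to the probing grid before failing.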
fn backtracking_line_search<ObjFn>(
    core: &mut BfgsCore,
    obj_fn: &mut ObjFn,
    oracle: &mut FirstOrderCache,
    x_k: &Array1<f64>,
    d_k: &Array1<f64>,
    f_k: f64,
    g_k: &Array1<f64>,
) -> LsResult
where
    ObjFn: FirstOrderObjective,
{
    let mut alpha: f64 = 1.0;
    let mut rho = 0.5;
    let max_attempts = BACKTRACKING_MAX_ATTEMPTS;

    let g_proj_k = core.projected_gradient(x_k, g_k);
    let g_k_dot_d = g_proj_k.dot(d_k);
    // A backtracking search is only valid on a descent direction.
    if g_k_dot_d >= -eps_g(&g_proj_k, d_k, core.tau_g) {
        log::warn!(
            "[BFGS Backtracking] Search started with a non-descent direction (gᵀd = {:.2e} >= 0). This step will likely fail.",
            g_k_dot_d
        );
    }

    let mut func_evals = 0;
    let mut grad_evals = 0;
    let mut best = ProbeBest::new(x_k, f_k, g_k);
    let epsF = eps_f(f_k, core.tau_f);
    let mut no_change_count = 0usize;
    let mut expanded_once = false;
    let dnorm = d_k.dot(d_k).sqrt();
    for _ in 0..max_attempts {
        let (x_new, s, _) = core.project_with_step(x_k, d_k, alpha);
        let step_ok = !core.projected_step_small(x_k, &s);
        if !step_ok {
            return Err(LineSearchError::StepSizeTooSmall);
        }
        let mut f_new = match bfgs_eval_cost(oracle, obj_fn, &x_new, &mut func_evals) {
            Ok(f) => f,
            Err(ObjectiveEvalError::Recoverable { .. }) => f64::NAN,
            Err(ObjectiveEvalError::Fatal { message }) => {
                return Err(LineSearchError::ObjectiveFailed(message));
            }
        };

        // If evaluation is non-finite, shrink alpha and continue (salvage best-so-far)
        if !f_new.is_finite() {
            core.nonfinite_seen = true;
            alpha *= rho;
            if alpha < 1e-16 {
                return Err(LineSearchError::StepSizeTooSmall);
            }
            if func_evals >= 3 {
                return Err(LineSearchError::MaxAttempts(max_attempts));
            }
            continue;
        }

        let gkTs = g_proj_k.dot(&s);
        let fmax = if core.gll.is_empty() {
            f_k
        } else {
            core.gll.fmax()
        };
        let armijo_accept = core.accept_armijo(f_k, gkTs, f_new);
        let gll_accept = core.accept_gll_nonmonotone(fmax, gkTs, f_new);
        let candidate_for_gradient = armijo_accept
            || gll_accept
            || (core.relaxed_acceptors_enabled() && f_new <= f_k + epsF);
        let mut g_new_opt = None;
        if candidate_for_gradient {
            let (f_full, g_new) =
                match bfgs_eval_cost_grad(oracle, obj_fn, &x_new, &mut func_evals, &mut grad_evals)
                {
                    Ok(sample) => sample,
                    Err(ObjectiveEvalError::Recoverable { .. }) => {
                        core.nonfinite_seen = true;
                        alpha *= rho;
                        if alpha < 1e-16 {
                            return Err(LineSearchError::StepSizeTooSmall);
                        }
                        continue;
                    }
                    Err(ObjectiveEvalError::Fatal { message }) => {
                        return Err(LineSearchError::ObjectiveFailed(message));
                    }
                };
            f_new = f_full;
            if !f_new.is_finite() || g_new.iter().any(|v| !v.is_finite()) {
                core.nonfinite_seen = true;
                alpha *= rho;
                if alpha < 1e-16 {
                    return Err(LineSearchError::StepSizeTooSmall);
                }
                continue;
            }
            best.consider(&x_new, f_new, &g_new);
            g_new_opt = Some(g_new);
        }

        let Some(g_new) = g_new_opt else {
            if (f_new - f_k).abs() <= epsF {
                no_change_count += 1;
            } else {
                no_change_count = 0;
                expanded_once = false;
            }
            if no_change_count >= 3 {
                rho = 0.8;
            }
            if no_change_count >= 2 && !expanded_once {
                alpha /= rho;
                expanded_once = true;
            } else {
                alpha *= rho;
            }
            if core.jiggle_enabled() && no_change_count >= 2 {
                let jiggle = 1.0 + core.jiggle_scale() * core.next_rand_sym();
                alpha = (alpha * jiggle).max(f64::EPSILON);
            }
            let tol_x = core.step_tolerance(x_k);
            if (alpha * dnorm) <= tol_x {
                return Err(LineSearchError::StepSizeTooSmall);
            }
            continue;
        };

        // Gradient reduction acceptance
        let g_proj_new = core.projected_gradient(&x_new, &g_new);
        let gk_dot_eff = directional_derivative(&g_proj_k, &s, alpha, d_k);
        let gnew_norm = g_proj_new.dot(&g_proj_new).sqrt();
        let gk_norm = g_proj_k.dot(&g_proj_k).sqrt();
        let drop_factor = core.grad_drop_factor;
        let g_new_dot_d = directional_derivative(&g_proj_new, &s, alpha, d_k);
        let epsG = eps_g(&g_proj_k, d_k, core.tau_g);
        if let Some(kind) = classify_line_search_accept(
            core,
            step_ok,
            f_k,
            fmax,
            f_new,
            gkTs,
            g_new_dot_d,
            gk_dot_eff,
            gnew_norm,
            gk_norm,
            drop_factor,
            epsF,
            epsG,
            core.c2_adapt,
        ) {
            return Ok((alpha, f_new, g_new, func_evals, grad_evals, kind));
        }

        if (f_new - f_k).abs() <= epsF {
            no_change_count += 1;
        } else {
            no_change_count = 0;
            expanded_once = false;
        }
        if no_change_count >= 3 {
            rho = 0.8;
        }
        if no_change_count >= 2 && !expanded_once {
            // one-time expansion to hop flat plateau
            alpha /= rho; // slight expand
            expanded_once = true;
        } else {
            alpha *= rho;
        }
        // Stochastic jiggling to avoid hitting identical thresholds repeatedly
        if core.jiggle_enabled() && no_change_count >= 2 {
            let jiggle = 1.0 + core.jiggle_scale() * core.next_rand_sym();
            alpha = (alpha * jiggle).max(f64::EPSILON);
        }
        // Relative step-size stop: ||alpha d|| <= tol_x
        let tol_x = core.step_tolerance(x_k);
        if (alpha * dnorm) <= tol_x {
            return Err(LineSearchError::StepSizeTooSmall);
        }
    }

    // Probing grid before declaring failure
    if alpha > 0.0
        && let Some((a, f, g, kind)) = probe_alphas(
            core,
            obj_fn,
            oracle,
            x_k,
            d_k,
            f_k,
            g_k,
            0.0,
            alpha,
            core.tau_g,
            core.grad_drop_factor,
            &mut func_evals,
            &mut grad_evals,
        )
    {
        return Ok((a, f, g, func_evals, grad_evals, kind));
    }

    // Stash best seen during backtracking
    if best.f.is_finite() {
        core.global_best = Some(best);
    }
    Err(LineSearchError::MaxAttempts(max_attempts))
}

/// Helper "zoom" function using cubic interpolation.
///
/// This function is called when a bracketing interval [alpha_lo, alpha_hi] that contains
/// a point satisfying the Strong Wolfe conditions is known. It iteratively refines this
/// interval until a suitable step size is found.
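///
/// Trial steps come from a safeguarded cubic (Hermite) interpolation of the
/// bracket endpoints, falling back to bisection whenever the endpoint data are
/// missing or non-finite.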
5551#[allow(clippy::too_many_arguments)]
5552fn zoom<ObjFn>(
5553    core: &mut BfgsCore,
5554    obj_fn: &mut ObjFn,
5555    oracle: &mut FirstOrderCache,
5556    x_k: &Array1<f64>,
5557    d_k: &Array1<f64>,
5558    f_k: f64,
5559    g_k: &Array1<f64>,
5560    g_proj_k: &Array1<f64>,
5561    _g_k_dot_d: f64,
5562    c1: f64,
5563    c2: f64,
5564    mut alpha_lo: f64,
5565    mut alpha_hi: f64,
5566    mut f_lo: f64,
5567    mut f_hi: f64,
5568    mut g_lo_dot_d: f64,
5569    mut g_hi_dot_d: f64,
5570    mut func_evals: usize,
5571    mut grad_evals: usize,
5572) -> LsResult
5573where
5574    ObjFn: FirstOrderObjective,
5575{
5576    let max_zoom_attempts = 15;
5577    let min_alpha_step = 1e-12; // Prevents division by zero or degenerate steps.
5578    let epsF = eps_f(f_k, core.tau_f);
5579    let mut best = ProbeBest::new(x_k, f_k, g_k);
5580    let mut lo_deriv_known = g_lo_dot_d.is_finite();
5581    let mut hi_deriv_known = g_hi_dot_d.is_finite();
5582    for _ in 0..max_zoom_attempts {
5583        let kink_lo = if alpha_lo > 0.0 {
5584            let (_, _, kink) = core.project_with_step(x_k, d_k, alpha_lo);
5585            kink
5586        } else {
5587            false
5588        };
5589        let kink_hi = if alpha_hi > 0.0 {
5590            let (_, _, kink) = core.project_with_step(x_k, d_k, alpha_hi);
5591            kink
5592        } else {
5593            false
5594        };
5595        if kink_lo || kink_hi {
5596            let fallback = backtracking_line_search(core, obj_fn, oracle, x_k, d_k, f_k, g_k);
5597            return fallback
5598                .map(|(a, f, g, fe, ge, kind)| (a, f, g, fe + func_evals, ge + grad_evals, kind));
5599        }
5600        let tiny_bracket = (alpha_hi - alpha_lo).abs() <= 1e-12;
5601        let flat_f = (f_hi - f_lo).abs() <= epsF;
5602        let similar_slope = lo_deriv_known
5603            && hi_deriv_known
5604            && (g_hi_dot_d.abs() - g_lo_dot_d.abs()).abs()
5605                <= core.curv_slack_scale * eps_g(g_proj_k, d_k, core.tau_g);
5606        // Endpoint rescue on tiny brackets or flat ends with mismatched slopes.
5607        if tiny_bracket || (flat_f && !similar_slope) {
5608            let (mut alpha_j, choose_lo) = match (lo_deriv_known, hi_deriv_known) {
5609                (true, true) => {
5610                    if g_lo_dot_d.abs() <= g_hi_dot_d.abs() {
5611                        (alpha_lo, true)
5612                    } else {
5613                        (alpha_hi, false)
5614                    }
5615                }
5616                (true, false) => (alpha_lo, true),
5617                (false, true) => (alpha_hi, false),
5618                (false, false) => ((alpha_lo + alpha_hi) / 2.0, false),
5619            };
5620            // Avoid zero step; prefer the nonzero endpoint, otherwise midpoint
5621            if alpha_j <= f64::EPSILON {
5622                alpha_j = if choose_lo { alpha_hi } else { alpha_lo };
5623            }
5624            if alpha_j <= f64::EPSILON {
5625                alpha_j = 0.5 * (alpha_lo + alpha_hi);
5626            }
5627            let (x_j, s_j, kink_mid) = core.project_with_step(x_k, d_k, alpha_j);
5628            let step_ok = !core.projected_step_small(x_k, &s_j);
5629            if !step_ok {
5630                return Err(LineSearchError::StepSizeTooSmall);
5631            }
5632            if kink_mid {
5633                let fallback = backtracking_line_search(core, obj_fn, oracle, x_k, d_k, f_k, g_k);
5634                return fallback.map(|(a, f, g, fe, ge, kind)| {
5635                    (a, f, g, fe + func_evals, ge + grad_evals, kind)
5636                });
5637            }
5638            let (f_j, g_j) =
5639                match bfgs_eval_cost_grad(oracle, obj_fn, &x_j, &mut func_evals, &mut grad_evals) {
5640                    Ok(sample) => sample,
5641                    Err(ObjectiveEvalError::Recoverable { .. }) => {
5642                        (f64::NAN, Array1::zeros(x_j.len()))
5643                    }
5644                    Err(ObjectiveEvalError::Fatal { message }) => {
5645                        return Err(LineSearchError::ObjectiveFailed(message));
5646                    }
5647                };
5648            if !f_j.is_finite() || g_j.iter().any(|&v| !v.is_finite()) {
5649                core.nonfinite_seen = true;
5650                if choose_lo {
5651                    alpha_lo = 0.5 * (alpha_lo + alpha_hi);
5652                    lo_deriv_known = false;
5653                } else {
5654                    alpha_hi = 0.5 * (alpha_lo + alpha_hi);
5655                    hi_deriv_known = false;
5656                }
5657                continue;
5658            }
5659            // Acceptance guard shared with the main search/probing paths.
5660            let g_proj_j = core.projected_gradient(&x_j, &g_j);
5661            let gkTs = g_proj_k.dot(&s_j);
5662            let gk_dot_d_eff = directional_derivative(g_proj_k, &s_j, alpha_j, d_k);
5663            let g_j_dot_d = directional_derivative(&g_proj_j, &s_j, alpha_j, d_k);
5664            let epsG = eps_g(g_proj_k, d_k, core.tau_g);
5665            let gj_norm = g_proj_j.iter().map(|v| v * v).sum::<f64>().sqrt();
5666            let gk_norm = g_proj_k.iter().map(|v| v * v).sum::<f64>().sqrt();
5667            let drop_factor = core.grad_drop_factor;
5668            let fmax = if core.gll.is_empty() {
5669                f_k
5670            } else {
5671                core.gll.fmax()
5672            };
5673            if let Some(kind) = classify_line_search_accept(
5674                core,
5675                step_ok,
5676                f_k,
5677                fmax,
5678                f_j,
5679                gkTs,
5680                g_j_dot_d,
5681                gk_dot_d_eff,
5682                gj_norm,
5683                gk_norm,
5684                drop_factor,
5685                epsF,
5686                epsG,
5687                c2,
5688            ) {
5689                return Ok((alpha_j, f_j, g_j, func_evals, grad_evals, kind));
5690            } else {
5691                // tighten bracket and continue
5692                let mid = 0.5 * (alpha_lo + alpha_hi);
5693                if alpha_j > mid {
5694                    alpha_hi = alpha_j;
5695                    f_hi = f_j;
5696                    g_hi_dot_d = g_j_dot_d;
5697                    hi_deriv_known = true;
5698                } else {
5699                    alpha_lo = alpha_j;
5700                    f_lo = f_j;
5701                    g_lo_dot_d = g_j_dot_d;
5702                    lo_deriv_known = true;
5703                }
5704                continue;
5705            }
5706        }
5707        if flat_f && similar_slope {
5708            let alpha_mid = 0.5 * (alpha_lo + alpha_hi);
5709            let (x_mid, s_mid, kink_mid) = core.project_with_step(x_k, d_k, alpha_mid);
5710            let step_ok = !core.projected_step_small(x_k, &s_mid);
5711            if !step_ok {
5712                return Err(LineSearchError::StepSizeTooSmall);
5713            }
5714            if kink_mid {
5715                let fallback = backtracking_line_search(core, obj_fn, oracle, x_k, d_k, f_k, g_k);
5716                return fallback.map(|(a, f, g, fe, ge, kind)| {
5717                    (a, f, g, fe + func_evals, ge + grad_evals, kind)
5718                });
5719            }
5720            let (f_mid, g_mid) =
5721                match bfgs_eval_cost_grad(oracle, obj_fn, &x_mid, &mut func_evals, &mut grad_evals)
5722                {
5723                    Ok(sample) => sample,
5724                    Err(ObjectiveEvalError::Recoverable { .. }) => {
5725                        core.nonfinite_seen = true;
5726                        let tighten_lo = g_lo_dot_d.abs() > g_hi_dot_d.abs();
5727                        if tighten_lo {
5728                            alpha_lo = alpha_mid;
5729                            lo_deriv_known = false;
5730                        } else {
5731                            alpha_hi = alpha_mid;
5732                            hi_deriv_known = false;
5733                        }
5734                        continue;
5735                    }
5736                    Err(ObjectiveEvalError::Fatal { message }) => {
5737                        return Err(LineSearchError::ObjectiveFailed(message));
5738                    }
5739                };
5740            if f_mid.is_finite() && g_mid.iter().all(|v| v.is_finite()) {
5741                // Midpoint rescue still has to satisfy the same decrease/curvature gates.
5742                let g_proj_mid = core.projected_gradient(&x_mid, &g_mid);
5743                let g_mid_dot_d = directional_derivative(&g_proj_mid, &s_mid, alpha_mid, d_k);
5744                let gkTs = g_proj_k.dot(&s_mid);
5745                let gk_dot_d_eff = directional_derivative(g_proj_k, &s_mid, alpha_mid, d_k);
5746                let epsG = eps_g(g_proj_k, d_k, core.tau_g);
5747                let gmid_norm = g_proj_mid.iter().map(|v| v * v).sum::<f64>().sqrt();
5748                let gk_norm = g_proj_k.iter().map(|v| v * v).sum::<f64>().sqrt();
5749                let drop_factor = core.grad_drop_factor;
5750                let fmax = if core.gll.is_empty() {
5751                    f_k
5752                } else {
5753                    core.gll.fmax()
5754                };
5755                if let Some(kind) = classify_line_search_accept(
5756                    core,
5757                    step_ok,
5758                    f_k,
5759                    fmax,
5760                    f_mid,
5761                    gkTs,
5762                    g_mid_dot_d,
5763                    gk_dot_d_eff,
5764                    gmid_norm,
5765                    gk_norm,
5766                    drop_factor,
5767                    epsF,
5768                    epsG,
5769                    c2,
5770                ) {
5771                    return Ok((alpha_mid, f_mid, g_mid, func_evals, grad_evals, kind));
5772                }
5773                let tighten_lo = g_lo_dot_d.abs() > g_hi_dot_d.abs();
5774                if tighten_lo {
5775                    alpha_lo = alpha_mid;
5776                    f_lo = f_mid;
5777                    g_lo_dot_d = g_mid_dot_d;
5778                    lo_deriv_known = true;
5779                } else {
5780                    alpha_hi = alpha_mid;
5781                    f_hi = f_mid;
5782                    g_hi_dot_d = g_mid_dot_d;
5783                    hi_deriv_known = true;
5784                }
5785                continue;
5786            } else {
5787                core.nonfinite_seen = true;
5788                let tighten_lo = g_lo_dot_d.abs() > g_hi_dot_d.abs();
5789                if tighten_lo {
5790                    alpha_lo = alpha_mid;
5791                    lo_deriv_known = false;
5792                } else {
5793                    alpha_hi = alpha_mid;
5794                    hi_deriv_known = false;
5795                }
5796                continue;
5797            }
5798        }
5799        // --- Use cubic interpolation to find a trial step size `alpha_j` ---
5800        // If both bracket endpoints are non-finite, the bracket is unusable; fail immediately.
5801        if !f_lo.is_finite() && !f_hi.is_finite() {
5802            log::warn!("[BFGS Zoom] Line search bracketed an infinite region. Aborting.");
5803            return Err(LineSearchError::MaxAttempts(max_zoom_attempts));
5804        }
5805        let alpha_j = {
5806            let (alpha_lo_i, alpha_hi_i, f_lo_i, f_hi_i, g_lo_i, g_hi_i) = if alpha_lo <= alpha_hi {
5807                (alpha_lo, alpha_hi, f_lo, f_hi, g_lo_dot_d, g_hi_dot_d)
5808            } else {
5809                (alpha_hi, alpha_lo, f_hi, f_lo, g_hi_dot_d, g_lo_dot_d)
5810            };
5811
5812            let alpha_diff = alpha_hi_i - alpha_lo_i;
5813
5814            // Fall back to bisection if the interval is too small, an endpoint derivative is
5815            // unknown, or an endpoint value is non-finite; interpolation would be unstable there.
5816            if alpha_diff < min_alpha_step
5817                || !f_lo_i.is_finite()
5818                || !f_hi_i.is_finite()
5819                || !lo_deriv_known
5820                || !hi_deriv_known
5821            {
5822                (alpha_lo + alpha_hi) / 2.0
5823            } else {
5824                // Cubic interpolation using endpoint function values and directional derivatives.
5825                // d1 and d2 come from the cubic interpolant that matches f and directional
5826                // derivatives at the bracket endpoints.
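                // Concretely (cf. Nocedal & Wright, Numerical Optimization, eq. 3.59): with
                //   d1 = g_lo + g_hi - 3 * (f_hi - f_lo) / (a_hi - a_lo),
                //   d2 = sqrt(d1^2 - g_lo * g_hi),
                // the cubic's minimizer in [a_lo, a_hi] is
                //   a* = a_hi - (a_hi - a_lo) * (g_hi + d2 - d1) / (g_hi - g_lo + 2 * d2).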
5827                let d1 = g_lo_i + g_hi_i - 3.0 * (f_hi_i - f_lo_i) / alpha_diff;
5828                let d2_sq = d1 * d1 - g_lo_i * g_hi_i;
5829
5830                if d2_sq >= 0.0 && d2_sq.is_finite() {
5831                    let d2 = d2_sq.sqrt();
5832                    let trial =
5833                        alpha_hi_i - alpha_diff * (g_hi_i + d2 - d1) / (g_hi_i - g_lo_i + 2.0 * d2);
5834
5835                    // If interpolation gives a non-finite value or a point outside
5836                    // the bracket, fall back to bisection.
5837                    if !trial.is_finite() || trial < alpha_lo_i || trial > alpha_hi_i {
5838                        (alpha_lo + alpha_hi) / 2.0
5839                    } else {
5840                        trial
5841                    }
5842                } else {
5843                    (alpha_lo + alpha_hi) / 2.0
5844                }
5845            }
5846        };
5847
5848        // If the trial step is too close to either bracket endpoint to make progress, bisect instead.
5849        let alpha_j = if (alpha_j - alpha_lo).abs() < min_alpha_step
5850            || (alpha_j - alpha_hi).abs() < min_alpha_step
5851        {
5852            (alpha_lo + alpha_hi) / 2.0
5853        } else {
5854            alpha_j
5855        };
5856
5857        let (x_j, s_j, kink_j) = core.project_with_step(x_k, d_k, alpha_j);
5858        let step_ok = !core.projected_step_small(x_k, &s_j);
5859        if !step_ok {
5860            return Err(LineSearchError::StepSizeTooSmall);
5861        }
5862        if kink_j {
5863            let fallback = backtracking_line_search(core, obj_fn, oracle, x_k, d_k, f_k, g_k);
5864            return fallback
5865                .map(|(a, f, g, fe, ge, kind)| (a, f, g, fe + func_evals, ge + grad_evals, kind));
5866        }
5867        let mut f_j = match bfgs_eval_cost(oracle, obj_fn, &x_j, &mut func_evals) {
5868            Ok(f) => f,
5869            Err(ObjectiveEvalError::Recoverable { .. }) => f64::NAN,
5870            Err(ObjectiveEvalError::Fatal { message }) => {
5871                return Err(LineSearchError::ObjectiveFailed(message));
5872            }
5873        };
5874
5875        // Handle a non-finite f_j by shrinking the bracket; the untouched endpoint keeps its derivative info
5876        if !f_j.is_finite() {
5877            core.nonfinite_seen = true;
5878            // Shrink whichever endpoint alpha_j is nearer to, and mark its derivative as unknown
5879            let to_hi = (alpha_hi - alpha_j).abs() <= (alpha_j - alpha_lo).abs();
5880            if to_hi {
5881                alpha_hi = alpha_j;
5882                f_hi = f_j;
5883                hi_deriv_known = false;
5884            } else {
5885                alpha_lo = alpha_j;
5886                f_lo = f_j;
5887                lo_deriv_known = false;
5888            }
5889            continue;
5890        }
5891
5892        // Check whether `alpha_j` gives sufficient decrease against both the monotone (f_k)
5893        // and nonmonotone GLL (fmax) references; a failure tightens the upper bound instead.
5894        let fmax = if core.gll.is_empty() {
5895            f_k
5896        } else {
5897            core.gll.fmax()
5898        };
5899        let gkTs = g_proj_k.dot(&s_j);
5900        let gk_dot_d_eff = directional_derivative(g_proj_k, &s_j, alpha_j, d_k);
5901        let armijo_ok = f_j <= f_k + c1 * gkTs + epsF;
5902        let armijo_gll_ok = f_j <= fmax + c1 * gkTs + epsF;
5903        if (!armijo_ok && !armijo_gll_ok) || f_j >= f_lo - epsF {
5904            alpha_hi = alpha_j;
5905            f_hi = f_j;
5906            hi_deriv_known = false;
5907        } else {
5908            let (f_full, g_j) =
5909                match bfgs_eval_cost_grad(oracle, obj_fn, &x_j, &mut func_evals, &mut grad_evals) {
5910                    Ok(sample) => sample,
5911                    Err(ObjectiveEvalError::Recoverable { .. }) => {
5912                        core.nonfinite_seen = true;
5913                        let to_hi = (alpha_hi - alpha_j).abs() <= (alpha_j - alpha_lo).abs();
5914                        if to_hi {
5915                            alpha_hi = alpha_j;
5916                            f_hi = f64::NAN;
5917                            hi_deriv_known = false;
5918                        } else {
5919                            alpha_lo = alpha_j;
5920                            f_lo = f64::NAN;
5921                            lo_deriv_known = false;
5922                        }
5923                        continue;
5924                    }
5925                    Err(ObjectiveEvalError::Fatal { message }) => {
5926                        return Err(LineSearchError::ObjectiveFailed(message));
5927                    }
5928                };
5929            f_j = f_full;
5930            if !f_j.is_finite() || g_j.iter().any(|&v| !v.is_finite()) {
5931                core.nonfinite_seen = true;
5932                let to_hi = (alpha_hi - alpha_j).abs() <= (alpha_j - alpha_lo).abs();
5933                if to_hi {
5934                    alpha_hi = alpha_j;
5935                    f_hi = f_j;
5936                    hi_deriv_known = false;
5937                } else {
5938                    alpha_lo = alpha_j;
5939                    f_lo = f_j;
5940                    lo_deriv_known = false;
5941                }
5942                continue;
5943            }
5944            best.consider(&x_j, f_j, &g_j);
5945            let armijo_ok = f_j <= f_k + c1 * gkTs + epsF;
5946            let armijo_gll_ok = f_j <= fmax + c1 * gkTs + epsF;
5947            if (!armijo_ok && !armijo_gll_ok) || f_j >= f_lo - epsF {
5948                alpha_hi = alpha_j;
5949                f_hi = f_j;
5950                let g_proj_j = core.projected_gradient(&x_j, &g_j);
5951                g_hi_dot_d = directional_derivative(&g_proj_j, &s_j, alpha_j, d_k);
5952                hi_deriv_known = true;
5953                continue;
5954            }
5955
5956            let g_proj_j = core.projected_gradient(&x_j, &g_j);
5957            let g_j_dot_d = directional_derivative(&g_proj_j, &s_j, alpha_j, d_k);
5958            let gj_norm = g_proj_j.dot(&g_proj_j).sqrt();
5959            let gk_norm = g_proj_k.dot(g_proj_k).sqrt();
5960            let drop_factor = core.grad_drop_factor;
5961            let epsG = eps_g(g_proj_k, d_k, core.tau_g);
5962            if let Some(kind) = classify_line_search_accept(
5963                core,
5964                step_ok,
5965                f_k,
5966                fmax,
5967                f_j,
5968                gkTs,
5969                g_j_dot_d,
5970                gk_dot_d_eff,
5971                gj_norm,
5972                gk_norm,
5973                drop_factor,
5974                epsF,
5975                epsG,
5976                c2,
5977            ) {
5978                return Ok((alpha_j, f_j, g_j, func_evals, grad_evals, kind));
5979            }
5980
5981            // The minimum is bracketed by a point with a negative derivative
5982            // (alpha_lo) and a point with a positive derivative (alpha_j).
5983            if g_j_dot_d >= -epsG {
5984                // The new point has a positive derivative and a lower function value,
5985                // so it becomes the new best (low) point and the old low becomes high.
5986                alpha_hi = alpha_lo;
5987                f_hi = f_lo;
5988                g_hi_dot_d = g_lo_dot_d;
5989                hi_deriv_known = lo_deriv_known;
5990
5991                alpha_lo = alpha_j;
5992                f_lo = f_j;
5993                g_lo_dot_d = g_j_dot_d;
5994                lo_deriv_known = true;
5995            } else {
5996                // The new point has a negative derivative, so it becomes the new
5997                // lower bound of the bracket. The new interval is [alpha_j, alpha_hi].
5998                alpha_lo = alpha_j;
5999                f_lo = f_j;
6000                g_lo_dot_d = g_j_dot_d;
6001                lo_deriv_known = true;
6002            }
6003        }
6004    }
6005    // Try the probing grid as a last resort before declaring failure
6006    if let Some((a, f, g, kind)) = probe_alphas(
6007        core,
6008        obj_fn,
6009        oracle,
6010        x_k,
6011        d_k,
6012        f_k,
6013        g_k,
6014        alpha_lo.min(alpha_hi),
6015        alpha_lo.max(alpha_hi),
6016        core.tau_g,
6017        core.grad_drop_factor,
6018        &mut func_evals,
6019        &mut grad_evals,
6020    ) {
6021        return Ok((a, f, g, func_evals, grad_evals, kind));
6022    }
6023    if best.f.is_finite() {
6024        core.global_best = Some(best);
6025    }
6026    Err(LineSearchError::MaxAttempts(max_zoom_attempts))
6027}
6028
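/// Last-resort probing grid: evaluate a few interior steps of the final bracket and
/// return the lowest-value candidate that passes the standard acceptance gates, if any.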
6029#[allow(clippy::too_many_arguments)]
6030fn probe_alphas<ObjFn>(
6031    core: &mut BfgsCore,
6032    obj_fn: &mut ObjFn,
6033    oracle: &mut FirstOrderCache,
6034    x_k: &Array1<f64>,
6035    d_k: &Array1<f64>,
6036    f_k: f64,
6037    g_k: &Array1<f64>,
6038    a_lo: f64,
6039    a_hi: f64,
6040    tau_g: f64,
6041    drop_factor: f64,
6042    fe: &mut usize,
6043    ge: &mut usize,
6044) -> Option<(f64, f64, Array1<f64>, AcceptKind)>
6045where
6046    ObjFn: FirstOrderObjective,
6047{
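    // Probe three interior points of the bracket: 20%, 50%, and 80% of the way from a_lo to a_hi.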
6048    let cands = [0.2, 0.5, 0.8].map(|t| a_lo + t * (a_hi - a_lo));
6049    let g_proj_k = core.projected_gradient(x_k, g_k);
6050    let gk_norm = g_proj_k.iter().map(|v| v * v).sum::<f64>().sqrt();
6051    let epsF = eps_f(f_k, core.tau_f);
6052    let epsG = eps_g(&g_proj_k, d_k, tau_g);
6053    let mut best: Option<(f64, f64, Array1<f64>, AcceptKind)> = None;
6054    for &a in &cands {
6055        if !a.is_finite() || a <= 0.0 {
6056            continue;
6057        }
6058        let (x, s, _) = core.project_with_step(x_k, d_k, a);
6059        let step_ok = !core.projected_step_small(x_k, &s);
6060        if !step_ok {
6061            continue;
6062        }
6063        let f = match bfgs_eval_cost(oracle, obj_fn, &x, fe) {
6064            Ok(f) => f,
6065            Err(_) => continue,
6066        };
6067        if !f.is_finite() {
6068            continue;
6069        }
6070        let gkTs = g_proj_k.dot(&s);
6071        let (f, g) = match bfgs_eval_cost_grad(oracle, obj_fn, &x, fe, ge) {
6072            Ok(sample) => sample,
6073            Err(_) => continue,
6074        };
6075        if !f.is_finite() || g.iter().any(|v| !v.is_finite()) {
6076            continue;
6077        }
6078        let g_proj = core.projected_gradient(&x, &g);
6079        let gi_norm = g_proj.dot(&g_proj).sqrt();
6080        let g_trial_dot_d = directional_derivative(&g_proj, &s, a, d_k);
6081        let gk_dot_d_eff = directional_derivative(&g_proj_k, &s, a, d_k);
6082        let fmax = if core.gll.is_empty() {
6083            f_k
6084        } else {
6085            core.gll.fmax()
6086        };
6087        if let Some(kind) = classify_line_search_accept(
6088            core,
6089            step_ok,
6090            f_k,
6091            fmax,
6092            f,
6093            gkTs,
6094            g_trial_dot_d,
6095            gk_dot_d_eff,
6096            gi_norm,
6097            gk_norm,
6098            drop_factor,
6099            epsF,
6100            epsG,
6101            core.c2_adapt,
6102        ) && best.as_ref().map(|(fb, _, _, _)| f < *fb).unwrap_or(true)
6103        {
6104            best = Some((f, a, g, kind));
6105        }
6106    }
6107    best.map(|(f, a, g, kind)| (a, f, g, kind))
6108}
6109
6110#[cfg(test)]
6111mod tests {
6112    // This test suite is structured into three parts:
6113    // 1. Standard Convergence Tests: Verifies that the solver finds the correct
6114    //    minimum for well-known benchmark functions from standard starting points.
6115    // 2. Failure and Edge Case Tests: Ensures the solver handles non-convex
6116    //    functions, pre-solved problems, and iteration limits correctly and returns
6117    //    the appropriate descriptive errors.
6118    // 3. Comparison Tests: Validates the behavior of our implementation against
6119    //    SciPy's `scipy.optimize` (via the Python harness below), ensuring that
6120    //    our results (final point and iteration count) are comparable.
6121
6122    use super::{
6123        ArcError, AutoSecondOrderSolver, BACKTRACKING_MAX_ATTEMPTS, Bfgs, BfgsError, Bounds,
6124        FiniteDiffGradient, FirstOrderObjective, FirstOrderSample, FixedPoint, FixedPointObjective,
6125        FixedPointSample, FixedPointStatus, LineSearchFailureReason, MaxIterations,
6126        NewtonTrustRegion, ObjectiveEvalError, Problem, Profile, SecondOrderObjective,
6127        SecondOrderProblem, SecondOrderSample, Solution, Tolerance, ZerothOrderObjective, optimize,
6128    };
6129    use ndarray::{Array1, Array2, array};
6130    use spectral::prelude::*;
6131
6132    // --- Test Harness: Python scipy.optimize Comparison Setup ---
6133    use std::path::{Path, PathBuf};
6134    use std::process::Command;
6135    use std::sync::OnceLock;
6136    use std::sync::{Arc, Mutex};
6137
6138    #[derive(serde::Deserialize)]
6139    struct PythonOptResult {
6140        success: bool,
6141        final_point: Option<Vec<f64>>,
6142        final_value: Option<f64>,
6143        final_gradient_norm: Option<f64>,
6144        iterations: Option<usize>,
6145        func_evals: Option<usize>,
6146        grad_evals: Option<usize>,
6147        message: Option<String>,
6148        error: Option<String>,
6149    }
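    // Illustrative shape of a successful harness response (values are made up):
    //   {"success": true, "final_point": [1.0, 1.0], "final_value": 4.9e-15,
    //    "final_gradient_norm": 2.2e-7, "iterations": 33, "func_evals": 41,
    //    "grad_evals": 41, "message": "Optimization terminated successfully.", "error": null}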
6150
6151    /// Call the Python optimization harness and return its parsed result.
6152    fn optimize_with_python(
6153        x0: &Array1<f64>,
6154        function_name: &str,
6155        tolerance: f64,
6156        max_iterations: usize,
6157    ) -> Result<PythonOptResult, String> {
6158        let python = ensure_python_deps()?;
6159        let crate_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
6160        let harness = crate_dir.join("optimization_harness.py");
6161        let input_json = serde_json::json!({
6162            "x0": x0.to_vec(),
6163            "function": function_name,
6164            "tolerance": tolerance,
6165            "max_iterations": max_iterations
6166        });
6167
6168        let output = Command::new(python)
6169            .arg(&harness)
6170            .arg(input_json.to_string())
6171            .current_dir(&crate_dir)
6172            .output()
6173            .map_err(|e| format!("Failed to execute Python script: {}", e))?;
6174
6175        if !output.status.success() {
6176            return Err(format!(
6177                "Python script failed: {}",
6178                String::from_utf8_lossy(&output.stderr)
6179            ));
6180        }
6181
6182        let result_str = String::from_utf8(output.stdout)
6183            .map_err(|e| format!("Invalid UTF-8 in Python output: {}", e))?;
6184
6185        serde_json::from_str(&result_str)
6186            .map_err(|e| format!("Failed to parse Python result: {}", e))
6187    }
6188
6189    fn ensure_python_deps() -> Result<String, String> {
6190        static PYTHON_PATH: OnceLock<Result<String, String>> = OnceLock::new();
6191        PYTHON_PATH
6192            .get_or_init(|| {
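                // Resolution order: prefer an existing `.venv` interpreter, else system
                // `python3`; if `import numpy, scipy` fails, create the venv if needed and
                // pip-install both packages, then hand back the venv interpreter.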
6193                let crate_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
6194                let venv_python = crate_dir.join(".venv/bin/python");
6195                let venv_python_str = venv_python.display().to_string();
6196                let python = if Path::new(&venv_python).exists() {
6197                    venv_python_str.clone()
6198                } else {
6199                    "python3".to_string()
6200                };
6201
6202                let check = Command::new(&python)
6203                    .arg("-c")
6204                    .arg("import numpy, scipy")
6205                    .output()
6206                    .map_err(|e| format!("Failed to execute Python: {}", e))?;
6207
6208                if check.status.success() {
6209                    return Ok(python);
6210                }
6211
6212                if python != venv_python_str {
6213                    let venv = Command::new("python3")
6214                        .arg("-m")
6215                        .arg("venv")
6216                        .arg(crate_dir.join(".venv"))
6217                        .current_dir(&crate_dir)
6218                        .output()
6219                        .map_err(|e| format!("Failed to create venv: {}", e))?;
6220                    if !venv.status.success() {
6221                        return Err(format!(
6222                            "Failed to create venv: {}",
6223                            String::from_utf8_lossy(&venv.stderr)
6224                        ));
6225                    }
6226                }
6227
6228                let install = Command::new(&venv_python)
6229                    .arg("-m")
6230                    .arg("pip")
6231                    .arg("install")
6232                    .arg("numpy")
6233                    .arg("scipy")
6234                    .current_dir(&crate_dir)
6235                    .output()
6236                    .map_err(|e| format!("Failed to install numpy/scipy: {}", e))?;
6237                if !install.status.success() {
6238                    return Err(format!(
6239                        "Failed to install numpy/scipy: {}",
6240                        String::from_utf8_lossy(&install.stderr)
6241                    ));
6242                }
6243
6244                Ok(venv_python_str)
6245            })
6246            .clone()
6247    }
6248
6249    // --- Test Functions ---
6250
6251    /// A simple convex quadratic function: f(x) = x'x, with minimum at 0.
6252    fn quadratic(x: &Array1<f64>) -> (f64, Array1<f64>) {
6253        (x.dot(x), 2.0 * x)
6254    }
6255
6256    struct FirstOrderFn<F> {
6257        inner: F,
6258    }
6259
6260    impl<F> FirstOrderFn<F> {
6261        fn new(inner: F) -> Self {
6262            Self { inner }
6263        }
6264    }
6265
6266    impl<F> ZerothOrderObjective for FirstOrderFn<F>
6267    where
6268        F: FnMut(&Array1<f64>) -> (f64, Array1<f64>),
6269    {
6270        fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
6271            Ok((self.inner)(x).0)
6272        }
6273    }
6274
6275    impl<F> FirstOrderObjective for FirstOrderFn<F>
6276    where
6277        F: FnMut(&Array1<f64>) -> (f64, Array1<f64>),
6278    {
6279        fn eval_grad(&mut self, x: &Array1<f64>) -> Result<FirstOrderSample, ObjectiveEvalError> {
6280            let (f, g) = (self.inner)(x);
6281            Ok(FirstOrderSample {
6282                value: f,
6283                gradient: g,
6284            })
6285        }
6286    }
6287
6288    fn bfgs_oracle<F>(fg: F) -> FirstOrderFn<F>
6289    where
6290        F: FnMut(&Array1<f64>) -> (f64, Array1<f64>),
6291    {
6292        FirstOrderFn::new(fg)
6293    }
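    // Example: `bfgs_oracle(|x: &Array1<f64>| (x.dot(x), 2.0 * x))` adapts a closure
    // returning `(f, g)` into a `FirstOrderObjective` for the line-search tests below.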
6294
6295    struct SecondOrderFn<F> {
6296        inner: F,
6297    }
6298
6299    impl<F> SecondOrderFn<F> {
6300        fn new(inner: F) -> Self {
6301            Self { inner }
6302        }
6303    }
6304
6305    impl<F> ZerothOrderObjective for SecondOrderFn<F>
6306    where
6307        F: FnMut(&Array1<f64>) -> (f64, Array1<f64>, Array2<f64>),
6308    {
6309        fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
6310            Ok((self.inner)(x).0)
6311        }
6312    }
6313
6314    impl<F> FirstOrderObjective for SecondOrderFn<F>
6315    where
6316        F: FnMut(&Array1<f64>) -> (f64, Array1<f64>, Array2<f64>),
6317    {
6318        fn eval_grad(&mut self, x: &Array1<f64>) -> Result<FirstOrderSample, ObjectiveEvalError> {
6319            let (f, g, _) = (self.inner)(x);
6320            Ok(FirstOrderSample {
6321                value: f,
6322                gradient: g,
6323            })
6324        }
6325    }
6326
6327    impl<F> SecondOrderObjective for SecondOrderFn<F>
6328    where
6329        F: FnMut(&Array1<f64>) -> (f64, Array1<f64>, Array2<f64>),
6330    {
6331        fn eval_hessian(
6332            &mut self,
6333            x: &Array1<f64>,
6334        ) -> Result<SecondOrderSample, ObjectiveEvalError> {
6335            let (f, g, h) = (self.inner)(x);
6336            Ok(SecondOrderSample {
6337                value: f,
6338                gradient: g,
6339                hessian: Some(h),
6340            })
6341        }
6342    }
6343
6344    struct CountingSecondOrder<F> {
6345        inner: F,
6346        first_order_calls: Arc<Mutex<usize>>,
6347        second_order_calls: Arc<Mutex<usize>>,
6348    }
6349
6350    impl<F> CountingSecondOrder<F> {
6351        fn new(
6352            inner: F,
6353            first_order_calls: Arc<Mutex<usize>>,
6354            second_order_calls: Arc<Mutex<usize>>,
6355        ) -> Self {
6356            Self {
6357                inner,
6358                first_order_calls,
6359                second_order_calls,
6360            }
6361        }
6362    }
6363
6364    impl<F> ZerothOrderObjective for CountingSecondOrder<F>
6365    where
6366        F: FnMut(&Array1<f64>) -> (f64, Array1<f64>, Array2<f64>),
6367    {
6368        fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
6369            Ok((self.inner)(x).0)
6370        }
6371    }
6372
6373    impl<F> FirstOrderObjective for CountingSecondOrder<F>
6374    where
6375        F: FnMut(&Array1<f64>) -> (f64, Array1<f64>, Array2<f64>),
6376    {
6377        fn eval_grad(&mut self, x: &Array1<f64>) -> Result<FirstOrderSample, ObjectiveEvalError> {
6378            *self
6379                .first_order_calls
6380                .lock()
6381                .expect("lock first-order calls") += 1;
6382            let (f, g, _) = (self.inner)(x);
6383            Ok(FirstOrderSample {
6384                value: f,
6385                gradient: g,
6386            })
6387        }
6388    }
6389
6390    impl<F> SecondOrderObjective for CountingSecondOrder<F>
6391    where
6392        F: FnMut(&Array1<f64>) -> (f64, Array1<f64>, Array2<f64>),
6393    {
6394        fn eval_hessian(
6395            &mut self,
6396            x: &Array1<f64>,
6397        ) -> Result<SecondOrderSample, ObjectiveEvalError> {
6398            *self
6399                .second_order_calls
6400                .lock()
6401                .expect("lock second-order calls") += 1;
6402            let (f, g, h) = (self.inner)(x);
6403            Ok(SecondOrderSample {
6404                value: f,
6405                gradient: g,
6406                hessian: Some(h),
6407            })
6408        }
6409    }
6410
6411    fn gradient_norm(solution: &Solution) -> f64 {
6412        solution
6413            .final_gradient_norm
6414            .expect("gradient-based solution should carry a final gradient norm")
6415    }
6416
6417    fn step_norm(solution: &Solution) -> f64 {
6418        solution
6419            .final_step_norm
6420            .expect("fixed-point solution should carry a final step norm")
6421    }
6422
6423    fn tol(value: f64) -> Tolerance {
6424        Tolerance::new(value).unwrap()
6425    }
6426
6427    fn iters(value: usize) -> MaxIterations {
6428        MaxIterations::new(value).unwrap()
6429    }
6430
6431    fn bounds(lower: Array1<f64>, upper: Array1<f64>, tol: f64) -> Bounds {
6432        Bounds::new(lower, upper, tol).unwrap()
6433    }
6434
6435    /// The Rosenbrock function, a classic non-convex benchmark with a minimum at [1, 1].
6436    fn rosenbrock(x: &Array1<f64>) -> (f64, Array1<f64>) {
6437        let a = 1.0;
6438        let b = 100.0;
6439        let f = (a - x[0]).powi(2) + b * (x[1] - x[0].powi(2)).powi(2);
6440        let g = array![
6441            -2.0 * (a - x[0]) - 4.0 * b * (x[1] - x[0].powi(2)) * x[0],
6442            2.0 * b * (x[1] - x[0].powi(2))
6443        ];
6444        (f, g)
6445    }
6446
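    /// Rosenbrock with its analytic Hessian. For a = 1, b = 100:
    /// d2f/dx0^2 = 2 + 12*b*x0^2 - 4*b*x1, d2f/dx0dx1 = -4*b*x0, d2f/dx1^2 = 2*b.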
6447    fn rosenbrock_with_hessian(x: &Array1<f64>) -> (f64, Array1<f64>, Array2<f64>) {
6448        let a = 1.0;
6449        let b = 100.0;
6450        let f = (a - x[0]).powi(2) + b * (x[1] - x[0].powi(2)).powi(2);
6451        let g = array![
6452            -2.0 * (a - x[0]) - 4.0 * b * (x[1] - x[0].powi(2)) * x[0],
6453            2.0 * b * (x[1] - x[0].powi(2))
6454        ];
6455        let h = array![
6456            [1200.0 * x[0] * x[0] - 400.0 * x[1] + 2.0, -400.0 * x[0]],
6457            [-400.0 * x[0], 200.0]
6458        ];
6459        (f, g, h)
6460    }
6461
6462    fn nonconvex_quartic_with_hessian(x: &Array1<f64>) -> (f64, Array1<f64>, Array2<f64>) {
6463        let f = x[0] * x[0] - x[1] * x[1] + 0.1 * x[1].powi(4);
6464        let g = array![2.0 * x[0], -2.0 * x[1] + 0.4 * x[1].powi(3)];
6465        let h = array![[2.0, 0.0], [0.0, -2.0 + 1.2 * x[1] * x[1]]];
6466        (f, g, h)
6467    }
6468
6469    /// A function with a maximum at 0, guaranteed to fail the Wolfe curvature condition.
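    /// Along any descent direction d, phi(alpha) = -||x + alpha*d||^2 has
    /// phi'(alpha) = phi'(0) - 2*alpha*||d||^2, so |phi'| only grows with alpha and the
    /// curvature bound |phi'(alpha)| <= c2*|phi'(0)| can never hold for c2 < 1.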
6470    fn non_convex_max(x: &Array1<f64>) -> (f64, Array1<f64>) {
6471        (-x.dot(x), -2.0 * x)
6472    }
6473
6474    #[test]
6475    fn probe_best_ignores_nonfinite() {
6476        let x0 = array![0.0];
6477        let g0 = array![1.0];
6478        let mut best = super::ProbeBest::new(&x0, 0.0, &g0);
6479        let x1 = array![1.0];
6480        let g1 = array![f64::NAN];
6481        best.consider(&x1, -1.0, &g1);
6482        assert!(best.f.is_finite());
6483        assert_eq!(best.x[0], 0.0);
6484    }
6485
6486    #[test]
6487    fn second_order_cache_reuses_same_point_full_sample() {
6488        let x = array![1.0, -2.0];
6489        let call_count = Arc::new(Mutex::new(0usize));
6490        let call_count_c = call_count.clone();
6491        let mut oracle = super::SecondOrderCache::new(x.len(), 1e-4);
6492        let mut func_evals = 0usize;
6493        let mut grad_evals = 0usize;
6494        let mut hess_evals = 0usize;
6495        let mut obj = SecondOrderFn::new(move |x: &Array1<f64>| {
6496            *call_count_c.lock().expect("lock call count") += 1;
6497            let f = x.dot(x);
6498            let g = 2.0 * x;
6499            let h = Array2::<f64>::eye(x.len()) * 2.0;
6500            (f, g, h)
6501        });
6502
6503        let first = oracle
6504            .eval_cost_grad_hessian(
6505                &mut obj,
6506                &x,
6507                None,
6508                &mut func_evals,
6509                &mut grad_evals,
6510                &mut hess_evals,
6511            )
6512            .expect("initial full sample should succeed");
6513        let second = oracle
6514            .eval_cost_grad_hessian(
6515                &mut obj,
6516                &x,
6517                None,
6518                &mut func_evals,
6519                &mut grad_evals,
6520                &mut hess_evals,
6521            )
6522            .expect("same-point derivative request should hit cache");
6523
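        // One underlying objective call serves both requests; each eval counter advances once.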
6524        assert_eq!(*call_count.lock().expect("lock call count"), 1);
6525        assert_eq!(func_evals, 1);
6526        assert_eq!(grad_evals, 1);
6527        assert_eq!(hess_evals, 1);
6528        assert_eq!(first.0, second.0);
6529    }
6530
6531    #[test]
6532    fn first_order_cache_merges_same_point_requests() {
6533        let x = array![0.5];
6534        let call_count = Arc::new(Mutex::new(0usize));
6535        let call_count_c = call_count.clone();
6536        let mut oracle = super::FirstOrderCache::new(x.len());
6537        let mut func_evals = 0usize;
6538        let mut grad_evals = 0usize;
6539        let mut obj = FirstOrderFn::new(move |x: &Array1<f64>| {
6540            *call_count_c.lock().expect("lock call count") += 1;
6541            let f = 0.5 * x[0] * x[0];
6542            let g = array![x[0]];
6543            (f, g)
6544        });
6545
6546        let cost_only = oracle
6547            .eval_cost(&mut obj, &x, &mut func_evals)
6548            .expect("cost-only request should succeed");
6549        let full = oracle
6550            .eval_cost_grad(&mut obj, &x, &mut func_evals, &mut grad_evals)
6551            .expect("cost+grad request should succeed");
6552        let cached_grad = oracle
6553            .eval_cost_grad(&mut obj, &x, &mut func_evals, &mut grad_evals)
6554            .expect("merged same-point request should hit cache");
6555
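        // Two underlying calls: cost-only, then the first cost+grad; the repeated request hits the cache.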
6556        assert_eq!(*call_count.lock().expect("lock call count"), 2);
6557        assert_eq!(func_evals, 2);
6558        assert_eq!(grad_evals, 1);
6559        assert_eq!(cost_only, full.0);
6560        assert_eq!(full.0, cached_grad.0);
6561        assert_eq!(full.1, cached_grad.1);
6562    }
6563
6564    #[test]
6565    fn second_order_cache_fd_fills_nonfinite_hessian() {
6566        struct NonfiniteHessianObjective;
6567
6568        impl ZerothOrderObjective for NonfiniteHessianObjective {
6569            fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
6570                Ok((x[0] - 1.0).powi(2))
6571            }
6572        }
6573
6574        impl FirstOrderObjective for NonfiniteHessianObjective {
6575            fn eval_grad(
6576                &mut self,
6577                x: &Array1<f64>,
6578            ) -> Result<FirstOrderSample, ObjectiveEvalError> {
6579                Ok(FirstOrderSample {
6580                    value: (x[0] - 1.0).powi(2),
6581                    gradient: array![2.0 * (x[0] - 1.0)],
6582                })
6583            }
6584        }
6585
6586        impl SecondOrderObjective for NonfiniteHessianObjective {
6587            fn eval_hessian(
6588                &mut self,
6589                x: &Array1<f64>,
6590            ) -> Result<SecondOrderSample, ObjectiveEvalError> {
6591                Ok(SecondOrderSample {
6592                    value: (x[0] - 1.0).powi(2),
6593                    gradient: array![2.0 * (x[0] - 1.0)],
6594                    hessian: Some(array![[f64::NAN]]),
6595                })
6596            }
6597        }
6598
6599        let x = array![2.0];
6600        let mut oracle = super::SecondOrderCache::new(x.len(), 1e-4);
6601        let mut func_evals = 0usize;
6602        let mut grad_evals = 0usize;
6603        let mut hess_evals = 0usize;
6604        let mut obj = NonfiniteHessianObjective;
6605        let (value, gradient, hessian) = oracle
6606            .eval_cost_grad_hessian(
6607                &mut obj,
6608                &x,
6609                None,
6610                &mut func_evals,
6611                &mut grad_evals,
6612                &mut hess_evals,
6613            )
6614            .expect("non-finite Hessian should trigger internal finite differences");
6615
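        // In 1-D the gradient-based central difference needs the base sample plus probes at
        // x ± h, i.e. three cost/grad evaluations, while the discarded non-finite analytic
        // Hessian leaves hess_evals at zero.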
6616        assert_eq!(value, 1.0);
6617        assert_eq!(gradient, array![2.0]);
6618        assert!((hessian[[0, 0]] - 2.0).abs() < 1e-6);
6619        assert_eq!(func_evals, 3);
6620        assert_eq!(grad_evals, 3);
6621        assert_eq!(hess_evals, 0);
6622    }
6623
6624    #[test]
6625    fn finite_diff_gradient_returns_recoverable_on_nonfinite_probe() {
6626        struct WallObjective;
6627
6628        impl ZerothOrderObjective for WallObjective {
6629            fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
6630                if x[0].abs() >= 0.5 {
6631                    Ok(f64::INFINITY)
6632                } else {
6633                    Ok(x[0] * x[0])
6634                }
6635            }
6636        }
6637
6638        let mut objective = FiniteDiffGradient::new(WallObjective).with_step(1.0);
6639        let err = objective
6640            .eval_grad(&array![0.0])
6641            .expect_err("non-finite finite-difference probes should be recoverable");
6642        assert!(matches!(err, ObjectiveEvalError::Recoverable { .. }));
6643    }
6644
6645    #[test]
6646    fn finite_diff_gradient_respects_bounds_with_one_sided_stencil() {
6647        struct LinearObjective;
6648
6649        impl ZerothOrderObjective for LinearObjective {
6650            fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
6651                if x[0] < 0.0 || x[0] > 1.0 {
6652                    return Err(ObjectiveEvalError::recoverable(
6653                        "sample left the feasible interval",
6654                    ));
6655                }
6656                Ok(x[0])
6657            }
6658        }
6659
6660        let mut objective = FiniteDiffGradient::new(LinearObjective)
6661            .with_step(1.0)
6662            .with_bounds(bounds(array![0.0], array![1.0], 1e-8));
6663        let sample = objective
6664            .eval_grad(&array![0.0])
6665            .expect("one-sided finite difference should stay feasible");
6666        assert!((sample.gradient[0] - 1.0).abs() < 1e-12);
6667    }
6668
6669    #[test]
6670    fn finite_diff_gradient_prefers_one_sided_stencil_near_bounds() {
6671        struct TrackingObjective {
6672            seen: Arc<Mutex<Vec<f64>>>,
6673        }
6674
6675        impl ZerothOrderObjective for TrackingObjective {
6676            fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
6677                self.seen.lock().expect("lock seen samples").push(x[0]);
6678                Ok(x[0] * x[0])
6679            }
6680        }
6681
6682        let seen = Arc::new(Mutex::new(Vec::new()));
6683        let mut objective = FiniteDiffGradient::new(TrackingObjective { seen: seen.clone() })
6684            .with_step(0.1)
6685            .with_bounds(bounds(array![0.0], array![1.0], 1e-8));
6686        let x0 = 0.05f64;
6687        let h = 0.1 * (1.0 + x0);
6688        let sample = objective
6689            .eval_grad(&array![x0])
6690            .expect("near-bound gradient should use a feasible one-sided stencil");
6691
6692        let expected = ((x0 + h) * (x0 + h) - x0 * x0) / h;
6693        assert!((sample.gradient[0] - expected).abs() < 1e-12);
6694        let seen = seen.lock().expect("lock seen samples");
6695        assert_eq!(seen.len(), 2);
6696        assert!(seen.iter().any(|&x| (x - x0).abs() < 1e-12));
6697        assert!(seen.iter().any(|&x| (x - (x0 + h)).abs() < 1e-12));
6698        assert!(!seen.iter().any(|&x| x <= 1e-12));
6699    }
6700
6701    #[test]
6702    fn bfgs_with_bounds_wires_finite_diff_gradient_bounds_automatically() {
6703        struct LinearObjective;
6704
6705        impl ZerothOrderObjective for LinearObjective {
6706            fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
6707                if x[0] < 0.0 || x[0] > 1.0 {
6708                    return Err(ObjectiveEvalError::recoverable(
6709                        "sample left the feasible interval",
6710                    ));
6711                }
6712                Ok(x[0])
6713            }
6714        }
6715
6716        let result = Bfgs::new(
6717            array![0.0],
6718            FiniteDiffGradient::new(LinearObjective).with_step(1.0),
6719        )
6720        .with_bounds(bounds(array![0.0], array![1.0], 1e-8))
6721        .run();
6722
6723        let solution = result.expect("solver should wire bounds into finite differences");
6724        assert!(solution.final_point[0].abs() < 1e-12);
6725        assert!(gradient_norm(&solution) <= 1e-12);
6726    }
6727
6728    #[test]
6729    fn optimize_problem_with_bounds_wires_finite_diff_gradient_automatically() {
6730        struct LinearObjective;
6731
6732        impl ZerothOrderObjective for LinearObjective {
6733            fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
6734                if x[0] < 0.0 || x[0] > 1.0 {
6735                    return Err(ObjectiveEvalError::recoverable(
6736                        "sample left the feasible interval",
6737                    ));
6738                }
6739                Ok(x[0])
6740            }
6741        }
6742
6743        let mut solver = optimize(
6744            Problem::new(
6745                array![0.0],
6746                FiniteDiffGradient::new(LinearObjective).with_step(1.0),
6747            )
6748            .with_bounds(bounds(array![0.0], array![1.0], 1e-8)),
6749        );
6750
6751        let solution = solver
6752            .run()
6753            .expect("problem wrapper should wire bounds into finite differences");
6754        assert!(solution.final_point[0].abs() < 1e-12);
6755        assert!(gradient_norm(&solution) <= 1e-12);
6756    }
6757
6758    #[test]
6759    fn second_order_cache_fd_hessian_respects_bounds() {
6760        struct NoHessianObjective;
6761
6762        impl ZerothOrderObjective for NoHessianObjective {
6763            fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
6764                if x[0] < 0.0 || x[0] > 1.0 {
6765                    return Err(ObjectiveEvalError::recoverable(
6766                        "sample left the feasible interval",
6767                    ));
6768                }
6769                Ok((x[0] - 0.25).powi(2))
6770            }
6771        }
6772
6773        impl FirstOrderObjective for NoHessianObjective {
6774            fn eval_grad(
6775                &mut self,
6776                x: &Array1<f64>,
6777            ) -> Result<FirstOrderSample, ObjectiveEvalError> {
6778                if x[0] < 0.0 || x[0] > 1.0 {
6779                    return Err(ObjectiveEvalError::recoverable(
6780                        "sample left the feasible interval",
6781                    ));
6782                }
6783                Ok(FirstOrderSample {
6784                    value: (x[0] - 0.25).powi(2),
6785                    gradient: array![2.0 * (x[0] - 0.25)],
6786                })
6787            }
6788        }
6789
6790        impl SecondOrderObjective for NoHessianObjective {
6791            fn eval_hessian(
6792                &mut self,
6793                x: &Array1<f64>,
6794            ) -> Result<SecondOrderSample, ObjectiveEvalError> {
6795                Ok(SecondOrderSample {
6796                    value: (x[0] - 0.25).powi(2),
6797                    gradient: array![2.0 * (x[0] - 0.25)],
6798                    hessian: None,
6799                })
6800            }
6801        }
6802
6803        let x = array![0.0];
6804        let mut oracle = super::SecondOrderCache::new(x.len(), 1e-4);
6805        let mut func_evals = 0usize;
6806        let mut grad_evals = 0usize;
6807        let mut hess_evals = 0usize;
6808        let mut obj = NoHessianObjective;
6809        let bounds = bounds(array![0.0], array![1.0], 1e-8);
6810
6811        let (value, gradient, hessian) = oracle
6812            .eval_cost_grad_hessian(
6813                &mut obj,
6814                &x,
6815                Some(&bounds.spec),
6816                &mut func_evals,
6817                &mut grad_evals,
6818                &mut hess_evals,
6819            )
6820            .expect("finite-difference Hessian should stay feasible near bounds");
6821
6822        assert!((value - 0.0625).abs() < 1e-12);
6823        assert!((gradient[0] + 0.5).abs() < 1e-12);
6824        assert!((hessian[[0, 0]] - 2.0).abs() < 1e-6);
6825        assert_eq!(hess_evals, 0);
6826    }
6827
6828    #[test]
6829    fn second_order_cache_fd_hessian_prefers_one_sided_stencil_near_bounds() {
6830        struct NearWallObjective;
6831
6832        impl ZerothOrderObjective for NearWallObjective {
6833            fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
6834                if x[0] < 0.01 || x[0] > 1.0 {
6835                    return Err(ObjectiveEvalError::recoverable(
6836                        "sample left the finite-difference band",
6837                    ));
6838                }
6839                Ok(x[0] * x[0])
6840            }
6841        }
6842
6843        impl FirstOrderObjective for NearWallObjective {
6844            fn eval_grad(
6845                &mut self,
6846                x: &Array1<f64>,
6847            ) -> Result<FirstOrderSample, ObjectiveEvalError> {
6848                if x[0] < 0.01 || x[0] > 1.0 {
6849                    return Err(ObjectiveEvalError::recoverable(
6850                        "sample left the finite-difference band",
6851                    ));
6852                }
6853                Ok(FirstOrderSample {
6854                    value: x[0] * x[0],
6855                    gradient: array![2.0 * x[0]],
6856                })
6857            }
6858        }
6859
6860        impl SecondOrderObjective for NearWallObjective {
6861            fn eval_hessian(
6862                &mut self,
6863                x: &Array1<f64>,
6864            ) -> Result<SecondOrderSample, ObjectiveEvalError> {
6865                Ok(SecondOrderSample {
6866                    value: x[0] * x[0],
6867                    gradient: array![2.0 * x[0]],
6868                    hessian: None,
6869                })
6870            }
6871        }
6872
6873        let x = array![0.05];
6874        let mut oracle = super::SecondOrderCache::new(x.len(), 0.1);
6875        let mut func_evals = 0usize;
6876        let mut grad_evals = 0usize;
6877        let mut hess_evals = 0usize;
6878        let mut obj = NearWallObjective;
6879        let bounds = bounds(array![0.0], array![1.0], 1e-8);
6880
6881        let (_, _, hessian) = oracle
6882            .eval_cost_grad_hessian(
6883                &mut obj,
6884                &x,
6885                Some(&bounds.spec),
6886                &mut func_evals,
6887                &mut grad_evals,
6888                &mut hess_evals,
6889            )
6890            .expect("near-bound Hessian should use a feasible one-sided stencil");
6891
6892        assert!((hessian[[0, 0]] - 2.0).abs() < 1e-12);
6893    }
6894
6895    #[test]
6896    fn newton_trust_region_wires_fd_hessian_bounds_automatically() {
6897        struct NoHessianObjective;
6898
6899        impl ZerothOrderObjective for NoHessianObjective {
6900            fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
6901                if x[0] < 0.0 || x[0] > 1.0 {
6902                    return Err(ObjectiveEvalError::recoverable(
6903                        "sample left the feasible interval",
6904                    ));
6905                }
6906                Ok(x[0])
6907            }
6908        }
6909
6910        impl FirstOrderObjective for NoHessianObjective {
6911            fn eval_grad(
6912                &mut self,
6913                x: &Array1<f64>,
6914            ) -> Result<FirstOrderSample, ObjectiveEvalError> {
6915                if x[0] < 0.0 || x[0] > 1.0 {
6916                    return Err(ObjectiveEvalError::recoverable(
6917                        "sample left the feasible interval",
6918                    ));
6919                }
6920                Ok(FirstOrderSample {
6921                    value: x[0],
6922                    gradient: array![1.0],
6923                })
6924            }
6925        }
6926
6927        impl SecondOrderObjective for NoHessianObjective {
6928            fn eval_hessian(
6929                &mut self,
6930                x: &Array1<f64>,
6931            ) -> Result<SecondOrderSample, ObjectiveEvalError> {
6932                Ok(SecondOrderSample {
6933                    value: x[0],
6934                    gradient: array![1.0],
6935                    hessian: None,
6936                })
6937            }
6938        }
6939
6940        let result = NewtonTrustRegion::new(array![0.0], NoHessianObjective)
6941            .with_bounds(bounds(array![0.0], array![1.0], 1e-8))
6942            .run();
6943
6944        let solution = result.expect("solver should wire bounds into Hessian finite differences");
6945        assert!(solution.final_point[0].abs() < 1e-12);
6946        assert!(gradient_norm(&solution) <= 1e-12);
6947    }
6948
6949    #[test]
6950    fn optimize_second_order_problem_with_bounds_wires_fd_hessian_automatically() {
6951        struct NoHessianObjective;
6952
6953        impl ZerothOrderObjective for NoHessianObjective {
6954            fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
6955                if x[0] < 0.0 || x[0] > 1.0 {
6956                    return Err(ObjectiveEvalError::recoverable(
6957                        "sample left the feasible interval",
6958                    ));
6959                }
6960                Ok(x[0])
6961            }
6962        }
6963
6964        impl FirstOrderObjective for NoHessianObjective {
6965            fn eval_grad(
6966                &mut self,
6967                x: &Array1<f64>,
6968            ) -> Result<FirstOrderSample, ObjectiveEvalError> {
6969                if x[0] < 0.0 || x[0] > 1.0 {
6970                    return Err(ObjectiveEvalError::recoverable(
6971                        "sample left the feasible interval",
6972                    ));
6973                }
6974                Ok(FirstOrderSample {
6975                    value: x[0],
6976                    gradient: array![1.0],
6977                })
6978            }
6979        }
6980
6981        impl SecondOrderObjective for NoHessianObjective {
6982            fn eval_hessian(
6983                &mut self,
6984                x: &Array1<f64>,
6985            ) -> Result<SecondOrderSample, ObjectiveEvalError> {
6986                Ok(SecondOrderSample {
6987                    value: x[0],
6988                    gradient: array![1.0],
6989                    hessian: None,
6990                })
6991            }
6992        }
6993
6994        let mut solver = optimize(
6995            SecondOrderProblem::new(array![0.0], NoHessianObjective).with_bounds(bounds(
6996                array![0.0],
6997                array![1.0],
6998                1e-8,
6999            )),
7000        );
7001
7002        let solution = solver.run().expect(
7003            "second-order problem wrapper should wire bounds into Hessian finite differences",
7004        );
7005        assert!(solution.final_point[0].abs() < 1e-12);
7006        assert!(gradient_norm(&solution) <= 1e-12);
7007    }
7008
7009    #[test]
7010    fn arc_wires_fd_hessian_bounds_automatically() {
7011        struct NoHessianObjective;
7012
7013        impl ZerothOrderObjective for NoHessianObjective {
7014            fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
7015                if x[0] < 0.0 || x[0] > 1.0 {
7016                    return Err(ObjectiveEvalError::recoverable(
7017                        "sample left the feasible interval",
7018                    ));
7019                }
7020                Ok(x[0])
7021            }
7022        }
7023
7024        impl FirstOrderObjective for NoHessianObjective {
7025            fn eval_grad(
7026                &mut self,
7027                x: &Array1<f64>,
7028            ) -> Result<FirstOrderSample, ObjectiveEvalError> {
7029                if x[0] < 0.0 || x[0] > 1.0 {
7030                    return Err(ObjectiveEvalError::recoverable(
7031                        "sample left the feasible interval",
7032                    ));
7033                }
7034                Ok(FirstOrderSample {
7035                    value: x[0],
7036                    gradient: array![1.0],
7037                })
7038            }
7039        }
7040
7041        impl SecondOrderObjective for NoHessianObjective {
7042            fn eval_hessian(
7043                &mut self,
7044                x: &Array1<f64>,
7045            ) -> Result<SecondOrderSample, ObjectiveEvalError> {
7046                Ok(SecondOrderSample {
7047                    value: x[0],
7048                    gradient: array![1.0],
7049                    hessian: None,
7050                })
7051            }
7052        }
7053
7054        let result = super::Arc::new(array![0.0], NoHessianObjective)
7055            .with_bounds(bounds(array![0.0], array![1.0], 1e-8))
7056            .run();
7057
7058        let solution = result.expect("solver should wire bounds into Hessian finite differences");
7059        assert!(solution.final_point[0].abs() < 1e-12);
7060        assert!(gradient_norm(&solution) <= 1e-12);
7061    }
7062
7063    #[test]
7064    fn fixed_point_converges_on_linear_contraction() {
7065        struct LinearContraction;
7066
7067        impl FixedPointObjective for LinearContraction {
7068            fn eval_step(
7069                &mut self,
7070                x: &Array1<f64>,
7071            ) -> Result<FixedPointSample, ObjectiveEvalError> {
7072                Ok(FixedPointSample {
7073                    value: x.dot(x),
7074                    step: -0.5 * x,
7075                    status: FixedPointStatus::Continue,
7076                })
7077            }
7078        }
7079
7080        let mut solver = FixedPoint::new(array![2.0, -1.0], LinearContraction)
7081            .with_tolerance(tol(1e-8))
7082            .with_max_iterations(iters(128));
7083        let solution = solver
7084            .run()
7085            .expect("fixed-point solver should converge on a contraction");
7086
7087        assert!(solution.final_point.dot(&solution.final_point).sqrt() < 1e-6);
7088        assert!(step_norm(&solution) < 1e-8);
7089    }
7090
7091    #[test]
7092    fn fixed_point_stop_returns_current_point() {
7093        struct ImmediateStop;
7094
7095        impl FixedPointObjective for ImmediateStop {
7096            fn eval_step(
7097                &mut self,
7098                _x: &Array1<f64>,
7099            ) -> Result<FixedPointSample, ObjectiveEvalError> {
7100                Ok(FixedPointSample {
7101                    value: 7.0,
7102                    step: array![1.0],
7103                    status: FixedPointStatus::Stop,
7104                })
7105            }
7106        }
7107
7108        let solution = FixedPoint::new(array![3.0], ImmediateStop)
7109            .run()
7110            .expect("stop status should finalize immediately");
7111
7112        assert_eq!(solution.final_point, array![3.0]);
7113        assert_eq!(solution.final_value, 7.0);
7114        assert_eq!(step_norm(&solution), 0.0);
7115    }
7116
7117    #[test]
7118    fn dense_solve_shifted_solves_small_system() {
7119        let a = array![[4.0, 1.0], [1.0, 3.0]];
7120        let b = array![1.0, 2.0];
7121        let x = super::dense_solve_shifted(&a, &b, 0.0).expect("dense solve should succeed");
7122        let ax = a.dot(&x);
7123        assert!((&ax - &b).iter().all(|v| v.abs() < 1e-10));
7124    }
7125
7126    #[test]
7127    fn cg_solve_adaptive_uses_direct_path_for_small_dense_systems() {
7128        let n = 8usize;
7129        let mut a = Array2::<f64>::eye(n) * 3.0;
7130        for i in 0..n {
7131            for j in 0..n {
7132                if i != j {
7133                    a[[i, j]] = 0.05 * ((i + j + 1) as f64);
7134                }
7135            }
7136        }
7137        let b = Array1::from_iter((0..n).map(|i| (i + 1) as f64));
7138        let x = super::cg_solve_adaptive(&a, &b, 5, 1e-12, 1e-10)
7139            .expect("small dense system should use the direct solve path");
7140        let mut ax = a.dot(&x);
7141        for i in 0..n {
7142            ax[i] += 1e-10 * x[i];
7143        }
7144        let residual = (&ax - &b).dot(&(&ax - &b)).sqrt();
7145        assert!(residual < 1e-8, "expected small residual, got {residual:e}");
7146    }
7147
7148    #[test]
7149    fn cg_solve_from_refines_existing_iterate() {
7150        let n = 256usize;
7151        let mut a = Array2::<f64>::eye(n) * 4.0;
7152        for i in 0..(n - 1) {
7153            a[[i, i + 1]] = 0.5;
7154            a[[i + 1, i]] = 0.5;
7155        }
7156        let b = Array1::from_elem(n, 1.0);
7157        let first = super::cg_solve_from(&a, &b, Array1::zeros(n), 3, 1e-12, 0.0)
7158            .expect("initial CG stage should succeed");
7159        let second = super::cg_solve_from(&a, &b, first.x.clone(), 3, 1e-12, 0.0)
7160            .expect("refinement CG stage should succeed");
7161        assert!(
7162            second.rel_resid < first.rel_resid,
7163            "continued CG should improve residual"
7164        );
7165    }
7166
7167    #[test]
7168    fn steihaug_toint_uses_exact_small_dense_newton_step_when_feasible() {
7169        let core = super::NewtonTrustRegionCore::new(array![0.0, 0.0]);
7170        let h = array![[4.0, 1.0], [1.0, 3.0]];
7171        let g = array![1.0, 2.0];
7172        let rhs = -g.clone();
7173        let expected =
7174            super::dense_solve_shifted(&h, &rhs, 0.0).expect("direct dense solve should work");
7175        let (step, pred) = core
7176            .steihaug_toint_step(&h, &g, 10.0, None)
7177            .expect("small dense exact step should be accepted");
7178        assert!((&step - &expected).iter().all(|v| v.abs() < 1e-10));
7179        assert!(pred > 0.0);
7180    }
7181
7182    #[test]
7183    fn dense_trust_region_step_handles_small_dense_indefinite_boundary_case() {
7184        let h = array![[-1.0, 0.0], [0.0, 2.0]];
7185        let g = array![1.0, 0.5];
7186        let (step, pred) =
7187            super::dense_trust_region_step(&h, &g, 0.5, None).expect("direct trust-region step");
7188        let norm = step.dot(&step).sqrt();
7189        assert!(norm <= 0.5 + 1e-8, "step norm should respect trust radius");
7190        assert!(pred > 0.0, "predicted decrease should be positive");
7191    }
7192
7193    #[test]
7194    fn arc_small_dense_masked_subproblem_uses_direct_masked_solve() {
7195        let core = super::ArcCore::new(array![0.0, 0.0]);
7196        let h = array![[4.0, 1.0], [1.0, 3.0]];
7197        let g = array![2.0, -3.0];
7198        let active = [true, false];
7199        let step = core
7200            .solve_arc_subproblem(&h, &g, 1.0, Some(&active))
7201            .expect("masked direct ARC subproblem solve should succeed");
7202        assert!(
7203            step[0].abs() < 1e-12,
7204            "active coordinate should remain fixed"
7205        );
7206        assert!(step[1].is_finite(), "free coordinate step should be finite");
7207        let (m_delta, _, grad_m) = core.arc_model_value(&g, &h, 1.0, &step, Some(&active));
7208        assert!(m_delta <= 1e-8, "ARC model should not increase materially");
7209        assert!(grad_m.iter().all(|v| v.is_finite()));
7210    }
7211
7212    #[test]
7213    fn bfgs_local_mode_forces_strict_search_policy() {
7214        let mut core = super::BfgsCore::new(array![0.0, 0.0]);
7215        core.initial_grad_norm = 10.0;
7216        core.primary_strategy = super::LineSearchStrategy::Backtracking;
7217        core.c1_adapt = 1e-3;
7218        core.c2_adapt = 0.1;
7219        core.flat_accept_streak = 3;
7220        core.curv_slack_scale = 0.25;
7221        core.grad_drop_factor = 0.95;
7222        core.gll.set_cap(8);
7223
7224        core.refresh_local_mode(1e-3);
7225
7226        assert!(core.local_mode);
7227        assert!(matches!(
7228            core.primary_strategy,
7229            super::LineSearchStrategy::StrongWolfe
7230        ));
7231        assert!((core.c1_adapt - core.c1).abs() < 1e-16);
7232        assert!((core.c2_adapt - core.c2).abs() < 1e-16);
7233        assert_eq!(core.flat_accept_streak, 0);
7234        assert!((core.curv_slack_scale - 1.0).abs() < 1e-16);
7235        assert!((core.grad_drop_factor - 0.9).abs() < 1e-16);
7236        assert_eq!(core.gll.cap, 1);
7237    }
7238
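    // Local-mode reset, as asserted above: near a solution the solver drops its adaptive
    // relaxations. Strong Wolfe becomes the primary strategy again, c1/c2 snap back to
    // their canonical values, the flat-accept streak clears, curvature slack and the
    // gradient-drop factor return to defaults, and the GLL window collapses to cap 1
    // (plain monotone Armijo).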
7239    #[test]
7240    fn probe_alphas_respects_armijo() {
7241        let x_k = array![1.0];
7242        let f_k = 1.0;
7243        let g_k = array![2.0];
7244        let d_k = array![2.0]; // ascent direction
7245        let mut core = super::BfgsCore::new(x_k.clone());
7246        let mut oracle = super::FirstOrderCache::new(x_k.len());
7247        let tau_g = core.tau_g;
7248        let drop_factor = core.grad_drop_factor;
7249        let mut fe = 0usize;
7250        let mut ge = 0usize;
7251        let res = super::probe_alphas(
7252            &mut core,
7253            &mut bfgs_oracle(|x: &Array1<f64>| (x.dot(x), 2.0 * x)),
7254            &mut oracle,
7255            &x_k,
7256            &d_k,
7257            f_k,
7258            &g_k,
7259            0.0,
7260            1.0,
7261            tau_g,
7262            drop_factor,
7263            &mut fe,
7264            &mut ge,
7265        );
7266        assert!(res.is_none());
7267    }
7268
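    // Why `None` is correct: g_k . d_k = 2*2 = 4 > 0, so d_k points uphill. Along the
    // ray, f(1 + 2a) = 1 + 4a + 4a^2 grows, while Armijo with c1 = 1e-4 only allows
    // f <= 1 + 4e-4 * a. No probe alpha can satisfy that, so probing must report none.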
7269    #[test]
7270    fn zoom_tiny_bracket_rejects_armijo_without_curvature() {
7271        let x_k = array![1.0];
7272        let mut core = super::BfgsCore::new(x_k.clone());
7273        let mut oracle = super::FirstOrderCache::new(x_k.len());
7274        let (f_k, g_k) = non_convex_max(&x_k);
7275        let g_proj_k = core.projected_gradient(&x_k, &g_k);
7276        let d_k = array![1.0];
7277        let alpha_lo = 1.0;
7278        let alpha_hi = 1.0 + 5e-13;
7279        let (x_lo, s_lo, _) = core.project_with_step(&x_k, &d_k, alpha_lo);
7280        let (f_lo, g_lo) = non_convex_max(&x_lo);
7281        let g_lo_dot_d = super::directional_derivative(
7282            &core.projected_gradient(&x_lo, &g_lo),
7283            &s_lo,
7284            alpha_lo,
7285            &d_k,
7286        );
7287        let (x_hi, s_hi, _) = core.project_with_step(&x_k, &d_k, alpha_hi);
7288        let (f_hi, g_hi) = non_convex_max(&x_hi);
7289        let g_hi_dot_d = super::directional_derivative(
7290            &core.projected_gradient(&x_hi, &g_hi),
7291            &s_hi,
7292            alpha_hi,
7293            &d_k,
7294        );
7295        let c1 = core.c1;
7296        let c2 = core.c2;
7297
7298        let r = super::zoom(
7299            &mut core,
7300            &mut bfgs_oracle(non_convex_max),
7301            &mut oracle,
7302            &x_k,
7303            &d_k,
7304            f_k,
7305            &g_k,
7306            &g_proj_k,
7307            g_proj_k.dot(&d_k),
7308            c1,
7309            c2,
7310            alpha_lo,
7311            alpha_hi,
7312            f_lo,
7313            f_hi,
7314            g_lo_dot_d,
7315            g_hi_dot_d,
7316            0,
7317            0,
7318        );
7319
7320        assert!(matches!(r, Err(super::LineSearchError::MaxAttempts(_))));
7321    }
7322
7323    #[test]
7324    fn zoom_flat_midpoint_rejects_uphill_descent_only_candidate() {
7325        let x_k = array![0.0];
7326        let mut core = super::BfgsCore::new(x_k.clone());
7327        let mut oracle = super::FirstOrderCache::new(x_k.len());
7328        let slope = 2.0e-13;
7329        let fake_grad = -1.0e-14;
7330        let f_k = 0.0;
7331        let g_k = array![fake_grad];
7332        let g_proj_k = core.projected_gradient(&x_k, &g_k);
7333        let d_k = array![1.0];
7334        let alpha_lo = 1.0;
7335        let alpha_hi = 2.0;
7336        let fg = move |x: &Array1<f64>| (slope * x[0], array![fake_grad]);
7337        let (x_lo, s_lo, _) = core.project_with_step(&x_k, &d_k, alpha_lo);
7338        let (f_lo, g_lo) = fg(&x_lo);
7339        let g_lo_dot_d = super::directional_derivative(
7340            &core.projected_gradient(&x_lo, &g_lo),
7341            &s_lo,
7342            alpha_lo,
7343            &d_k,
7344        );
7345        let (x_hi, s_hi, _) = core.project_with_step(&x_k, &d_k, alpha_hi);
7346        let (f_hi, g_hi) = fg(&x_hi);
7347        let g_hi_dot_d = super::directional_derivative(
7348            &core.projected_gradient(&x_hi, &g_hi),
7349            &s_hi,
7350            alpha_hi,
7351            &d_k,
7352        );
7353        let c1 = core.c1;
7354        let c2 = core.c2;
7355
7356        let r = super::zoom(
7357            &mut core,
7358            &mut bfgs_oracle(fg),
7359            &mut oracle,
7360            &x_k,
7361            &d_k,
7362            f_k,
7363            &g_k,
7364            &g_proj_k,
7365            g_proj_k.dot(&d_k),
7366            c1,
7367            c2,
7368            alpha_lo,
7369            alpha_hi,
7370            f_lo,
7371            f_hi,
7372            g_lo_dot_d,
7373            g_hi_dot_d,
7374            0,
7375            0,
7376        );
7377
7378        assert!(matches!(r, Err(super::LineSearchError::MaxAttempts(_))));
7379    }
7380
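    // Both zoom tests above probe the same guard from opposite directions: an
    // Armijo-style decrease alone must not be accepted. A 5e-13-wide bracket leaves no
    // room to establish curvature, and a flat bracket whose candidate only *looks*
    // downhill (true slope +2e-13, reported gradient -1e-14) would reward noise.
    // In both cases zoom is required to exhaust its attempts and fail loudly.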
7381    #[test]
7382    fn line_search_rejects_fully_clipped_projected_step() {
7383        let x_k = array![1.0];
7384        let lower = array![0.0];
7385        let upper = array![1.0];
7386        let mut core = super::BfgsCore::new(x_k.clone());
7387        core.bounds = Some(super::BoxSpec::new(lower, upper, 1e-8));
7388        let mut oracle = super::FirstOrderCache::new(x_k.len());
7389        let fg = |x: &Array1<f64>| {
7390            let dx = x[0] - 2.0;
7391            (dx * dx, array![2.0 * dx])
7392        };
7393        let (f_k, g_k) = fg(&x_k);
7394        let d_k = array![1.0];
7395        let c1 = core.c1;
7396        let c2 = core.c2;
7397
7398        let r = super::line_search(
7399            &mut core,
7400            &mut bfgs_oracle(fg),
7401            &mut oracle,
7402            &x_k,
7403            &d_k,
7404            f_k,
7405            &g_k,
7406            c1,
7407            c2,
7408        );
7409
7410        assert!(matches!(r, Err(super::LineSearchError::StepSizeTooSmall)));
7411    }
7412
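    // At x = 1 with upper bound 1 and direction +1, every trial projects straight back
    // onto x, so the realized step is identically zero for every alpha. The search must
    // recognize this and report `StepSizeTooSmall` rather than looping on no-op trials.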
7413    #[test]
7414    fn backtracking_accepts_strong_wolfe_in_local_mode() {
7415        let x_k = array![1.0];
7416        let mut core = super::BfgsCore::new(x_k.clone());
7417        core.local_mode = true;
7418
7419        let mut oracle = super::FirstOrderCache::new(x_k.len());
7420        let f_k = x_k.dot(&x_k);
7421        let g_k = 2.0 * x_k.clone();
7422        let d_k = -g_k.clone();
7423
7424        let (alpha, f_new, g_new, _, _, kind) = super::backtracking_line_search(
7425            &mut core,
7426            &mut bfgs_oracle(|x: &Array1<f64>| (x.dot(x), 2.0 * x)),
7427            &mut oracle,
7428            &x_k,
7429            &d_k,
7430            f_k,
7431            &g_k,
7432        )
7433        .expect("local mode should still accept strong-Wolfe decreases");
7434
7435        assert!((alpha - 0.5).abs() < 1e-12);
7436        assert!(f_new < f_k);
7437        assert!(g_new.iter().all(|v| v.is_finite()));
7438        assert!(matches!(kind, super::AcceptKind::StrongWolfe));
7439    }
7440
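    // Worked step length: f(x) = x^2 at x = 1 gives d = -g = [-2], and
    // f(1 - 2a) = (1 - 2a)^2 is minimized exactly at a = 0.5, landing on x = 0 where the
    // gradient vanishes. That trial satisfies both Wolfe conditions, which is why the
    // accepted alpha is ~0.5 with `AcceptKind::StrongWolfe` even though local mode
    // enters through the backtracking routine.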
7441    #[test]
7442    fn backtracking_rejects_armijo_without_curvature() {
7443        let x_k = array![1.0];
7444        let mut core = super::BfgsCore::new(x_k.clone());
7445        let mut oracle = super::FirstOrderCache::new(x_k.len());
7446        let (f_k, g_k) = non_convex_max(&x_k);
7447        let d_k = array![1.0];
7448
7449        let r = super::backtracking_line_search(
7450            &mut core,
7451            &mut bfgs_oracle(non_convex_max),
7452            &mut oracle,
7453            &x_k,
7454            &d_k,
7455            f_k,
7456            &g_k,
7457        );
7458
7459        assert!(r.is_err());
7460    }
7461
7462    #[test]
7463    fn local_mode_disables_only_gll_extension() {
7464        let mut core = super::BfgsCore::new(array![0.0]);
7465        let fmax = 2.0;
7466        let gk_ts = -0.1;
7467        let f_trial = 1.5;
7468
7469        assert!(!core.accept_armijo(1.0, gk_ts, f_trial));
7470        assert!(core.accept_gll_nonmonotone(fmax, gk_ts, f_trial));
7471
7472        core.local_mode = true;
7473        assert!(!core.accept_gll_nonmonotone(fmax, gk_ts, f_trial));
7474    }
7475
7476    #[test]
7477    fn line_search_ignores_nonfinite_best() {
7478        let x0 = array![0.0];
7479        let mut core = super::BfgsCore::new(x0.clone());
7480        let mut oracle = super::FirstOrderCache::new(x0.len());
7481        let c1 = core.c1;
7482        let c2 = core.c2;
7483        let fg = |x: &Array1<f64>| {
7484            if x[0] > 0.0 {
7485                (f64::NEG_INFINITY, array![1.0])
7486            } else {
7487                (0.0, array![1.0])
7488            }
7489        };
7490        let (f_k, g_k) = fg(&x0);
7491        let mut obj = bfgs_oracle(fg);
7492        core.global_best = Some(super::ProbeBest::new(&x0, f_k, &g_k));
7493        let d_k = array![1.0];
7494        let r = super::line_search(
7495            &mut core,
7496            &mut obj,
7497            &mut oracle,
7498            &x0,
7499            &d_k,
7500            f_k,
7501            &g_k,
7502            c1,
7503            c2,
7504        );
7505        assert!(r.is_err());
7506        assert!(
7507            core.global_best
7508                .as_ref()
7509                .map(|b| b.f.is_finite())
7510                .unwrap_or(false)
7511        );
7512    }
7513
7514    #[test]
7515    fn newton_trust_region_converges_on_rosenbrock() {
7516        let x0 = array![-1.2, 1.0];
7517        let mut solver = NewtonTrustRegion::new(x0, SecondOrderFn::new(rosenbrock_with_hessian))
7518            .with_profile(Profile::Robust)
7519            .with_tolerance(tol(1e-8))
7520            .with_max_iterations(iters(100));
7521        let solution = solver.run().expect("Newton trust-region should converge");
7522        assert!((solution.final_point[0] - 1.0).abs() < 1e-6);
7523        assert!((solution.final_point[1] - 1.0).abs() < 1e-6);
7524        assert!(gradient_norm(&solution) < 1e-6);
7525    }
7526
7527    #[test]
7528    fn newton_trust_region_uses_single_full_trial_requests() {
7529        let x0 = array![-1.2, 1.0];
7530        let first_order_calls = Arc::new(Mutex::new(0usize));
7531        let second_order_calls = Arc::new(Mutex::new(0usize));
7532        let objective = CountingSecondOrder::new(
7533            rosenbrock_with_hessian,
7534            first_order_calls.clone(),
7535            second_order_calls.clone(),
7536        );
7537        let mut solver = NewtonTrustRegion::new(x0, objective)
7538            .with_profile(Profile::Robust)
7539            .with_tolerance(tol(1e-8))
7540            .with_max_iterations(iters(100));
7541        let _ = solver.run().expect("Newton trust-region should converge");
7542        assert_eq!(
7543            *first_order_calls.lock().expect("lock first-order calls"),
7544            0,
7545            "Newton TR should not use first-order-only objective paths"
7546        );
7547        assert!(
7548            *second_order_calls.lock().expect("lock second-order calls") > 0,
7549            "expected Newton TR to use second-order evaluations"
7550        );
7551    }
7552
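    // `CountingSecondOrder` tallies gradient-only and full (f, g, H) requests
    // separately. The invariant under test: a second-order solver should fetch each
    // trial as one coherent sample so f, g, and H agree at the same x, rather than
    // stitching together first-order calls.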
7553    #[test]
7554    fn newton_trust_region_handles_indefinite_hessian() {
7555        let x0 = array![1.0, 0.5]; // Hessian is indefinite at start.
7556        let mut solver =
7557            NewtonTrustRegion::new(x0, SecondOrderFn::new(nonconvex_quartic_with_hessian))
7558                .with_profile(Profile::Robust)
7559                .with_tolerance(tol(1e-7))
7560                .with_max_iterations(iters(200));
7561
7562        let sol = solver
7563            .run()
7564            .expect("TR-Newton should handle indefinite Hessians");
7565        assert!(sol.final_value.is_finite());
7566        assert!(gradient_norm(&sol) < 1e-4);
7567    }
7568
7569    #[test]
7570    fn newton_trust_region_respects_single_variable_bound() {
7571        // Unconstrained minimizer is x=2, but bounds force x in [0,1].
7572        let x0 = array![0.2];
7573        let lower = array![0.0];
7574        let upper = array![1.0];
7575        let mut solver = NewtonTrustRegion::new(
7576            x0,
7577            SecondOrderFn::new(|x: &Array1<f64>| {
7578                let dx = x[0] - 2.0;
7579                let f = dx * dx;
7580                let g = array![2.0 * dx];
7581                let h = array![[2.0]];
7582                (f, g, h)
7583            }),
7584        )
7585        .with_bounds(bounds(lower, upper, 1e-8))
7586        .with_profile(Profile::Robust)
7587        .with_tolerance(tol(1e-10))
7588        .with_max_iterations(iters(100));
7589
7590        let sol = solver
7591            .run()
7592            .expect("Projected Newton should converge at upper bound");
7593        assert!((sol.final_point[0] - 1.0).abs() < 1e-8);
7594        assert!(gradient_norm(&sol) <= 1e-8);
7595    }
7596
7597    #[test]
7598    fn newton_trust_region_active_set_leaves_free_coordinate() {
7599        // x[0] wants to move beyond upper bound, x[1] is free with minimizer at 3.
7600        let x0 = array![0.4, -2.0];
7601        let lower = array![0.0, -10.0];
7602        let upper = array![1.0, 10.0];
7603        let mut solver = NewtonTrustRegion::new(
7604            x0,
7605            SecondOrderFn::new(|x: &Array1<f64>| {
7606                let d0 = x[0] - 2.0;
7607                let d1 = x[1] - 3.0;
7608                let f = d0 * d0 + d1 * d1;
7609                let g = array![2.0 * d0, 2.0 * d1];
7610                let h = array![[2.0, 0.0], [0.0, 2.0]];
7611                (f, g, h)
7612            }),
7613        )
7614        .with_bounds(bounds(lower, upper, 1e-8))
7615        .with_profile(Profile::Robust)
7616        .with_tolerance(tol(1e-9))
7617        .with_max_iterations(iters(100));
7618
7619        let sol = solver.run().expect("Projected Newton should converge");
7620        assert!((sol.final_point[0] - 1.0).abs() < 1e-8);
7621        assert!((sol.final_point[1] - 3.0).abs() < 1e-7);
7622        assert!(gradient_norm(&sol) <= 1e-7);
7623    }
7624
7625    #[test]
7626    fn newton_trust_region_retries_on_recoverable_trial_errors() {
7627        struct RecoverableTrialObjective {
7628            calls: usize,
7629        }
7630
7631        impl ZerothOrderObjective for RecoverableTrialObjective {
7632            fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
7633                Ok(0.5 * (x[0] - 1.0).powi(2))
7634            }
7635        }
7636
7637        impl FirstOrderObjective for RecoverableTrialObjective {
7638            fn eval_grad(
7639                &mut self,
7640                x: &Array1<f64>,
7641            ) -> Result<FirstOrderSample, ObjectiveEvalError> {
7642                Ok(FirstOrderSample {
7643                    value: 0.5 * (x[0] - 1.0).powi(2),
7644                    gradient: array![x[0] - 1.0],
7645                })
7646            }
7647        }
7648
7649        impl SecondOrderObjective for RecoverableTrialObjective {
7650            fn eval_hessian(
7651                &mut self,
7652                x: &Array1<f64>,
7653            ) -> Result<SecondOrderSample, ObjectiveEvalError> {
7654                self.calls += 1;
7655                if self.calls == 2 {
7656                    return Err(ObjectiveEvalError::recoverable("simulated PIRLS breakdown"));
7657                }
7658                Ok(SecondOrderSample {
7659                    value: 0.5 * (x[0] - 1.0).powi(2),
7660                    gradient: array![x[0] - 1.0],
7661                    hessian: Some(array![[1.0]]),
7662                })
7663            }
7664        }
7665
7666        let x0 = array![2.0];
7667        let mut solver = NewtonTrustRegion::new(x0, RecoverableTrialObjective { calls: 0 })
7668            .with_profile(Profile::Deterministic)
7669            .with_tolerance(tol(1e-8))
7670            .with_max_iterations(iters(200));
7671
7672        let sol = solver
7673            .run()
7674            .expect("recoverable trial errors should shrink trust region and recover");
7675        assert!((sol.final_point[0] - 1.0).abs() < 1e-6);
7676        assert!(gradient_norm(&sol) < 1e-6);
7677    }
7678
7679    #[test]
7680    fn newton_trust_region_surfaces_fatal_objective_errors() {
7681        struct FatalObjective;
7682
7683        impl ZerothOrderObjective for FatalObjective {
7684            fn eval_cost(&mut self, _x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
7685                Err(ObjectiveEvalError::fatal(
7686                    "fatal synthetic objective failure",
7687                ))
7688            }
7689        }
7690
7691        impl FirstOrderObjective for FatalObjective {
7692            fn eval_grad(
7693                &mut self,
7694                _x: &Array1<f64>,
7695            ) -> Result<FirstOrderSample, ObjectiveEvalError> {
7696                Err(ObjectiveEvalError::fatal(
7697                    "fatal synthetic objective failure",
7698                ))
7699            }
7700        }
7701
7702        impl SecondOrderObjective for FatalObjective {
7703            fn eval_hessian(
7704                &mut self,
7705                _x: &Array1<f64>,
7706            ) -> Result<SecondOrderSample, ObjectiveEvalError> {
7707                Err(ObjectiveEvalError::fatal(
7708                    "fatal synthetic objective failure",
7709                ))
7710            }
7711        }
7712
7713        let x0 = array![0.0];
7714        let mut solver = NewtonTrustRegion::new(x0, FatalObjective).with_max_iterations(iters(5));
7715
7716        let err = solver.run().expect_err("fatal errors must propagate");
7717        match err {
7718            super::NewtonTrustRegionError::ObjectiveFailed { message } => {
7719                assert!(message.contains("fatal synthetic objective failure"));
7720            }
7721            other => panic!("unexpected error variant: {other:?}"),
7722        }
7723    }
7724
7725    #[test]
7726    fn arc_converges_on_rosenbrock() {
7727        let x0 = array![-1.2, 1.0];
7728        let mut solver = super::Arc::new(x0, SecondOrderFn::new(rosenbrock_with_hessian))
7729            .with_profile(Profile::Robust)
7730            .with_tolerance(tol(1e-7))
7731            .with_max_iterations(iters(250));
7732
7733        let solution = solver.run().expect("ARC should converge");
7734        assert!((solution.final_point[0] - 1.0).abs() < 1e-4);
7735        assert!((solution.final_point[1] - 1.0).abs() < 1e-4);
7736        assert!(gradient_norm(&solution) < 1e-5);
7737    }
7738
7739    #[test]
7740    fn arc_uses_single_full_trial_requests() {
7741        let x0 = array![-1.2, 1.0];
7742        let first_order_calls = Arc::new(Mutex::new(0usize));
7743        let second_order_calls = Arc::new(Mutex::new(0usize));
7744        let objective = CountingSecondOrder::new(
7745            rosenbrock_with_hessian,
7746            first_order_calls.clone(),
7747            second_order_calls.clone(),
7748        );
7749        let mut solver = super::Arc::new(x0, objective)
7750            .with_profile(Profile::Robust)
7751            .with_tolerance(tol(1e-7))
7752            .with_max_iterations(iters(250));
7753
7754        let _ = solver.run().expect("ARC should converge");
7755        assert_eq!(
7756            *first_order_calls.lock().expect("lock first-order calls"),
7757            0,
7758            "ARC should not use first-order-only objective paths"
7759        );
7760        assert!(
7761            *second_order_calls.lock().expect("lock second-order calls") > 0,
7762            "expected ARC to use second-order evaluations"
7763        );
7764    }
7765
7766    #[test]
7767    fn arc_accepted_step_uses_single_evaluation() {
7768        let first_order_calls = Arc::new(Mutex::new(0usize));
7769        let second_order_calls = Arc::new(Mutex::new(0usize));
7770        let objective = CountingSecondOrder::new(
7771            |x: &Array1<f64>| {
7772                let f = 0.5 * x[0] * x[0];
7773                let g = array![x[0]];
7774                let h = array![[1.0]];
7775                (f, g, h)
7776            },
7777            first_order_calls.clone(),
7778            second_order_calls.clone(),
7779        );
7780        let mut solver = super::Arc::new(array![1.0], objective)
7781            .with_profile(Profile::Deterministic)
7782            .with_tolerance(tol(1e-9))
7783            .with_max_iterations(iters(1));
7784
7785        let err = solver
7786            .run()
7787            .expect_err("one ARC iteration should exhaust the budget after a single accepted step");
7788        match err {
7789            ArcError::MaxIterationsReached { .. } => {}
7790            other => panic!("unexpected error variant: {other:?}"),
7791        }
7792        assert_eq!(
7793            *first_order_calls.lock().expect("lock first-order calls"),
7794            0,
7795            "ARC should not issue first-order-only evaluations"
7796        );
7797        assert_eq!(
7798            *second_order_calls.lock().expect("lock second-order calls"),
7799            2,
7800            "expected one initial and one trial second-order evaluation"
7801        );
7802    }
7803
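    // Evaluation accounting: one full sample at x0 plus one at the trial point is the
    // entire budget for an accepted ARC step; acceptance reuses the trial's (f, g, H)
    // as the new iterate state instead of re-evaluating, hence exactly 2.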
7804    #[test]
7805    fn arc_rejects_materially_projected_steps() {
7806        let x0 = array![0.8];
7807        let lower = array![0.0];
7808        let upper = array![1.0];
7809        let clipped_counts = Arc::new(Mutex::new((0usize, 0usize)));
7810        let clipped_counts_c = clipped_counts.clone();
7811        struct ProjectedArcObjective {
7812            clipped_counts: Arc<Mutex<(usize, usize)>>,
7813        }
7814
7815        impl ZerothOrderObjective for ProjectedArcObjective {
7816            fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
7817                if (x[0] - 1.0).abs() < 1e-12 {
7818                    self.clipped_counts.lock().expect("lock clipped counts").0 += 1;
7819                }
7820                let dx = x[0] - 2.0;
7821                Ok(0.5 * dx * dx)
7822            }
7823        }
7824
7825        impl FirstOrderObjective for ProjectedArcObjective {
7826            fn eval_grad(
7827                &mut self,
7828                x: &Array1<f64>,
7829            ) -> Result<FirstOrderSample, ObjectiveEvalError> {
7830                let dx = x[0] - 2.0;
7831                Ok(FirstOrderSample {
7832                    value: 0.5 * dx * dx,
7833                    gradient: array![dx],
7834                })
7835            }
7836        }
7837
7838        impl SecondOrderObjective for ProjectedArcObjective {
7839            fn eval_hessian(
7840                &mut self,
7841                x: &Array1<f64>,
7842            ) -> Result<SecondOrderSample, ObjectiveEvalError> {
7843                if (x[0] - 1.0).abs() < 1e-12 {
7844                    self.clipped_counts.lock().expect("lock clipped counts").1 += 1;
7845                }
7846                let dx = x[0] - 2.0;
7847                Ok(SecondOrderSample {
7848                    value: 0.5 * dx * dx,
7849                    gradient: array![dx],
7850                    hessian: Some(array![[1.0]]),
7851                })
7852            }
7853        }
7854
7855        let mut solver = super::Arc::new(
7856            x0.clone(),
7857            ProjectedArcObjective {
7858                clipped_counts: clipped_counts_c,
7859            },
7860        )
7861        .with_profile(Profile::Deterministic)
7862        .with_bounds(bounds(lower, upper, 1e-12))
7863        .with_max_iterations(iters(1));
7864        solver.core.sigma_min = 1e-12;
7865        solver.core.sigma = 1e-12;
7866
7867        let err = solver
7868            .run()
7869            .expect_err("single projected iteration should exhaust the budget");
7870        match err {
7871            ArcError::MaxIterationsReached { last_solution } => {
7872                assert!(last_solution.final_point[0] <= 1.0 + 1e-12);
7873            }
7874            other => panic!("unexpected error variant: {other:?}"),
7875        }
7876        let counts = clipped_counts.lock().expect("lock clipped counts");
7877        assert_eq!(
7878            counts.0, 0,
7879            "materially projected ARC steps must not use CostOnly rho evaluation"
7880        );
7881        assert!(
7882            counts.1 > 0,
7883            "materially projected ARC steps should refresh a coherent CostGradientHessian sample"
7884        );
7885    }
7886
7887    #[test]
7888    fn arc_respects_single_variable_bound() {
7889        let x0 = array![0.2];
7890        let lower = array![0.0];
7891        let upper = array![1.0];
7892        let mut solver = super::Arc::new(
7893            x0,
7894            SecondOrderFn::new(|x: &Array1<f64>| {
7895                let dx = x[0] - 2.0;
7896                let f = dx * dx;
7897                let g = array![2.0 * dx];
7898                let h = array![[2.0]];
7899                (f, g, h)
7900            }),
7901        )
7902        .with_profile(Profile::Robust)
7903        .with_bounds(bounds(lower, upper, 1e-8))
7904        .with_tolerance(tol(1e-9))
7905        .with_max_iterations(iters(200));
7906
7907        let sol = solver
7908            .run()
7909            .expect("Projected ARC should converge at upper bound");
7910        assert!((sol.final_point[0] - 1.0).abs() < 1e-8);
7911        assert!(gradient_norm(&sol) <= 1e-6);
7912    }
7913
7914    #[test]
7915    fn arc_retries_on_recoverable_trial_errors() {
7916        struct RecoverableArcTrialObjective {
7917            calls: usize,
7918        }
7919
7920        impl ZerothOrderObjective for RecoverableArcTrialObjective {
7921            fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
7922                Ok(0.5 * (x[0] - 1.0).powi(2))
7923            }
7924        }
7925
7926        impl FirstOrderObjective for RecoverableArcTrialObjective {
7927            fn eval_grad(
7928                &mut self,
7929                x: &Array1<f64>,
7930            ) -> Result<FirstOrderSample, ObjectiveEvalError> {
7931                Ok(FirstOrderSample {
7932                    value: 0.5 * (x[0] - 1.0).powi(2),
7933                    gradient: array![x[0] - 1.0],
7934                })
7935            }
7936        }
7937
7938        impl SecondOrderObjective for RecoverableArcTrialObjective {
7939            fn eval_hessian(
7940                &mut self,
7941                x: &Array1<f64>,
7942            ) -> Result<SecondOrderSample, ObjectiveEvalError> {
7943                self.calls += 1;
7944                if self.calls == 2 {
7945                    return Err(ObjectiveEvalError::recoverable(
7946                        "simulated recoverable trial failure",
7947                    ));
7948                }
7949                Ok(SecondOrderSample {
7950                    value: 0.5 * (x[0] - 1.0).powi(2),
7951                    gradient: array![x[0] - 1.0],
7952                    hessian: Some(array![[1.0]]),
7953                })
7954            }
7955        }
7956
7957        let x0 = array![2.0];
7958        let mut solver = super::Arc::new(x0, RecoverableArcTrialObjective { calls: 0 })
7959            .with_profile(Profile::Deterministic)
7960            .with_tolerance(tol(1e-8))
7961            .with_max_iterations(iters(300));
7962
7963        // ARC should survive recoverable trial-evaluation failures by increasing
7964        // regularization and retrying, then still converge to the minimizer.
7965        let sol = solver
7966            .run()
7967            .expect("recoverable ARC trial failures should trigger retries and recover");
7968        assert!((sol.final_point[0] - 1.0).abs() < 1e-6);
7969        assert!(gradient_norm(&sol) < 1e-6);
7970    }
7971
7972    #[test]
7973    fn arc_sigma_escalation_uses_gamma2_then_gamma3() {
7974        let mut core = super::ArcCore::new(array![0.0]);
7975        core.sigma = 1.0;
7976        core.gamma2 = 2.0;
7977        core.gamma3 = 3.0;
7978        let mut streak = 0usize;
7979
7980        // First two failures: moderate growth (gamma2).
7981        core.escalate_sigma_on_failure(&mut streak);
7982        assert_eq!(streak, 1);
7983        assert!((core.sigma - 2.0).abs() < 1e-12);
7984
7985        core.escalate_sigma_on_failure(&mut streak);
7986        assert_eq!(streak, 2);
7987        assert!((core.sigma - 4.0).abs() < 1e-12);
7988
7989        // Third consecutive failure: stronger growth (gamma3).
7990        core.escalate_sigma_on_failure(&mut streak);
7991        assert_eq!(streak, 3);
7992        assert!((core.sigma - 12.0).abs() < 1e-12);
7993    }
7994
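    // Recap of the policy: 1 -> 2 -> 4 under gamma2, then 4 -> 12 under gamma3; growth
    // accelerates once the cubic model has been rejected three times in a row.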
7995    /// A function whose gradient is constant, causing `y_k` to be zero.
7996    fn linear_function(x: &Array1<f64>) -> (f64, Array1<f64>) {
7997        (2.0 * x[0] + 3.0 * x[1], array![2.0, 3.0])
7998    }
7999
8000    fn huge_offset_linear_function(x: &Array1<f64>) -> (f64, Array1<f64>) {
8001        (1.0e16 + 2.0 * x[0] + 3.0 * x[1], array![2.0, 3.0])
8002    }
8003
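    // The 1.0e16 offset is deliberate: the f64 spacing (ulp) near 1e16 is 2.0, so the
    // 2*x[0] + 3*x[1] term is largely absorbed by rounding and apparent decreases in f
    // are unreliable. The no-improve-streak test below uses this to show that flat f
    // values alone, with a still-large gradient, must not be read as convergence.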
8004    // A highly ill-conditioned quadratic function.
8005    // Its condition number is 1000, so the valley is about 32x longer than it is wide.
8006    fn ill_conditioned_quadratic(x: &Array1<f64>) -> (f64, Array1<f64>) {
8007        let scale = 1000.0;
8008        let f = scale * x[0].powi(2) + x[1].powi(2);
8009        let g = array![2.0 * scale * x[0], 2.0 * x[1]];
8010        (f, g)
8011    }
8012
8013    // This function is minimized anywhere on the line x[0] = -x[1].
8014    // Its Hessian is singular.
8015    fn singular_hessian_function(x: &Array1<f64>) -> (f64, Array1<f64>) {
8016        let val = (x[0] + x[1]).powi(2);
8017        (val, array![2.0 * (x[0] + x[1]), 2.0 * (x[0] + x[1])])
8018    }
8019
8020    // Function with a steep exponential "wall".
8021    fn wall_with_minimum(x: &Array1<f64>) -> (f64, Array1<f64>) {
8022        if x[0] > 70.0 {
8023            // The wall
8024            (f64::INFINITY, array![f64::INFINITY])
8025        } else {
8026            // A simple quadratic with minimum at x=60
8027            ((x[0] - 60.0).powi(2), array![2.0 * (x[0] - 60.0)])
8028        }
8029    }
8030
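    // Returning +infinity for both value and gradient past x = 70 models a hard barrier:
    // any trial placed beyond the wall is non-finite and must be rejected outright,
    // while the quadratic branch still steers iterates toward the minimum at x = 60,
    // ten units inside the wall.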
8031    // --- 1. Standard Convergence Tests ---
8032
8033    #[test]
8034    fn test_quadratic_bowl_converges() {
8035        let x0 = array![10.0, -5.0];
8036        let Solution { final_point, .. } = Bfgs::new(x0, bfgs_oracle(quadratic)).run().unwrap();
8037        assert_that!(&final_point[0]).is_close_to(0.0, 1e-5);
8038        assert_that!(&final_point[1]).is_close_to(0.0, 1e-5);
8039    }
8040
8041    #[test]
8042    fn test_optimize_first_order_picks_bfgs() {
8043        let x0 = array![10.0, -5.0];
8044        let Solution { final_point, .. } = optimize(Problem::new(x0, bfgs_oracle(quadratic)))
8045            .run()
8046            .unwrap();
8047        assert_that!(&final_point[0]).is_close_to(0.0, 1e-5);
8048        assert_that!(&final_point[1]).is_close_to(0.0, 1e-5);
8049    }
8050
8051    #[test]
8052    fn test_optimize_second_order_picks_newton_by_default() {
8053        let x0 = array![-1.2, 1.0];
8054        let Solution { final_point, .. } = optimize(SecondOrderProblem::new(
8055            x0,
8056            SecondOrderFn::new(rosenbrock_with_hessian),
8057        ))
8058        .run()
8059        .unwrap();
8060        assert_that!(&final_point[0]).is_close_to(1.0, 1e-5);
8061        assert_that!(&final_point[1]).is_close_to(1.0, 1e-5);
8062    }
8063
8064    #[test]
8065    fn test_optimize_second_order_uses_arc_for_aggressive_profile() {
8066        let x0 = array![1.0];
8067        let objective = SecondOrderFn::new(|x: &Array1<f64>| {
8068            let f = x[0] * x[0];
8069            let g = array![2.0 * x[0]];
8070            let h = array![[2.0]];
8071            (f, g, h)
8072        });
8073        let solver =
8074            optimize(SecondOrderProblem::new(x0, objective).with_profile(Profile::Aggressive));
8075        assert!(matches!(solver, AutoSecondOrderSolver::Arc(_)));
8076    }
8077
8078    #[test]
8079    fn test_quadratic_still_converges_strongly() {
8080        let x0 = array![20.0, -30.0];
8081        let sol = Bfgs::new(x0, bfgs_oracle(quadratic))
8082            .with_tolerance(tol(1e-8))
8083            .with_max_iterations(iters(1000))
8084            .run()
8085            .unwrap();
8086        assert_that!(&sol.final_point[0]).is_close_to(0.0, 1e-6);
8087        assert_that!(&sol.final_point[1]).is_close_to(0.0, 1e-6);
8088    }
8089
8090    #[test]
8091    fn test_rosenbrock_converges() {
8092        let x0 = array![-1.2, 1.0];
8093        let Solution { final_point, .. } = Bfgs::new(x0, bfgs_oracle(rosenbrock)).run().unwrap();
8094        assert_that!(&final_point[0]).is_close_to(1.0, 1e-5);
8095        assert_that!(&final_point[1]).is_close_to(1.0, 1e-5);
8096    }
8097
8098    // --- 2. Failure and Edge Case Tests ---
8099
8100    #[test]
8101    fn test_begin_at_minimum_terminates_immediately() {
8102        let x0 = array![0.0, 0.0];
8103        let Solution { iterations, .. } = Bfgs::new(x0, bfgs_oracle(quadratic))
8104            .with_tolerance(tol(1e-5))
8105            .run()
8106            .unwrap();
8107        assert_that(&iterations).is_less_than_or_equal_to(1);
8108    }
8109
8110    #[test]
8111    fn test_max_iterations_error_is_returned() {
8112        let x0 = array![-1.2, 1.0];
8113        let max_iterations = 5;
8114        let result = Bfgs::new(x0, bfgs_oracle(rosenbrock))
8115            .with_max_iterations(iters(max_iterations))
8116            .run();
8117
8118        match result {
8119            Err(BfgsError::MaxIterationsReached { last_solution }) => {
8120                assert_eq!(last_solution.iterations, max_iterations);
8121                // Also check that the point is not the origin, i.e., that some work was done.
8122                assert_that!(&last_solution.final_point.dot(&last_solution.final_point))
8123                    .is_greater_than(0.0);
8124            }
8125            _ => panic!("Expected MaxIterationsReached error, but got {:?}", result),
8126        }
8127    }
8128
8129    #[test]
8130    fn test_non_convex_function_is_handled() {
8131        let x0 = array![2.0];
8132        let result = Bfgs::new(x0.clone(), bfgs_oracle(non_convex_max)).run();
8133        eprintln!("non_convex result: {:?}", result);
8134        // The robust solver should not panic: the function has no minimum, so it cannot
8135        // make sustained progress and must surface one of the structured errors below.
8136        assert!(matches!(
8137            result,
8138            Err(BfgsError::MaxIterationsReached { .. })
8139                | Err(BfgsError::LineSearchFailed { .. })
8140                | Err(BfgsError::GradientIsNaN)
8141        ));
8142    }
8143
8144    #[test]
8145    fn test_zero_curvature_is_handled() {
8146        let x0 = array![10.0, 10.0];
8147        let result = Bfgs::new(x0, bfgs_oracle(linear_function))
8148            .with_profile(Profile::Deterministic)
8149            .run();
8150        // The solver should skip Hessian updates due to sy=0 and eventually
8151        // terminate gracefully without panicking.
8152        match result {
8153            Ok(sol) => {
8154                assert!(sol.final_value.is_finite());
8155                assert!(gradient_norm(&sol).is_finite());
8156            }
8157            Err(BfgsError::MaxIterationsReached { .. })
8158            | Err(BfgsError::LineSearchFailed { .. })
8159            | Err(BfgsError::StepSizeTooSmall) => {}
8160            Err(other) => panic!("unexpected error: {other:?}"),
8161        }
8162    }
8163
8164    #[test]
8165    fn test_no_improve_streak_requires_stationarity_or_tiny_step() {
8166        let x0 = array![10.0, 10.0];
8167        let result = Bfgs::new(x0, bfgs_oracle(huge_offset_linear_function))
8168            .with_profile(Profile::Deterministic)
8169            .with_max_iterations(iters(8))
8170            .run();
8171
8172        match result {
8173            Ok(sol) => panic!(
8174                "solver falsely reported convergence with ||g||={:.3e}",
8175                gradient_norm(&sol)
8176            ),
8177            Err(BfgsError::MaxIterationsReached { last_solution })
8178            | Err(BfgsError::LineSearchFailed { last_solution, .. }) => {
8179                assert!(gradient_norm(&last_solution) > 1e-3);
8180            }
8181            Err(BfgsError::StepSizeTooSmall) => {}
8182            Err(other) => panic!("unexpected error: {other:?}"),
8183        }
8184    }
8185
8186    #[test]
8187    fn stagnation_guard_requires_gradient_or_tiny_feasible_step() {
8188        let core = super::BfgsCore::new(array![0.0, 0.0]);
8189        let x_prev = array![1.0, 1.0];
8190        let x_far = array![2.0, 2.0];
8191        let x_same = x_prev.clone();
8192        let g_large = array![1.0, -1.0];
8193        let g_small = array![1e-6, 0.0];
8194
8195        assert!(!core.stagnation_converged(&x_prev, &x_far, &g_large));
8196        assert!(core.stagnation_converged(&x_prev, &x_same, &g_large));
8197        assert!(core.stagnation_converged(&x_prev, &x_far, &g_small));
8198    }
8199
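    // The guard may declare convergence only when the iterate effectively did not move
    // or the projected gradient is already tiny; a large move paired with a large
    // gradient, as in the first assertion, must never count as stagnation.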
8200    #[test]
8201    fn test_nan_gradient_returns_error() {
8202        // This function's gradient becomes NaN when x gets very close to 0.
8203        let nan_fn = |x: &Array1<f64>| {
8204            if x[0].abs() < 1e-12 {
8205                (f64::NAN, array![f64::NAN])
8206            } else {
8207                (x[0].powi(2), array![2.0 * x[0]])
8208            }
8209        };
8210        // Start at a point that will converge towards 0, triggering the NaN condition.
8211        let x0 = array![0.1];
8212        let result = Bfgs::new(x0, bfgs_oracle(nan_fn))
8213            .with_profile(Profile::Deterministic)
8214            .with_tolerance(tol(1e-15)) // Very tight tolerance to force convergence towards 0
8215            .run();
8216
8217        match result {
8218            Ok(sol) => {
8219                assert!(sol.final_value.is_finite());
8220                assert!(sol.final_point[0].abs() < 1e-4);
8221            }
8222            Err(BfgsError::GradientIsNaN)
8223            | Err(BfgsError::LineSearchFailed { .. })
8224            | Err(BfgsError::MaxIterationsReached { .. })
8225            | Err(BfgsError::StepSizeTooSmall) => {}
8226            Err(other) => panic!("unexpected error: {other:?}"),
8227        }
8228    }
8229
8230    #[test]
8231    fn test_linesearch_failed_reports_nonzero_attempts() {
8232        struct AlwaysRecoverableTrials;
8233
8234        impl ZerothOrderObjective for AlwaysRecoverableTrials {
8235            fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
8236                if x.iter().all(|v| *v == 0.0) {
8237                    Ok(833.403058988699)
8238                } else {
8239                    Err(ObjectiveEvalError::recoverable(
8240                        "synthetic recoverable trial failure",
8241                    ))
8242                }
8243            }
8244        }
8245
8246        impl FirstOrderObjective for AlwaysRecoverableTrials {
8247            fn eval_grad(
8248                &mut self,
8249                x: &Array1<f64>,
8250            ) -> Result<FirstOrderSample, ObjectiveEvalError> {
8251                if x.iter().all(|v| *v == 0.0) {
8252                    Ok(FirstOrderSample {
8253                        value: 833.403058988699,
8254                        gradient: array![1.1751972450892738, 0.0, 0.0],
8255                    })
8256                } else {
8257                    Err(ObjectiveEvalError::recoverable(
8258                        "synthetic recoverable trial failure",
8259                    ))
8260                }
8261            }
8262        }
8263
8264        let x0 = array![0.0, 0.0, 0.0];
8265        let f_k = 833.403058988699;
8266        let g_k = array![1.1751972450892738, 0.0, 0.0];
8267        let d_k = -g_k.clone();
8268        let mut core = super::BfgsCore::new(x0.clone());
8269        let mut oracle = super::FirstOrderCache::new(x0.len());
8270        let err = super::backtracking_line_search(
8271            &mut core,
8272            &mut AlwaysRecoverableTrials,
8273            &mut oracle,
8274            &x0,
8275            &d_k,
8276            f_k,
8277            &g_k,
8278        )
8279        .expect_err("line search should fail when every trial is recoverable");
8280
8281        let (max_attempts, failure_reason) = match err {
8282            super::LineSearchError::MaxAttempts(attempts) => {
8283                (attempts, LineSearchFailureReason::MaxAttempts)
8284            }
8285            super::LineSearchError::StepSizeTooSmall => (
8286                BACKTRACKING_MAX_ATTEMPTS,
8287                LineSearchFailureReason::StepSizeTooSmall,
8288            ),
8289            other => panic!("expected backtracking failure, got: {other:?}"),
8290        };
8291
8292        assert!(max_attempts > 0, "max_attempts should never be 0");
8293        let rendered = format!(
8294            "{}",
8295            BfgsError::LineSearchFailed {
8296                last_solution: Box::new(Solution::gradient_based(
8297                    x0,
8298                    f_k,
8299                    g_k.clone(),
8300                    g_k.dot(&g_k).sqrt(),
8301                    None,
8302                    0,
8303                    0,
8304                    0,
8305                    0,
8306                )),
8307                max_attempts,
8308                failure_reason,
8309            }
8310        );
8311        assert!(
8312            rendered.contains("MaxAttempts") || rendered.contains("StepSizeTooSmall"),
8313            "error should include failure reason, got: {rendered}"
8314        );
8315    }
8316
8317    // --- 3. Comparison Tests against a Trusted Library ---
8318
8319    #[test]
8320    fn test_rosenbrock_matches_scipy_behavior() {
8321        let x0 = array![-1.2, 1.0];
8322        let tolerance = 1e-6;
8323
8324        // Run our implementation.
8325        let our_res = Bfgs::new(x0.clone(), bfgs_oracle(rosenbrock))
8326            .with_tolerance(tol(tolerance))
8327            .run()
8328            .unwrap();
8329
8330        // Run scipy's implementation with synchronized settings.
8331        let scipy_res = optimize_with_python(&x0, "rosenbrock", tolerance, 100)
8332            .expect("Python optimization failed");
8333
8334        assert!(
8335            scipy_res.success,
8336            "Scipy optimization failed: {:?}",
8337            scipy_res.error
8338        );
8339        let scipy_point = scipy_res.final_point.unwrap();
8340
8341        // Assert that the final points are virtually identical.
8342        let distance = ((our_res.final_point[0] - scipy_point[0]).powi(2)
8343            + (our_res.final_point[1] - scipy_point[1]).powi(2))
8344        .sqrt();
8345        assert_that!(&distance).is_less_than(1e-5);
8346
8347        // Assert that the number of iterations is very similar. A small difference
8348        // is acceptable due to minor, valid variations in line search implementations.
8349        let iter_diff = (our_res.iterations as i64 - scipy_res.iterations.unwrap() as i64).abs();
8350        assert_that(&iter_diff).is_less_than_or_equal_to(10);
8351
8352        let PythonOptResult {
8353            final_value,
8354            final_gradient_norm,
8355            func_evals,
8356            grad_evals,
8357            message,
8358            ..
8359        } = scipy_res;
8360        if let Some(value) = final_value {
8361            assert!(value.is_finite());
8362        }
8363        if let Some(norm) = final_gradient_norm {
8364            assert!(norm.is_finite());
8365        }
8366        if let Some(count) = func_evals {
8367            assert!(count > 0);
8368        }
8369        if let Some(count) = grad_evals {
8370            assert!(count > 0);
8371        }
8372        if let Some(text) = message {
8373            assert!(!text.is_empty());
8374        }
8375    }
8376
8377    #[test]
8378    fn test_quadratic_matches_scipy_behavior() {
8379        let x0 = array![150.0, -275.5];
8380        let tolerance = 1e-8;
8381
8382        // Run our implementation (result intentionally unused: this just checks clean termination).
8383        let _our_res = match Bfgs::new(x0.clone(), bfgs_oracle(quadratic))
8384            .with_tolerance(tol(tolerance))
8385            .run()
8386        {
8387            Ok(sol) => sol,
8388            Err(BfgsError::MaxIterationsReached { last_solution }) => *last_solution,
8389            Err(e) => panic!("unexpected error: {:?}", e),
8390        };
8391
8392        // Run scipy's implementation with synchronized settings.
8393        let scipy_res = optimize_with_python(&x0, "quadratic", tolerance, 100)
8394            .expect("Python optimization failed");
8395
8396        assert!(
8397            scipy_res.success,
8398            "Scipy optimization failed: {:?}",
8399            scipy_res.error
8400        );
8401
8402        let PythonOptResult {
8403            final_point,
8404            final_value,
8405            final_gradient_norm,
8406            iterations,
8407            func_evals,
8408            grad_evals,
8409            message,
8410            ..
8411        } = scipy_res;
8412        if let Some(point) = final_point {
8413            assert_eq!(point.len(), 2);
8414        }
8415        if let Some(value) = final_value {
8416            assert!(value.is_finite());
8417        }
8418        if let Some(norm) = final_gradient_norm {
8419            assert!(norm.is_finite());
8420        }
8421        if let Some(iters) = iterations {
8422            assert!(iters <= 100);
8423        }
8424        if let Some(count) = func_evals {
8425            assert!(count > 0);
8426        }
8427        if let Some(count) = grad_evals {
8428            assert!(count > 0);
8429        }
8430        if let Some(text) = message {
8431            assert!(!text.is_empty());
8432        }
8433    }
8434
8435    // --- 4. Robustness Tests ---
8436
8437    #[test]
8438    fn test_ill_conditioned_problem_converges() {
8439        let x0 = array![1.0, 1000.0]; // Start far up the narrow valley
8440        let res = Bfgs::new(x0, bfgs_oracle(ill_conditioned_quadratic)).run();
8441        assert!(res.is_ok() || matches!(res, Err(BfgsError::MaxIterationsReached { .. })));
8442    }
8443
8444    #[test]
8445    fn test_singular_hessian_is_handled_gracefully() {
8446        let x0 = array![10.0, 20.0];
8447        let result = Bfgs::new(x0, bfgs_oracle(singular_hessian_function))
8448            .with_tolerance(tol(1e-8))
8449            .run();
8450
8451        // The goal is to ensure the solver doesn't panic or return a numerical error.
8452        // It can either converge (if it gets lucky) or hit the max iteration limit.
8453        // Both are "graceful" outcomes.
8454        match result {
8455            Ok(soln) => {
8456                // If it did converge, verify it's on the correct line of minima.
8457                assert_that!(&soln.final_point[0]).is_close_to(-soln.final_point[1], 1e-5);
8458                assert_that!(&gradient_norm(&soln)).is_less_than(1e-8);
8459            }
8460            Err(BfgsError::MaxIterationsReached { .. }) => {
8461                // Hitting the iteration limit is an acceptable and expected outcome. Pass.
8462            }
8463            Err(e) => {
8464                // Any other error (like LineSearchFailed, GradientIsNaN) is a failure.
8465                panic!("Solver failed with an unexpected error: {:?}", e);
8466            }
8467        }
8468    }
8469
8470    #[test]
8471    fn test_line_search_handles_inf() {
8472        let x0 = array![10.0]; // Start far from the wall and minimum.
8473        let result = Bfgs::new(x0, bfgs_oracle(wall_with_minimum)).run();
8474        assert!(result.is_ok() || matches!(result, Err(BfgsError::MaxIterationsReached { .. })));
8475    }
8476
8477    #[test]
8478    fn test_trust_region_projection_uses_actual_step() {
8479        let x0 = array![0.9];
8480        let lower = array![0.0];
8481        let upper = array![1.0];
8482        let mut core = super::BfgsCore::new(x0.clone());
8483        core.bounds = Some(super::BoxSpec::new(lower, upper, 1e-8));
8484        core.trust_radius = 10.0;
8485        let fg = |x: &Array1<f64>| {
8486            let f = (x[0] - 2.0).powi(2);
8487            let g = array![2.0 * (x[0] - 2.0)];
8488            (f, g)
8489        };
8490        let mut obj = bfgs_oracle(fg);
8491        let x_k = core.project_point(&x0);
8492        let (f_k, g_k) = fg(&x_k);
8493        let mut b_inv = Array2::eye(1);
8494        let mut oracle = super::FirstOrderCache::new(x0.len());
8495        let mut func_evals = 0;
8496        let mut grad_evals = 0;
8497        let res = core.try_trust_region_step(
8498            &mut obj,
8499            &mut oracle,
8500            &mut b_inv,
8501            &x_k,
8502            f_k,
8503            &g_k,
8504            &mut func_evals,
8505            &mut grad_evals,
8506        );
8507        assert!(res.is_some());
8508        let (x_new, f_new, g_new) = res.unwrap();
8509        assert!((x_new[0] - 1.0).abs() < 1e-12);
8510        assert!(f_new.is_finite());
8511        assert!(g_new[0].is_finite());
8512    }
8513
8514    #[test]
8515    fn test_bfgs_trust_region_predicted_decrease_respects_active_mask() {
8516        let core = super::BfgsCore::new(array![0.0, 0.0]);
8517        let b_inv = array![[2.0, 1.0], [1.0, 2.0]];
8518        let g_proj = array![0.0, -1.0];
8519        let s = array![0.0, 1.0];
8520        let active = vec![true, false];
8521
8522        let pred = core
8523            .trust_region_predicted_decrease(&b_inv, &g_proj, &s, Some(&active))
8524            .expect("masked predicted decrease should be well-defined");
8525
8526        assert!(
8527            (pred - 0.75).abs() < 1e-9,
8528            "unexpected predicted decrease: {pred}"
8529        );
8530    }
8531
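    // Derivation of 0.75 (a sketch, assuming the usual quadratic-model convention
    // pred = -g.s - 0.5 * s.B.s restricted to the free coordinates): with coordinate 0
    // masked, the reduced inverse Hessian is the scalar 2, so B = 1/2 on the free
    // coordinate; then -g.s = 1 and 0.5 * 1^2 * (1/2) = 0.25, giving 1 - 0.25 = 0.75.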
8532    #[test]
8533    fn test_bfgs_trust_region_fallback_freezes_active_bound_coordinates() {
8534        let x0 = array![0.0, 0.0];
8535        let lower = array![0.0, -10.0];
8536        let upper = array![10.0, 10.0];
8537        let mut core = super::BfgsCore::new(x0.clone());
8538        core.bounds = Some(super::BoxSpec::new(lower, upper, 1e-8));
8539        core.trust_radius = 10.0;
8540
8541        let fg = |x: &Array1<f64>| {
8542            let f = (x[0] + 1.0).powi(2) + (x[1] - 2.0).powi(2);
8543            let g = array![2.0 * (x[0] + 1.0), 2.0 * (x[1] - 2.0)];
8544            (f, g)
8545        };
8546
8547        let mut obj = bfgs_oracle(fg);
8548        let x_k = core.project_point(&x0);
8549        let (f_k, g_k) = fg(&x_k);
8550        let active = core.active_mask(&x_k, &g_k);
8551        assert_eq!(active, vec![true, false]);
8552
8553        let mut b_inv = array![[5.0, 1.0], [1.0, 0.5]];
8554        let mut oracle = super::FirstOrderCache::new(x0.len());
8555        let mut func_evals = 0;
8556        let mut grad_evals = 0;
8557        let res = core.try_trust_region_step(
8558            &mut obj,
8559            &mut oracle,
8560            &mut b_inv,
8561            &x_k,
8562            f_k,
8563            &g_k,
8564            &mut func_evals,
8565            &mut grad_evals,
8566        );
8567
8568        assert!(
8569            res.is_some(),
8570            "masked trust-region fallback should produce a feasible step"
8571        );
8572        let (x_new, f_new, g_new) = res.unwrap();
8573        assert!(
8574            x_new[0].abs() < 1e-12,
8575            "active coordinate moved: {:?}",
8576            x_new
8577        );
8578        assert!(x_new[1] > x_k[1]);
8579        assert!(f_new < f_k);
8580        assert!(g_new.iter().all(|v| v.is_finite()));
8581    }
8582
8583    #[test]
8584    fn test_flat_with_noise_accepts() {
8585        let f = |x: &Array1<f64>| {
8586            let noise = (x.sum() * 1e6).sin() * 1e-12;
8587            let val = 1.0 + noise;
8588            let g = Array1::from_vec(vec![1e-12; x.len()]);
8589            (val, g)
8590        };
8591        let x0 = array![0.0, 0.0];
8592        let res = Bfgs::new(x0, bfgs_oracle(f))
8593            .with_tolerance(tol(1e-10))
8594            .run();
8595        assert!(res.is_ok() || matches!(res, Err(super::BfgsError::MaxIterationsReached { .. })));
8596    }
8597
8598    #[test]
8599    fn test_piecewise_alpha_jump() {
8600        let f = |x: &Array1<f64>| {
8601            let r = x.dot(x).sqrt();
8602            let val = if r < 1.0 { 1.0 } else { 0.9 };
8603            let g = if r < 1.0 {
8604                Array1::zeros(x.len())
8605            } else {
8606                x.mapv(|v| 1e-6 * v)
8607            };
8608            (val, g)
8609        };
8610        let x0 = array![0.5, 0.5];
8611        let res = Bfgs::new(x0, bfgs_oracle(f)).run();
8612        assert!(res.is_ok() || matches!(res, Err(super::BfgsError::MaxIterationsReached { .. })));
8613    }
8614
8615    #[test]
8616    fn test_rng_symmetry() {
8617        // Ensure the internal RNG produces a roughly symmetric distribution.
8618        let x0 = array![0.0];
8619        let f = |x: &Array1<f64>| (x[0], array![1.0]);
8620        let mut solver = super::Bfgs::new(x0, bfgs_oracle(f));
8621        solver.core.rng_state = 12345;
8622        let mut sum = 0.0f64;
8623        let n = 20_000;
8624        for _ in 0..n {
8625            sum += solver.next_rand_sym();
8626        }
8627        let mean = sum / (n as f64);
8628        assert_that!(&mean.abs()).is_less_than(5e-3);
8629    }
8630}