1#![allow(non_snake_case)]
2use ndarray::{Array1, Array2};
101use std::collections::VecDeque;
102
/// Machine epsilon for `f64`, the base unit for all noise tolerances below.
const EPS: f64 = f64::EPSILON;

/// Function-value noise tolerance: `tau * eps * (1 + |f_k|)`.
#[inline]
fn eps_f(fk: f64, tau: f64) -> f64 {
    let scale = 1.0 + fk.abs();
    scale * tau * EPS
}
109#[inline]
110fn eps_g(gk: &Array1<f64>, dk: &Array1<f64>, tau: f64) -> f64 {
111 tau * EPS * gk.dot(gk).sqrt() * dk.dot(dk).sqrt()
112}
113
114#[inline]
115fn directional_derivative(g: &Array1<f64>, s: &Array1<f64>, alpha: f64, d: &Array1<f64>) -> f64 {
116 if alpha > 0.0 {
117 g.dot(s) / alpha
118 } else {
119 g.dot(d)
120 }
121}
122
/// Decide whether a line-search trial point is acceptable and, if so, under
/// which acceptance rule (strongest rule first).
///
/// All inputs are precomputed scalars: `f_k`/`fmax` are the current and
/// nonmonotone-window reference values, `gk_ts` feeds the Armijo test,
/// `g_trial_dot_d` / `gk_dot_d_eff` are directional derivatives for the
/// curvature tests, and `eps_f_k` / `eps_g_k` are the floating-point noise
/// tolerances produced by `eps_f` / `eps_g`. Returns `None` when no rule
/// fires or the trial step itself was invalid (`!step_ok`).
#[inline]
fn classify_line_search_accept(
    core: &BfgsCore,
    step_ok: bool,
    f_k: f64,
    fmax: f64,
    f_trial: f64,
    gk_ts: f64,
    g_trial_dot_d: f64,
    gk_dot_d_eff: f64,
    g_trial_norm: f64,
    gk_norm: f64,
    drop_factor: f64,
    eps_f_k: f64,
    eps_g_k: f64,
    c2: f64,
) -> Option<AcceptKind> {
    if !step_ok {
        return None;
    }
    // Armijo / GLL predicates are delegated to `core` (defined elsewhere in
    // this file — exact formulas not visible here).
    let armijo_ok = core.accept_armijo(f_k, gk_ts, f_trial);
    let gll_ok = core.accept_gll_nonmonotone(fmax, gk_ts, f_trial);
    // The trial direction must remain descent beyond gradient-level noise.
    let dir_ok = g_trial_dot_d <= -eps_g_k;
    // Strong-Wolfe curvature condition, plus a noise-slackened variant.
    let strong_curv_ok = g_trial_dot_d.abs() <= c2 * gk_dot_d_eff.abs();
    let approx_curv_ok =
        g_trial_dot_d.abs() <= c2 * gk_dot_d_eff.abs() + core.curv_slack_scale * eps_g_k;
    // "Flat": the value did not increase beyond function-value noise.
    let f_flat_ok = f_trial <= f_k + eps_f_k;

    // Order matters: report the strongest rule that applies.
    if armijo_ok && strong_curv_ok {
        Some(AcceptKind::StrongWolfe)
    } else if armijo_ok && core.relaxed_acceptors_enabled() && f_flat_ok && approx_curv_ok && dir_ok
    {
        Some(AcceptKind::ApproxWolfe)
    } else if gll_ok && approx_curv_ok {
        Some(AcceptKind::Nonmonotone)
    } else if core.relaxed_acceptors_enabled()
        && f_flat_ok
        && g_trial_norm <= drop_factor * gk_norm
        && dir_ok
    {
        Some(AcceptKind::GradDrop)
    } else {
        None
    }
}
168
/// True when at least one coordinate is not pinned by the active-set mask.
#[inline]
fn any_free_variables(active: &[bool]) -> bool {
    !active.iter().all(|&pinned| pinned)
}
173
174fn mask_vector_inplace(v: &mut Array1<f64>, active: &[bool]) {
175 for (vi, &is_active) in v.iter_mut().zip(active.iter()) {
176 if is_active {
177 *vi = 0.0;
178 }
179 }
180}
181
182fn masked_hv_inplace(h: &Array2<f64>, v: &Array1<f64>, active: &[bool], out: &mut Array1<f64>) {
183 out.fill(0.0);
184 for i in 0..h.nrows() {
185 if active[i] {
186 continue;
187 }
188 let mut accum = 0.0;
189 for j in 0..h.ncols() {
190 if active[j] {
191 continue;
192 }
193 accum += h[[i, j]] * v[j];
194 }
195 out[i] = accum;
196 }
197}
198
/// Solve `(A + ridge*I) x = b` restricted to the free variables (those with
/// `active[i] == false`) using conjugate gradients; pinned coordinates of the
/// solution stay exactly zero.
///
/// Small systems are routed to the dense direct solver on the masked system.
/// Returns `None` on dimension mismatch, non-finite arithmetic, or loss of
/// positive curvature (`p'Ap <= 0`); when the iteration budget is exhausted
/// the current iterate is returned as-is.
fn cg_solve_masked_adaptive(
    a: &Array2<f64>,
    b: &Array1<f64>,
    active: &[bool],
    max_iter: usize,
    tol_rel: f64,
    ridge: f64,
) -> Option<Array1<f64>> {
    if a.nrows() != a.ncols() || a.nrows() != b.len() || active.len() != b.len() {
        return None;
    }
    // Everything pinned: the masked solution is identically zero.
    if !any_free_variables(active) {
        return Some(Array1::zeros(b.len()));
    }
    if prefer_dense_direct(b.len()) {
        let (effective_a, effective_b) = build_masked_subproblem_system(a, b, Some(active));
        return dense_solve_shifted(&effective_a, &effective_b, ridge);
    }

    let n = b.len();
    let mut x = Array1::<f64>::zeros(n);
    // x starts at zero, so the initial residual is b, masked to the free set.
    let mut r = b.clone();
    mask_vector_inplace(&mut r, active);
    let b_norm = r.dot(&r).sqrt();
    if !b_norm.is_finite() {
        return None;
    }
    if b_norm <= 1e-32 {
        return Some(x);
    }
    let tol_abs = tol_rel.max(0.0) * b_norm.max(1e-16);
    let mut p = r.clone();
    let mut rs_old = r.dot(&r);
    let mut ap = Array1::<f64>::zeros(n);

    for _ in 0..max_iter {
        // ap = (A + ridge*I) p on the free subspace.
        masked_hv_inplace(a, &p, active, &mut ap);
        if ridge > 0.0 {
            for i in 0..n {
                ap[i] += ridge * p[i];
            }
        }
        let p_ap = p.dot(&ap);
        // CG requires positive curvature along the search direction.
        if !p_ap.is_finite() || p_ap <= 0.0 {
            return None;
        }
        let alpha = rs_old / p_ap;
        if !alpha.is_finite() {
            return None;
        }
        x.scaled_add(alpha, &p);
        r.scaled_add(-alpha, &ap);
        // Re-mask to guard against numerical drift into pinned coordinates.
        mask_vector_inplace(&mut x, active);
        mask_vector_inplace(&mut r, active);
        let rs_new = r.dot(&r);
        if !rs_new.is_finite() {
            return None;
        }
        if rs_new.sqrt() <= tol_abs {
            return Some(x);
        }
        let beta = rs_new / rs_old;
        if !beta.is_finite() || beta < 0.0 {
            return None;
        }
        // p = r + beta * p — standard CG direction update.
        p *= beta;
        p += &r;
        mask_vector_inplace(&mut p, active);
        rs_old = rs_new;
    }
    // Budget exhausted: return the best iterate found so far.
    Some(x)
}
271
/// Evaluate only the objective value through the caching oracle, updating the
/// function-evaluation counter. Thin wrapper that keeps call sites uniform.
fn bfgs_eval_cost<ObjFn>(
    oracle: &mut FirstOrderCache,
    obj_fn: &mut ObjFn,
    x: &Array1<f64>,
    func_evals: &mut usize,
) -> Result<f64, ObjectiveEvalError>
where
    ObjFn: FirstOrderObjective,
{
    oracle.eval_cost(obj_fn, x, func_evals)
}
283
/// Evaluate objective value and gradient through the caching oracle, updating
/// both evaluation counters. Thin wrapper that keeps call sites uniform.
fn bfgs_eval_cost_grad<ObjFn>(
    oracle: &mut FirstOrderCache,
    obj_fn: &mut ObjFn,
    x: &Array1<f64>,
    func_evals: &mut usize,
    grad_evals: &mut usize,
) -> Result<(f64, Array1<f64>), ObjectiveEvalError>
where
    ObjFn: FirstOrderObjective,
{
    oracle.eval_cost_grad(obj_fn, x, func_evals, grad_evals)
}
296
/// Bounded FIFO of recent objective values, providing the reference maximum
/// `f_max` for nonmonotone (GLL-style) line-search acceptance.
struct GllWindow {
    buf: VecDeque<f64>,
    cap: usize,
}

impl GllWindow {
    /// Create a window holding at most `cap` values (capacity floors at 1).
    fn new(cap: usize) -> Self {
        let cap = cap.max(1);
        Self {
            buf: VecDeque::with_capacity(cap),
            cap,
        }
    }

    /// Forget every recorded value.
    fn clear(&mut self) {
        self.buf.clear();
    }

    /// Record `f`, evicting the oldest entry once the window is full.
    fn push(&mut self, f: f64) {
        if self.buf.len() >= self.cap {
            self.buf.pop_front();
        }
        self.buf.push_back(f);
    }

    /// Largest recorded value, or `-inf` for an empty window.
    fn fmax(&self) -> f64 {
        self.buf
            .iter()
            .copied()
            .fold(f64::NEG_INFINITY, f64::max)
    }

    /// True when nothing has been recorded.
    fn is_empty(&self) -> bool {
        self.buf.is_empty()
    }

    /// Change the capacity (floored at 1), evicting oldest entries to fit.
    fn set_cap(&mut self, cap: usize) {
        self.cap = cap.max(1);
        while self.buf.len() > self.cap {
            self.buf.pop_front();
        }
    }
}
331
332#[derive(Clone)]
334struct ProbeBest {
335 f: f64,
336 x: Array1<f64>,
337 g: Array1<f64>,
338}
339impl ProbeBest {
340 fn new(x0: &Array1<f64>, f0: f64, g0: &Array1<f64>) -> Self {
341 Self {
342 x: x0.clone(),
343 f: f0,
344 g: g0.clone(),
345 }
346 }
347 fn consider(&mut self, x: &Array1<f64>, f: f64, g: &Array1<f64>) {
348 if !f.is_finite() || g.iter().any(|v| !v.is_finite()) {
349 return;
350 }
351 if !self.f.is_finite() || f < self.f {
352 self.f = f;
353 self.x = x.clone();
354 self.g = g.clone();
355 }
356 }
357}
358
/// Outcome of an (unmasked) CG solve: the iterate plus its relative residual
/// `||b - A x|| / max(||b||, 1)`, so callers can decide whether to refine.
struct CgResult {
    // Solution estimate produced by CG.
    x: Array1<f64>,
    // Relative residual at exit.
    rel_resid: f64,
}
363
/// Plain conjugate gradients for `(A + ridge*I) x = b`, warm-started from
/// `x0`. Returns the iterate and its relative residual; `None` signals a
/// dimension mismatch or numerical breakdown (non-finite values, or
/// non-positive curvature `p'Ap <= 0`).
fn cg_solve_from(
    a: &Array2<f64>,
    b: &Array1<f64>,
    x0: Array1<f64>,
    max_iter: usize,
    tol: f64,
    ridge: f64,
) -> Option<CgResult> {
    let n = a.nrows();
    if a.ncols() != n || b.len() != n {
        return None;
    }
    let mut x = x0;
    // Residual for the warm start: r = b - (A + ridge*I) x.
    let mut ax = a.dot(&x);
    if ridge > 0.0 {
        for i in 0..n {
            ax[i] += ridge * x[i];
        }
    }
    let mut r = b - &ax;
    let mut p = r.clone();
    let mut rs_old = r.dot(&r);
    if !rs_old.is_finite() {
        return None;
    }
    // Residuals are judged relative to ||b||, floored at 1 for tiny rhs.
    let b_norm = b.dot(b).sqrt().max(1.0);
    let tol_abs = tol * b_norm;
    if rs_old.sqrt() <= tol_abs {
        return Some(CgResult {
            x,
            rel_resid: rs_old.sqrt() / b_norm,
        });
    }
    for _ in 0..max_iter {
        let mut ap = a.dot(&p);
        if ridge > 0.0 {
            for i in 0..n {
                ap[i] += ridge * p[i];
            }
        }
        let p_ap = p.dot(&ap);
        // Non-positive curvature violates the CG assumptions — bail out.
        if !p_ap.is_finite() || p_ap <= 0.0 {
            return None;
        }
        let alpha = rs_old / p_ap;
        if !alpha.is_finite() {
            return None;
        }
        x.scaled_add(alpha, &p);
        r.scaled_add(-alpha, &ap);
        let rs_new = r.dot(&r);
        if !rs_new.is_finite() {
            return None;
        }
        if rs_new.sqrt() <= tol_abs {
            return Some(CgResult {
                x,
                rel_resid: rs_new.sqrt() / b_norm,
            });
        }
        // p = r + beta * p — standard CG direction update.
        let beta = rs_new / rs_old;
        p *= beta;
        p += &r;
        rs_old = rs_new;
    }
    // Budget exhausted: report the current iterate and its residual.
    Some(CgResult {
        x,
        rel_resid: rs_old.sqrt() / b_norm,
    })
}
435
/// Solve `(A + ridge*I) x = b` by Gaussian elimination with partial pivoting
/// on private copies of the inputs. Returns `None` when a pivot is
/// (near-)zero or any value becomes non-finite.
fn dense_solve_shifted(a: &Array2<f64>, b: &Array1<f64>, ridge: f64) -> Option<Array1<f64>> {
    let n = a.nrows();
    if a.ncols() != n || b.len() != n {
        return None;
    }
    // Work on copies; the shift only touches the diagonal.
    let mut mat = a.clone();
    if ridge > 0.0 {
        for i in 0..n {
            mat[[i, i]] += ridge;
        }
    }
    let mut rhs = b.clone();

    // Forward elimination with row pivoting.
    for k in 0..n {
        // Select the largest-magnitude pivot in column k, rows k..n.
        let mut pivot_row = k;
        let mut pivot_abs = mat[[k, k]].abs();
        for i in (k + 1)..n {
            let cand = mat[[i, k]].abs();
            if cand > pivot_abs {
                pivot_abs = cand;
                pivot_row = i;
            }
        }
        if !pivot_abs.is_finite() || pivot_abs <= 1e-14 {
            return None;
        }
        if pivot_row != k {
            // Swap rows k and pivot_row from column k on (earlier columns
            // have already been eliminated to zero).
            for j in k..n {
                let tmp = mat[[k, j]];
                mat[[k, j]] = mat[[pivot_row, j]];
                mat[[pivot_row, j]] = tmp;
            }
            let tmp_rhs = rhs[k];
            rhs[k] = rhs[pivot_row];
            rhs[pivot_row] = tmp_rhs;
        }

        let pivot = mat[[k, k]];
        for i in (k + 1)..n {
            let factor = mat[[i, k]] / pivot;
            mat[[i, k]] = 0.0;
            for j in (k + 1)..n {
                mat[[i, j]] -= factor * mat[[k, j]];
            }
            rhs[i] -= factor * rhs[k];
        }
    }

    // Back substitution on the now upper-triangular system.
    let mut x = Array1::<f64>::zeros(n);
    for ii in 0..n {
        let i = n - 1 - ii;
        let mut sum = rhs[i];
        for j in (i + 1)..n {
            sum -= mat[[i, j]] * x[j];
        }
        let diag = mat[[i, i]];
        if !diag.is_finite() || diag.abs() <= 1e-14 {
            return None;
        }
        x[i] = sum / diag;
    }
    if x.iter().all(|v| v.is_finite()) {
        Some(x)
    } else {
        None
    }
}
503
/// At or below this dimension a dense direct solve is used instead of CG.
#[inline]
fn prefer_dense_direct(n: usize) -> bool {
    const DENSE_CUTOFF: usize = 128;
    n <= DENSE_CUTOFF
}
508
509fn build_masked_subproblem_system(
510 h: &Array2<f64>,
511 rhs: &Array1<f64>,
512 active: Option<&[bool]>,
513) -> (Array2<f64>, Array1<f64>) {
514 let mut effective_h = h.clone();
515 let mut effective_rhs = rhs.clone();
516 if let Some(active) = active
517 && !active.is_empty()
518 {
519 for i in 0..active.len() {
520 if active[i] {
521 effective_rhs[i] = 0.0;
522 for j in 0..active.len() {
523 effective_h[[i, j]] = 0.0;
524 effective_h[[j, i]] = 0.0;
525 }
526 effective_h[[i, i]] = 1.0;
527 }
528 }
529 }
530 (effective_h, effective_rhs)
531}
532
/// Solve the trust-region subproblem `min g's + 0.5 s'Hs, ||s|| <= delta` on
/// the free variables via Levenberg-style diagonal shifts.
///
/// Tries the unshifted Newton step first; if it leaves the region (or gives
/// no predicted decrease), the shift `lambda` is grown geometrically until a
/// feasible step appears, then bisected toward the smallest feasible shift.
/// Returns the step and its predicted model reduction.
fn dense_trust_region_step(
    h: &Array2<f64>,
    g: &Array1<f64>,
    delta: f64,
    active: Option<&[bool]>,
) -> Option<(Array1<f64>, f64)> {
    let rhs = -g.clone();
    let (effective_h, effective_rhs) = build_masked_subproblem_system(h, &rhs, active);
    let solve_with_shift = |lambda: f64| dense_solve_shifted(&effective_h, &effective_rhs, lambda);
    // Predicted reduction of the (unmasked) quadratic model for step s.
    let predicted = |s: &Array1<f64>| {
        let hs = h.dot(s);
        -(g.dot(s) + 0.5 * s.dot(&hs))
    };

    // Fast path: the pure Newton step already lies inside the region.
    if let Some(s) = solve_with_shift(0.0) {
        let s_norm = s.dot(&s).sqrt();
        let pred = predicted(&s);
        if s_norm.is_finite() && s_norm <= delta && pred.is_finite() && pred > 0.0 {
            return Some((s, pred));
        }
    }

    // Grow lambda geometrically until some shifted step becomes acceptable.
    let mut lambda_lo = 0.0;
    let mut lambda_hi = 1e-8f64;
    let mut best: Option<(Array1<f64>, f64)> = None;
    for _ in 0..80 {
        match solve_with_shift(lambda_hi) {
            Some(s) => {
                let s_norm = s.dot(&s).sqrt();
                let pred = predicted(&s);
                if s_norm.is_finite() && s_norm <= delta && pred.is_finite() && pred > 0.0 {
                    best = Some((s, pred));
                    break;
                }
            }
            None => {}
        }
        lambda_lo = lambda_hi;
        lambda_hi *= 2.0;
    }
    // No feasible shift at all: propagate None via `?`.
    let (mut best_step, mut best_pred) = best?;
    // Bisect toward the smallest lambda whose step stays feasible — smaller
    // shifts give longer (closer-to-Newton) steps.
    for _ in 0..80 {
        let lambda_mid = 0.5 * (lambda_lo + lambda_hi);
        if !lambda_mid.is_finite() || (lambda_hi - lambda_lo) <= 1e-12 * lambda_hi.max(1.0) {
            break;
        }
        match solve_with_shift(lambda_mid) {
            Some(s) => {
                let s_norm = s.dot(&s).sqrt();
                let pred = predicted(&s);
                if s_norm.is_finite() && s_norm <= delta && pred.is_finite() && pred > 0.0 {
                    lambda_hi = lambda_mid;
                    best_step = s;
                    best_pred = pred;
                } else {
                    lambda_lo = lambda_mid;
                }
            }
            None => {
                lambda_lo = lambda_mid;
            }
        }
    }
    Some((best_step, best_pred))
}
598
/// Iteration budget for CG: at dimensions up to 128 allow a full `n`
/// iterations (CG is exact in n steps); above that, cap at 200 but never go
/// below `base`.
fn cg_iter_cap(n: usize, base: usize) -> usize {
    const FULL_SOLVE_N: usize = 128;
    const HARD_CAP: usize = 200;
    if n <= FULL_SOLVE_N {
        return n.max(1);
    }
    n.min(HARD_CAP).max(base)
}
609
610fn cg_solve_adaptive(
612 a: &Array2<f64>,
613 b: &Array1<f64>,
614 base_iter: usize,
615 tol: f64,
616 ridge: f64,
617) -> Option<Array1<f64>> {
618 let n = a.nrows();
619 if prefer_dense_direct(n) {
620 return dense_solve_shifted(a, b, ridge);
621 }
622 let cap1 = cg_iter_cap(n, base_iter);
623 let stage1 = cg_solve_from(a, b, Array1::<f64>::zeros(n), cap1, tol, ridge)?;
624 if stage1.rel_resid.is_finite() && stage1.rel_resid <= tol * 10.0 {
625 return Some(stage1.x);
626 }
627 let cap2 = cg_iter_cap(n, base_iter.saturating_mul(2));
628 if cap2 <= cap1 {
629 return Some(stage1.x);
630 }
631 let refine_iters = cap2.saturating_sub(cap1).max(1);
632 let stage2 = cg_solve_from(a, b, stage1.x, refine_iters, tol * 0.1, ridge)?;
633 Some(stage2.x)
634}
635
636fn scaled_identity(n: usize, lambda: f64) -> Array2<f64> {
638 Array2::<f64>::eye(n) * lambda
639}
640
641fn hessian_is_effectively_symmetric(a: &Array2<f64>) -> bool {
642 let n = a.nrows();
643 let mut max_skew = 0.0f64;
644 let mut scale = 0.0f64;
645 for i in 0..n {
646 for j in (i + 1)..n {
647 let aij = a[[i, j]];
648 let aji = a[[j, i]];
649 max_skew = max_skew.max((aij - aji).abs());
650 scale = scale.max(aij.abs()).max(aji.abs());
651 }
652 }
653 max_skew <= 1e-12 * (1.0 + scale)
654}
655
656fn symmetrize_into(workspace: &mut Array2<f64>, a: &Array2<f64>) {
657 workspace.assign(a);
658 let n = a.nrows();
659 for i in 0..n {
660 for j in (i + 1)..n {
661 let v = 0.5 * (a[[i, j]] + a[[j, i]]);
662 workspace[[i, j]] = v;
663 workspace[[j, i]] = v;
664 }
665 }
666}
667
668fn has_finite_positive_diagonal(a: &Array2<f64>) -> bool {
669 for i in 0..a.nrows() {
670 let diag = a[[i, i]];
671 if !diag.is_finite() || diag <= 0.0 {
672 return false;
673 }
674 }
675 true
676}
677
/// Apply the inverse BFGS update to `h_inv` with step `s` and gradient
/// difference `y`:
/// `H+ = H + (1 + rho*y'Hy)*rho * ss' - rho*(Hy s' + s y'H)`, `rho = 1/(s'y)`.
///
/// `backup` is overwritten with the pre-update matrix so the caller can
/// restore it when the update is rejected. Returns `false` when the updated
/// matrix fails the cheap positive-definiteness screen (finite positive
/// diagonal) — e.g. when `s'y <= 0` makes `rho` non-finite.
fn apply_inverse_bfgs_update_in_place(
    h_inv: &mut Array2<f64>,
    s: &Array1<f64>,
    y: &Array1<f64>,
    backup: &mut Array2<f64>,
) -> bool {
    backup.assign(h_inv);
    // No explicit guard on s'y: a zero/negative product yields non-finite
    // entries which the final diagonal check converts into a rejection.
    let rho = 1.0 / s.dot(y);
    let hy = backup.dot(y);
    let yhy = y.dot(&hy);
    let coeff = (1.0 + yhy * rho) * rho;
    let n = h_inv.nrows();
    // Fill the upper triangle and mirror it so h_inv stays exactly symmetric.
    for i in 0..n {
        for j in i..n {
            let v = backup[[i, j]] + coeff * s[i] * s[j] - rho * (hy[i] * s[j] + s[i] * hy[j]);
            h_inv[[i, j]] = v;
            h_inv[[j, i]] = v;
        }
    }
    has_finite_positive_diagonal(h_inv)
}
699
/// Internal box-bound data: component-wise `[lower, upper]` intervals plus
/// the tolerance used to decide when a coordinate counts as "at" a bound.
/// Validated construction happens in `Bounds::new`; `BoxSpec` itself does
/// not check its invariants.
#[derive(Clone)]
struct BoxSpec {
    lower: Array1<f64>,
    upper: Array1<f64>,
    // Activity tolerance: x[i] within `tol` of a bound counts as touching it.
    tol: f64,
}
707
708impl BoxSpec {
709 fn new(lower: Array1<f64>, upper: Array1<f64>, tol: f64) -> Self {
710 Self { lower, upper, tol }
711 }
712
713 fn project(&self, x: &Array1<f64>) -> Array1<f64> {
714 let mut z = x.clone();
715 for i in 0..z.len() {
716 let lo = self.lower[i];
717 let hi = self.upper[i];
718 if z[i] < lo {
719 z[i] = lo;
720 } else if z[i] > hi {
721 z[i] = hi;
722 }
723 }
724 z
725 }
726
727 fn active_mask(&self, x: &Array1<f64>, g: &Array1<f64>) -> Vec<bool> {
728 let mut mask = vec![false; x.len()];
729 for i in 0..x.len() {
730 let lo = self.lower[i];
731 let hi = self.upper[i];
732 let tol = self.tol;
733 let at_lower = x[i] <= lo + tol;
734 let at_upper = x[i] >= hi - tol;
735 mask[i] = (at_lower && g[i] >= 0.0) || (at_upper && g[i] <= 0.0);
736 }
737 mask
738 }
739
740 fn projected_gradient(&self, x: &Array1<f64>, g: &Array1<f64>) -> Array1<f64> {
741 let mut gp = g.clone();
742 for i in 0..x.len() {
743 let lo = self.lower[i];
744 let hi = self.upper[i];
745 let tol = self.tol;
746 let at_lower = x[i] <= lo + tol;
747 let at_upper = x[i] >= hi - tol;
748 if (at_lower && g[i] >= 0.0) || (at_upper && g[i] <= 0.0) {
749 gp[i] = 0.0;
750 }
751 }
752 gp
753 }
754}
755
/// Validation failures for user-supplied box bounds.
#[derive(Debug, thiserror::Error)]
pub enum BoundsError {
    /// `lower` and `upper` have different lengths.
    #[error("lower/upper lengths differ")]
    DimensionMismatch,
    /// `lower[index] > upper[index]`.
    #[error("lower bound exceeds upper bound at index {index}")]
    InvertedInterval { index: usize },
    /// The activity tolerance was negative or non-finite.
    #[error("bound tolerance must be finite and >= 0")]
    InvalidTolerance,
}
765
/// Validated box bounds for an optimization problem. The only way to build
/// one is `Bounds::new`, so a `Bounds` value always has consistent lengths,
/// non-inverted intervals, and a valid tolerance.
#[derive(Clone)]
pub struct Bounds {
    spec: BoxSpec,
}
770
771impl Bounds {
772 pub fn new(lower: Array1<f64>, upper: Array1<f64>, tol: f64) -> Result<Self, BoundsError> {
773 if lower.len() != upper.len() {
774 return Err(BoundsError::DimensionMismatch);
775 }
776 for i in 0..lower.len() {
777 if lower[i] > upper[i] {
778 return Err(BoundsError::InvertedInterval { index: i });
779 }
780 }
781 if !tol.is_finite() || tol < 0.0 {
782 return Err(BoundsError::InvalidTolerance);
783 }
784 Ok(Self {
785 spec: BoxSpec::new(lower, upper, tol),
786 })
787 }
788}
789
/// Finite-difference stencil chosen per coordinate.
#[derive(Debug, Clone, Copy, PartialEq)]
enum FiniteDiffStencil {
    /// Symmetric two-sided difference with step `h`.
    Central { h: f64 },
    /// One-sided forward difference (used when only upward room exists).
    Forward { h: f64 },
    /// One-sided backward difference (used when only downward room exists).
    Backward { h: f64 },
    /// Coordinate cannot move; its gradient component is reported as zero.
    Fixed,
}
797
798fn finite_difference_stencil(
799 bounds: Option<&BoxSpec>,
800 x: &Array1<f64>,
801 i: usize,
802 base_h: f64,
803) -> FiniteDiffStencil {
804 if !base_h.is_finite() || base_h <= 0.0 {
805 return FiniteDiffStencil::Fixed;
806 }
807 if let Some(bounds) = bounds {
808 let room_lo = (x[i] - bounds.lower[i]).max(0.0);
809 let room_hi = (bounds.upper[i] - x[i]).max(0.0);
810 if room_lo >= base_h && room_hi >= base_h {
811 FiniteDiffStencil::Central { h: base_h }
812 } else if room_hi >= room_lo && room_hi > 0.0 {
813 FiniteDiffStencil::Forward {
814 h: base_h.min(room_hi),
815 }
816 } else if room_lo > 0.0 {
817 FiniteDiffStencil::Backward {
818 h: base_h.min(room_lo),
819 }
820 } else if room_hi > 0.0 {
821 FiniteDiffStencil::Forward {
822 h: base_h.min(room_hi),
823 }
824 } else {
825 FiniteDiffStencil::Fixed
826 }
827 } else {
828 FiniteDiffStencil::Central { h: base_h }
829 }
830}
831
/// Which line-search algorithm to run.
#[derive(Debug, Clone, Copy)]
enum LineSearchStrategy {
    /// Line search enforcing the strong Wolfe conditions.
    StrongWolfe,
    /// Armijo-style backtracking line search.
    Backtracking,
}
838
/// Whether a struggling solver may fall back to another method.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FallbackPolicy {
    /// Never switch methods.
    Never,
    /// Automatically fall back to BFGS.
    AutoBfgs,
}
844
/// How to handle steps whose objective change is below the noise level.
#[derive(Debug, Clone, Copy)]
enum FlatStepPolicy {
    /// Reject flat steps outright.
    Strict,
    /// Take a midpoint step with a small perturbation scaled by `scale`.
    /// NOTE(review): the consuming code is outside this chunk — confirm the
    /// exact semantics at the use site.
    MidpointWithJiggle { scale: f64 },
}
850
/// Last-resort rescue strategy when ordinary steps stall.
#[derive(Debug, Clone, Copy)]
enum RescuePolicy {
    /// No rescue attempts.
    Off,
    /// Coordinate-wise hybrid probing; `pool_mult` and `heads` presumably
    /// size the candidate pool and number of probes — the consuming code is
    /// outside this chunk, confirm at the use site.
    CoordinateHybrid { pool_mult: f64, heads: usize },
}
856
/// Stall detection over a trailing window of iterations.
#[derive(Debug, Clone, Copy)]
enum StallPolicy {
    /// Stall detection disabled.
    Off,
    /// Enabled with the given window length.
    On { window: usize },
}
862
/// Which rule accepted a candidate step (roughly strongest to weakest; see
/// `classify_line_search_accept` for the first four).
#[derive(Debug, Clone, Copy)]
enum AcceptKind {
    /// Armijo plus the strong curvature condition.
    StrongWolfe,
    /// Armijo with the noise-relaxed curvature condition.
    ApproxWolfe,
    /// Nonmonotone (GLL) acceptance against the windowed maximum.
    Nonmonotone,
    /// Accepted because the gradient norm dropped sufficiently.
    GradDrop,
    /// Accepted by a trust-region step (used outside this chunk).
    TrustRegion,
    /// Accepted by a rescue probe (used outside this chunk).
    Rescue,
}
872
/// Internal line-search failure modes.
#[derive(Debug)]
enum LineSearchError {
    /// Attempt budget exhausted (payload: attempts used).
    MaxAttempts(usize),
    /// Step length underflowed to an unusable size.
    StepSizeTooSmall,
    /// The objective itself failed to evaluate.
    ObjectiveFailed(String),
}
879
/// Public, simplified reason a line search failed (objective failures are
/// reported separately through `BfgsError::ObjectiveFailed`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LineSearchFailureReason {
    MaxAttempts,
    StepSizeTooSmall,
}
885
/// Successful line-search payload. NOTE(review): from the types this looks
/// like `(alpha, f_new, g_new, func_evals, grad_evals, accept_kind)`, but the
/// consuming code lies outside this chunk — confirm before relying on it.
type LsResult = Result<(f64, f64, Array1<f64>, usize, usize, AcceptKind), LineSearchError>;
// Attempt budget for the strong-Wolfe search.
const WOLFE_MAX_ATTEMPTS: usize = 20;
// Attempt budget for plain backtracking.
const BACKTRACKING_MAX_ATTEMPTS: usize = 50;
889
/// Top-level errors reported by the BFGS solver.
#[derive(Debug, thiserror::Error)]
pub enum BfgsError {
    /// A bug in the solver itself (broken invariant), not a user error.
    #[error("Internal invariant violated: {message}")]
    InternalInvariant { message: String },
    /// The objective returned a fatal evaluation error.
    #[error("Objective evaluation failed: {message}")]
    ObjectiveFailed { message: String },
    #[error(
        "The line search failed ({failure_reason:?}) after {max_attempts} attempts. The optimization landscape may be pathological."
    )]
    LineSearchFailed {
        /// Best solution found before the failure (boxed: large payload).
        last_solution: Box<Solution>,
        /// The attempt budget that was exhausted.
        max_attempts: usize,
        /// Which line-search condition failed.
        failure_reason: LineSearchFailureReason,
    },
    #[error(
        "Maximum number of iterations reached without converging. The best solution found is returned."
    )]
    MaxIterationsReached {
        /// Best solution found within the iteration budget.
        last_solution: Box<Solution>,
    },
    #[error("The gradient norm was NaN or infinity, indicating numerical instability.")]
    GradientIsNaN,
    #[error(
        "The line search step size became smaller than machine epsilon, indicating that the algorithm is stuck."
    )]
    StepSizeTooSmall,
}
922
/// Invalid solver configuration values (see `Tolerance` / `MaxIterations`).
#[derive(Debug, thiserror::Error)]
pub enum ConfigError {
    #[error("tolerance must be finite and > 0")]
    InvalidTolerance,
    #[error("max_iterations must be >= 1")]
    InvalidMaxIterations,
}
930
/// Structural errors for dense matrix arguments.
#[derive(Debug, thiserror::Error)]
pub enum MatrixError {
    #[error("matrix must be square; got {rows}x{cols}")]
    NonSquare { rows: usize, cols: usize },
    #[error("matrix must be symmetric")]
    NotSymmetric,
}
938
939fn ensure_square(a: &Array2<f64>) -> Result<usize, MatrixError> {
940 if a.nrows() == a.ncols() {
941 Ok(a.nrows())
942 } else {
943 Err(MatrixError::NonSquare {
944 rows: a.nrows(),
945 cols: a.ncols(),
946 })
947 }
948}
949
950fn ensure_symmetric(a: &Array2<f64>) -> Result<(), MatrixError> {
951 let n = ensure_square(a)?;
952 for i in 0..n {
953 for j in 0..i {
954 if !a[[i, j]].is_finite()
955 || !a[[j, i]].is_finite()
956 || (a[[i, j]] - a[[j, i]]).abs()
957 > 1e-10 * (1.0 + a[[i, j]].abs().max(a[[j, i]].abs()))
958 {
959 return Err(MatrixError::NotSymmetric);
960 }
961 }
962 }
963 Ok(())
964}
965
/// Dense matrix whose symmetry has already been verified by the caller.
/// Construction only via `from_verified`, so holders may assume symmetry.
#[derive(Debug, Clone)]
struct SymmetricMatrix {
    data: Array2<f64>,
}

impl SymmetricMatrix {
    /// Wrap `data`; the caller is responsible for having verified symmetry
    /// (e.g. via `ensure_symmetric`).
    fn from_verified(data: Array2<f64>) -> Self {
        Self { data }
    }

    /// Borrow the underlying dense storage.
    fn as_array(&self) -> &Array2<f64> {
        &self.data
    }
}
980
/// Inverse-Hessian approximation assumed symmetric positive definite by
/// construction; wraps `SymmetricMatrix` so the assumption is explicit in
/// the type.
#[derive(Debug, Clone)]
struct SpdInverseHessian {
    data: SymmetricMatrix,
}

impl SpdInverseHessian {
    /// Wrap `data`; the caller vouches for symmetry and positive
    /// definiteness.
    fn from_verified(data: Array2<f64>) -> Self {
        Self {
            data: SymmetricMatrix::from_verified(data),
        }
    }

    /// Consume the wrapper and return the dense matrix.
    fn into_inner(self) -> Array2<f64> {
        self.data.data
    }
}
997
/// Mutable view over a dense Hessian that keeps every write symmetric.
pub struct SymmetricHessianMut<'a> {
    data: &'a mut Array2<f64>,
}

impl<'a> SymmetricHessianMut<'a> {
    /// Wrap `data`, rejecting non-square matrices.
    pub fn new(data: &'a mut Array2<f64>) -> Result<Self, MatrixError> {
        ensure_square(data)?;
        Ok(Self { data })
    }

    /// Fill every entry with `value`.
    pub fn fill(&mut self, value: f64) {
        self.data.fill(value);
    }

    /// Set `(i, j)` and its mirror `(j, i)` so symmetry is preserved.
    pub fn set(&mut self, i: usize, j: usize, value: f64) {
        self.data[[i, j]] = value;
        self.data[[j, i]] = value;
    }

    /// Overwrite the wrapped matrix from `dense`, after verifying `dense` is
    /// symmetric and the same shape.
    pub fn assign_dense(&mut self, dense: &Array2<f64>) -> Result<(), MatrixError> {
        ensure_symmetric(dense)?;
        if dense.raw_dim() != self.data.raw_dim() {
            // Shape mismatch is reported using the foreign matrix's dims.
            return Err(MatrixError::NonSquare {
                rows: dense.nrows(),
                cols: dense.ncols(),
            });
        }
        self.data.assign(dense);
        Ok(())
    }
}
1029
1030#[derive(Debug, Clone, Copy)]
1031pub struct Tolerance(f64);
1032
1033impl Tolerance {
1034 pub const DEFAULT: Self = Self(1e-5);
1035
1036 pub fn new(value: f64) -> Result<Self, ConfigError> {
1037 if value.is_finite() && value > 0.0 {
1038 Ok(Self(value))
1039 } else {
1040 Err(ConfigError::InvalidTolerance)
1041 }
1042 }
1043
1044 fn get(self) -> f64 {
1045 self.0
1046 }
1047}
1048
1049#[derive(Debug, Clone, Copy)]
1050pub struct MaxIterations(usize);
1051
1052impl MaxIterations {
1053 pub const DEFAULT: Self = Self(100);
1054
1055 pub fn new(value: usize) -> Result<Self, ConfigError> {
1056 if value >= 1 {
1057 Ok(Self(value))
1058 } else {
1059 Err(ConfigError::InvalidMaxIterations)
1060 }
1061 }
1062
1063 fn get(self) -> usize {
1064 self.0
1065 }
1066}
1067
/// Behavior preset trading off robustness, determinism, and speed; the exact
/// effects are applied by the individual solvers (e.g. `Aggressive` selects
/// the ARC solver for second-order problems).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Profile {
    Robust,
    Deterministic,
    Aggressive,
}
1074
/// Value + gradient pair returned by a first-order evaluation.
#[derive(Debug, Clone)]
pub struct FirstOrderSample {
    /// Objective value at the evaluated point.
    pub value: f64,
    /// Gradient at the evaluated point.
    pub gradient: Array1<f64>,
}
1080
/// Value, gradient, and optional Hessian from a second-order evaluation.
#[derive(Debug, Clone)]
pub struct SecondOrderSample {
    pub value: f64,
    pub gradient: Array1<f64>,
    /// `None` when the objective declines to provide a Hessian at this point.
    pub hessian: Option<Array2<f64>>,
}
1087
/// Whether a fixed-point iteration should keep going.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FixedPointStatus {
    Continue,
    Stop,
}
1093
/// One fixed-point evaluation: value, proposed step, and continue/stop flag.
#[derive(Debug, Clone)]
pub struct FixedPointSample {
    pub value: f64,
    /// Step proposed by the fixed-point map.
    pub step: Array1<f64>,
    pub status: FixedPointStatus,
}
1100
/// Which stationarity measure a `Solution` reports.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StationarityKind {
    /// Convergence judged by the (projected) gradient norm.
    ProjectedGradient,
    /// Convergence judged by the fixed-point step norm.
    StepNorm,
}
1106
1107#[derive(Debug, Clone)]
1111pub struct Solution {
1112 pub final_point: Array1<f64>,
1114 pub final_value: f64,
1116 pub final_gradient: Option<Array1<f64>>,
1118 pub final_hessian: Option<Array2<f64>>,
1120 pub final_gradient_norm: Option<f64>,
1122 pub final_step_norm: Option<f64>,
1124 pub stationarity_kind: StationarityKind,
1126 pub iterations: usize,
1128 pub func_evals: usize,
1130 pub grad_evals: usize,
1132 pub hess_evals: usize,
1134}
1135
impl Solution {
    /// Build a solution for a gradient-based solver: stationarity is the
    /// (projected) gradient norm; no step norm is recorded.
    fn gradient_based(
        final_point: Array1<f64>,
        final_value: f64,
        final_gradient: Array1<f64>,
        final_gradient_norm: f64,
        final_hessian: Option<Array2<f64>>,
        iterations: usize,
        func_evals: usize,
        grad_evals: usize,
        hess_evals: usize,
    ) -> Self {
        Self {
            final_point,
            final_value,
            final_gradient: Some(final_gradient),
            final_hessian,
            final_gradient_norm: Some(final_gradient_norm),
            final_step_norm: None,
            stationarity_kind: StationarityKind::ProjectedGradient,
            iterations,
            func_evals,
            grad_evals,
            hess_evals,
        }
    }

    /// Build a solution for a fixed-point solver: stationarity is the step
    /// norm; gradient/Hessian data and their counters are absent (zero).
    fn fixed_point(
        final_point: Array1<f64>,
        final_value: f64,
        final_step_norm: f64,
        iterations: usize,
        func_evals: usize,
    ) -> Self {
        Self {
            final_point,
            final_value,
            final_gradient: None,
            final_hessian: None,
            final_gradient_norm: None,
            final_step_norm: Some(final_step_norm),
            stationarity_kind: StationarityKind::StepNorm,
            iterations,
            func_evals,
            grad_evals: 0,
            hess_evals: 0,
        }
    }
}
1185
/// Failure reported by an objective evaluation.
///
/// `Recoverable` errors let the solver retry (e.g. with a shorter step);
/// `Fatal` errors abort the run.
#[derive(Debug, Clone)]
pub enum ObjectiveEvalError {
    Recoverable { message: String },
    Fatal { message: String },
}

impl ObjectiveEvalError {
    /// Build a recoverable (retryable) evaluation error.
    pub fn recoverable(message: impl Into<String>) -> Self {
        ObjectiveEvalError::Recoverable {
            message: message.into(),
        }
    }

    /// Build a fatal (run-aborting) evaluation error.
    pub fn fatal(message: impl Into<String>) -> Self {
        ObjectiveEvalError::Fatal {
            message: message.into(),
        }
    }
}
1205
/// Objective that can report a cost value at a point.
pub trait ZerothOrderObjective {
    /// Evaluate the scalar cost at `x`.
    fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError>;
}
1209
/// Objective that can also report a gradient.
pub trait FirstOrderObjective: ZerothOrderObjective {
    /// Evaluate cost and gradient at `x`.
    fn eval_grad(&mut self, x: &Array1<f64>) -> Result<FirstOrderSample, ObjectiveEvalError>;

    /// Optional hook: inform finite-difference implementations of the box
    /// bounds so their probe points stay feasible. Default: ignore.
    fn set_finite_difference_bounds(&mut self, _bounds: Option<&Bounds>) {}
}
1215
/// Objective that can additionally report a (possibly absent) Hessian.
pub trait SecondOrderObjective: FirstOrderObjective {
    /// Evaluate cost, gradient, and optionally the Hessian at `x`.
    fn eval_hessian(&mut self, x: &Array1<f64>) -> Result<SecondOrderSample, ObjectiveEvalError>;
}
1219
/// Fixed-point style objective: produces a step toward a fixed point rather
/// than a gradient.
pub trait FixedPointObjective {
    /// Evaluate the value, the proposed step, and a continue/stop status.
    fn eval_step(&mut self, x: &Array1<f64>) -> Result<FixedPointSample, ObjectiveEvalError>;
}
1223
/// Adapter that upgrades a `ZerothOrderObjective` to first order via finite
/// differences, respecting optional box bounds when placing probe points.
pub struct FiniteDiffGradient<ObjFn> {
    inner: ObjFn,
    // Relative base step; each coordinate uses `step * (1 + |x_i|)`.
    step: f64,
    // Bounds used to pick one-sided stencils near the box edges.
    bounds: Option<Bounds>,
}

impl<ObjFn> FiniteDiffGradient<ObjFn> {
    /// Wrap `inner` with the default relative step of `1e-4` and no bounds.
    pub fn new(inner: ObjFn) -> Self {
        Self {
            inner,
            step: 1e-4,
            bounds: None,
        }
    }

    /// Builder: override the relative base step (validated lazily in
    /// `eval_grad`, not here).
    pub fn with_step(mut self, step: f64) -> Self {
        self.step = step;
        self
    }

    /// Builder: attach bounds for stencil selection.
    pub fn with_bounds(mut self, bounds: Bounds) -> Self {
        self.bounds = Some(bounds);
        self
    }
}
1249
/// Pass-through cost evaluation; finite differencing only affects gradients.
impl<ObjFn> ZerothOrderObjective for FiniteDiffGradient<ObjFn>
where
    ObjFn: ZerothOrderObjective,
{
    fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
        self.inner.eval_cost(x)
    }
}
1258
impl<ObjFn> FirstOrderObjective for FiniteDiffGradient<ObjFn>
where
    ObjFn: ZerothOrderObjective,
{
    /// Approximate the gradient with per-coordinate finite differences,
    /// choosing central or one-sided stencils so probe points respect the
    /// configured bounds. The per-coordinate step is `step * (1 + |x_i|)`.
    ///
    /// `recover_on_nonfinite_cost` is defined elsewhere in this file; from
    /// its use here it appears to turn non-finite costs into errors —
    /// confirm at its definition.
    fn eval_grad(&mut self, x: &Array1<f64>) -> Result<FirstOrderSample, ObjectiveEvalError> {
        if !self.step.is_finite() || self.step <= 0.0 {
            return Err(ObjectiveEvalError::fatal(
                "finite-difference gradient step must be positive and finite",
            ));
        }
        // Base value, reused by the one-sided stencils below.
        let value = recover_on_nonfinite_cost(self.inner.eval_cost(x)?)?;
        let mut gradient = Array1::<f64>::zeros(x.len());
        for i in 0..x.len() {
            // Relative step keeps the perturbation meaningful at any scale.
            let h = self.step * (1.0 + x[i].abs());
            match finite_difference_stencil(self.bounds.as_ref().map(|b| &b.spec), x, i, h) {
                FiniteDiffStencil::Central { h } => {
                    let mut xp = x.clone();
                    xp[i] += h;
                    let fp = recover_on_nonfinite_cost(self.inner.eval_cost(&xp)?)?;
                    let mut xm = x.clone();
                    xm[i] -= h;
                    let fm = recover_on_nonfinite_cost(self.inner.eval_cost(&xm)?)?;
                    gradient[i] = (fp - fm) / (2.0 * h);
                }
                FiniteDiffStencil::Forward { h } => {
                    let mut xp = x.clone();
                    xp[i] += h;
                    let fp = recover_on_nonfinite_cost(self.inner.eval_cost(&xp)?)?;
                    gradient[i] = (fp - value) / h;
                }
                FiniteDiffStencil::Backward { h } => {
                    let mut xm = x.clone();
                    xm[i] -= h;
                    let fm = recover_on_nonfinite_cost(self.inner.eval_cost(&xm)?)?;
                    gradient[i] = (value - fm) / h;
                }
                FiniteDiffStencil::Fixed => {
                    // Pinned coordinate: report a zero gradient component.
                    gradient[i] = 0.0;
                }
            }
        }
        Ok(FirstOrderSample { value, gradient })
    }

    /// Store a private copy of the bounds for stencil selection.
    fn set_finite_difference_bounds(&mut self, bounds: Option<&Bounds>) {
        self.bounds = bounds.map(|bounds| Bounds {
            spec: bounds.spec.clone(),
        });
    }
}
1309
/// Declarative description of a first-order optimization problem; turned
/// into a concrete solver by `optimize` / `IntoAutoSolver`.
pub struct Problem<ObjFn> {
    x0: Array1<f64>,
    objective: ObjFn,
    bounds: Option<Bounds>,
    tolerance: Tolerance,
    max_iterations: MaxIterations,
    profile: Profile,
}
1318
impl<ObjFn> Problem<ObjFn>
where
    ObjFn: FirstOrderObjective,
{
    /// Describe a first-order problem starting at `x0`, with defaults:
    /// no bounds, `Tolerance::DEFAULT`, `MaxIterations::DEFAULT`, robust
    /// profile.
    pub fn new(x0: Array1<f64>, objective: ObjFn) -> Self {
        Self {
            x0,
            objective,
            bounds: None,
            tolerance: Tolerance::DEFAULT,
            max_iterations: MaxIterations::DEFAULT,
            profile: Profile::Robust,
        }
    }

    /// Attach box bounds; also forwarded to the objective so that
    /// finite-difference probes stay feasible.
    pub fn with_bounds(mut self, bounds: Bounds) -> Self {
        self.objective.set_finite_difference_bounds(Some(&bounds));
        self.bounds = Some(bounds);
        self
    }

    /// Override the convergence tolerance.
    pub fn with_tolerance(mut self, tolerance: Tolerance) -> Self {
        self.tolerance = tolerance;
        self
    }

    /// Override the iteration budget.
    pub fn with_max_iterations(mut self, max_iterations: MaxIterations) -> Self {
        self.max_iterations = max_iterations;
        self
    }

    /// Select a behavior profile.
    pub fn with_profile(mut self, profile: Profile) -> Self {
        self.profile = profile;
        self
    }
}
1355
/// Declarative description of a second-order problem; `optimize` routes it
/// to Newton trust-region or ARC depending on the profile.
pub struct SecondOrderProblem<ObjFn> {
    x0: Array1<f64>,
    objective: ObjFn,
    bounds: Option<Bounds>,
    tolerance: Tolerance,
    max_iterations: MaxIterations,
    profile: Profile,
    // Base step for finite-difference Hessians (defaults to 1e-4).
    fd_hessian_step: f64,
}
1365
impl<ObjFn> SecondOrderProblem<ObjFn>
where
    ObjFn: SecondOrderObjective,
{
    /// Describe a second-order problem starting at `x0` with defaults:
    /// no bounds, default tolerance/iterations, robust profile, and a
    /// finite-difference Hessian step of `1e-4`.
    pub fn new(x0: Array1<f64>, objective: ObjFn) -> Self {
        Self {
            x0,
            objective,
            bounds: None,
            tolerance: Tolerance::DEFAULT,
            max_iterations: MaxIterations::DEFAULT,
            profile: Profile::Robust,
            fd_hessian_step: 1e-4,
        }
    }

    /// Attach box bounds; also forwarded to the objective so that
    /// finite-difference probes stay feasible.
    pub fn with_bounds(mut self, bounds: Bounds) -> Self {
        self.objective.set_finite_difference_bounds(Some(&bounds));
        self.bounds = Some(bounds);
        self
    }

    /// Override the convergence tolerance.
    pub fn with_tolerance(mut self, tolerance: Tolerance) -> Self {
        self.tolerance = tolerance;
        self
    }

    /// Override the iteration budget.
    pub fn with_max_iterations(mut self, max_iterations: MaxIterations) -> Self {
        self.max_iterations = max_iterations;
        self
    }

    /// Select a behavior profile (also drives solver selection).
    pub fn with_profile(mut self, profile: Profile) -> Self {
        self.profile = profile;
        self
    }

    /// Override the finite-difference Hessian base step.
    /// Note: unlike `Tolerance`, this value is not validated here.
    pub fn with_fd_hessian_step(mut self, fd_hessian_step: f64) -> Self {
        self.fd_hessian_step = fd_hessian_step;
        self
    }
}
1408
/// Solver chosen automatically for second-order problems. `Arc` here is the
/// project's ARC solver type (presumably Adaptive Regularization with
/// Cubics — confirm at its definition), not `std::sync::Arc`.
pub enum AutoSecondOrderSolver<ObjFn> {
    NewtonTrustRegion(NewtonTrustRegion<ObjFn>),
    Arc(Arc<ObjFn>),
}

impl<ObjFn> AutoSecondOrderSolver<ObjFn>
where
    ObjFn: SecondOrderObjective,
{
    /// Run the underlying solver, mapping its error into the unified
    /// `AutoSecondOrderError`.
    pub fn run(&mut self) -> Result<Solution, AutoSecondOrderError> {
        match self {
            Self::NewtonTrustRegion(solver) => solver
                .run()
                .map_err(AutoSecondOrderError::NewtonTrustRegion),
            Self::Arc(solver) => solver.run().map_err(AutoSecondOrderError::Arc),
        }
    }
}
1427
/// Unified error for the automatically selected second-order solver.
#[derive(Debug, thiserror::Error)]
pub enum AutoSecondOrderError {
    #[error(transparent)]
    NewtonTrustRegion(#[from] NewtonTrustRegionError),
    #[error(transparent)]
    Arc(#[from] ArcError),
}
1435
1436#[doc(hidden)]
1437pub trait IntoAutoSolver {
1438 type Solver;
1439
1440 fn into_auto_solver(self) -> Self::Solver;
1441}
1442
1443impl<ObjFn> IntoAutoSolver for Problem<ObjFn>
1444where
1445 ObjFn: FirstOrderObjective,
1446{
1447 type Solver = Bfgs<ObjFn>;
1448
1449 fn into_auto_solver(self) -> Self::Solver {
1450 let mut solver = Bfgs::new(self.x0, self.objective)
1451 .with_tolerance(self.tolerance)
1452 .with_max_iterations(self.max_iterations)
1453 .with_profile(self.profile);
1454 if let Some(bounds) = self.bounds {
1455 solver = solver.with_bounds(bounds);
1456 }
1457 solver
1458 }
1459}
1460
1461impl<ObjFn> IntoAutoSolver for SecondOrderProblem<ObjFn>
1462where
1463 ObjFn: SecondOrderObjective,
1464{
1465 type Solver = AutoSecondOrderSolver<ObjFn>;
1466
1467 fn into_auto_solver(self) -> Self::Solver {
1468 let SecondOrderProblem {
1469 x0,
1470 objective,
1471 bounds,
1472 tolerance,
1473 max_iterations,
1474 profile,
1475 fd_hessian_step,
1476 } = self;
1477 let use_arc = matches!(profile, Profile::Aggressive);
1478 if use_arc {
1479 let mut solver = Arc::new(x0, objective)
1480 .with_tolerance(tolerance)
1481 .with_max_iterations(max_iterations)
1482 .with_profile(profile)
1483 .with_fd_hessian_step(fd_hessian_step);
1484 if let Some(bounds) = bounds {
1485 solver = solver.with_bounds(bounds);
1486 }
1487 AutoSecondOrderSolver::Arc(solver)
1488 } else {
1489 let mut solver = NewtonTrustRegion::new(x0, objective)
1490 .with_tolerance(tolerance)
1491 .with_max_iterations(max_iterations)
1492 .with_profile(profile)
1493 .with_fd_hessian_step(fd_hessian_step);
1494 if let Some(bounds) = bounds {
1495 solver = solver.with_bounds(bounds);
1496 }
1497 AutoSecondOrderSolver::NewtonTrustRegion(solver)
1498 }
1499 }
1500}
1501
/// Converts any supported problem description into its ready-to-run solver:
/// first-order problems become a BFGS solver, second-order problems an
/// automatically chosen trust-region Newton or ARC solver.
pub fn optimize<P>(problem: P) -> P::Solver
where
    P: IntoAutoSolver,
{
    problem.into_auto_solver()
}
1508
// Mixed absolute/relative tolerance used when deciding whether two evaluation
// points count as "the same" for cache-hit purposes.
const CACHE_POINT_EPS: f64 = 1e-14;

/// Scalar near-equality with a magnitude-scaled tolerance:
/// `|lhs - rhs| <= CACHE_POINT_EPS * (1 + max(|lhs|, |rhs|))`.
#[inline]
fn approx_scalar(lhs: f64, rhs: f64) -> bool {
    let scale = 1.0 + lhs.abs().max(rhs.abs());
    (lhs - rhs).abs() <= CACHE_POINT_EPS * scale
}
1515
1516#[inline]
1517fn approx_point(lhs: &Array1<f64>, rhs: &Array1<f64>) -> bool {
1518 lhs.len() == rhs.len()
1519 && lhs
1520 .iter()
1521 .zip(rhs.iter())
1522 .all(|(&l, &r)| approx_scalar(l, r))
1523}
1524
1525fn recover_on_nonfinite_cost(cost: f64) -> Result<f64, ObjectiveEvalError> {
1526 if cost.is_finite() {
1527 Ok(cost)
1528 } else {
1529 Err(ObjectiveEvalError::recoverable(
1530 "objective returned a non-finite cost",
1531 ))
1532 }
1533}
1534
1535fn recover_on_nonfinite_gradient(gradient: &Array1<f64>) -> Result<(), ObjectiveEvalError> {
1536 if gradient.iter().all(|value| value.is_finite()) {
1537 Ok(())
1538 } else {
1539 Err(ObjectiveEvalError::recoverable(
1540 "objective returned a non-finite gradient",
1541 ))
1542 }
1543}
1544
1545fn sanitize_first_order_sample(
1546 sample: FirstOrderSample,
1547) -> Result<FirstOrderSample, ObjectiveEvalError> {
1548 recover_on_nonfinite_cost(sample.value)?;
1549 recover_on_nonfinite_gradient(&sample.gradient)?;
1550 Ok(sample)
1551}
1552
1553fn sanitize_second_order_sample(
1554 sample: SecondOrderSample,
1555) -> Result<SecondOrderSample, ObjectiveEvalError> {
1556 let value = recover_on_nonfinite_cost(sample.value)?;
1557 recover_on_nonfinite_gradient(&sample.gradient)?;
1558 let hessian = sample
1559 .hessian
1560 .filter(|h| h.iter().all(|value| value.is_finite()));
1561 Ok(SecondOrderSample {
1562 value,
1563 gradient: sample.gradient,
1564 hessian,
1565 })
1566}
1567
/// Adapter exposing a mutably-borrowed second-order objective through the
/// first-order interface, so the BFGS fallback can reuse the same objective
/// without taking ownership of it.
struct BorrowedSecondOrderAsFirstOrder<'a, O> {
    inner: &'a mut O,
}

impl<'a, O> BorrowedSecondOrderAsFirstOrder<'a, O> {
    fn new(inner: &'a mut O) -> Self {
        Self { inner }
    }
}

impl<O> FirstOrderObjective for BorrowedSecondOrderAsFirstOrder<'_, O>
where
    O: SecondOrderObjective,
{
    // Pure delegation: the second-order objective already implements these.
    fn eval_grad(&mut self, x: &Array1<f64>) -> Result<FirstOrderSample, ObjectiveEvalError> {
        self.inner.eval_grad(x)
    }

    fn set_finite_difference_bounds(&mut self, bounds: Option<&Bounds>) {
        self.inner.set_finite_difference_bounds(bounds);
    }
}

impl<O> ZerothOrderObjective for BorrowedSecondOrderAsFirstOrder<'_, O>
where
    O: SecondOrderObjective,
{
    fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
        self.inner.eval_cost(x)
    }
}
1599
/// Memoizes the most recent objective evaluation so repeated queries at
/// (numerically) the same point skip a re-evaluation.
struct FirstOrderCache {
    // Point of the most recent evaluation, if any.
    last_x: Option<Array1<f64>>,
    // Cost at `last_x`.
    last_cost: Option<f64>,
    // Gradient buffer; only meaningful while `have_last_grad` is true.
    last_grad: Array1<f64>,
    have_last_grad: bool,
}
1606
1607impl FirstOrderCache {
1608 fn new(n: usize) -> Self {
1609 Self {
1610 last_x: None,
1611 last_cost: None,
1612 last_grad: Array1::zeros(n),
1613 have_last_grad: false,
1614 }
1615 }
1616
1617 fn eval_cost<ObjFn>(
1618 &mut self,
1619 obj_fn: &mut ObjFn,
1620 x: &Array1<f64>,
1621 func_evals: &mut usize,
1622 ) -> Result<f64, ObjectiveEvalError>
1623 where
1624 ObjFn: FirstOrderObjective,
1625 {
1626 if let (Some(last_x), Some(last_cost)) = (&self.last_x, self.last_cost)
1627 && approx_point(last_x, x)
1628 {
1629 return Ok(last_cost);
1630 }
1631 let cost = recover_on_nonfinite_cost(obj_fn.eval_cost(x)?)?;
1632 *func_evals += 1;
1633 self.last_x = Some(x.clone());
1634 self.last_cost = Some(cost);
1635 self.have_last_grad = false;
1636 Ok(cost)
1637 }
1638
1639 fn eval_cost_grad<ObjFn>(
1640 &mut self,
1641 obj_fn: &mut ObjFn,
1642 x: &Array1<f64>,
1643 func_evals: &mut usize,
1644 grad_evals: &mut usize,
1645 ) -> Result<(f64, Array1<f64>), ObjectiveEvalError>
1646 where
1647 ObjFn: FirstOrderObjective,
1648 {
1649 if let (Some(last_x), Some(last_cost)) = (&self.last_x, self.last_cost)
1650 && self.have_last_grad
1651 && approx_point(last_x, x)
1652 {
1653 return Ok((last_cost, self.last_grad.clone()));
1654 }
1655 let sample = sanitize_first_order_sample(obj_fn.eval_grad(x)?)?;
1656 *func_evals += 1;
1657 *grad_evals += 1;
1658 self.last_x = Some(x.clone());
1659 self.last_cost = Some(sample.value);
1660 self.last_grad.assign(&sample.gradient);
1661 self.have_last_grad = true;
1662 Ok((sample.value, self.last_grad.clone()))
1663 }
1664}
1665
/// Second-order analogue of `FirstOrderCache`: memoizes the most recent
/// (cost, gradient, Hessian) triple and owns the finite-difference fallback
/// used when the objective does not supply a (finite) Hessian.
struct SecondOrderCache {
    // Point of the most recent full evaluation, if any.
    last_x: Option<Array1<f64>>,
    // Cost at `last_x`.
    last_cost: Option<f64>,
    last_grad: Array1<f64>,
    // Hessian at `last_x`; only meaningful while `have_last_sample` is true.
    last_hessian: SymmetricMatrix,
    have_last_sample: bool,
    // Relative step used for finite-difference Hessian columns.
    fd_hessian_step: f64,
}

impl SecondOrderCache {
    // Starts empty with zeroed n-dimensional buffers.
    fn new(n: usize, fd_hessian_step: f64) -> Self {
        Self {
            last_x: None,
            last_cost: None,
            last_grad: Array1::zeros(n),
            last_hessian: SymmetricMatrix::from_verified(Array2::zeros((n, n))),
            have_last_sample: false,
            fd_hessian_step,
        }
    }

    /// Approximates the Hessian column-by-column from gradient differences.
    ///
    /// The per-coordinate stencil (central / forward / backward / fixed) is
    /// chosen by `finite_difference_stencil`, so bounded coordinates are not
    /// perturbed outside `bounds`; a `Fixed` coordinate yields a zero column.
    /// Every gradient evaluation bumps both `func_evals` and `grad_evals`.
    /// The assembled matrix is explicitly symmetrized before being returned.
    ///
    /// # Errors
    /// Fatal when the configured step is non-positive or non-finite;
    /// sanitized objective failures propagate unchanged.
    fn finite_difference_hessian<ObjFn>(
        &mut self,
        obj_fn: &mut ObjFn,
        x: &Array1<f64>,
        center_gradient: &Array1<f64>,
        bounds: Option<&BoxSpec>,
        func_evals: &mut usize,
        grad_evals: &mut usize,
    ) -> Result<Array2<f64>, ObjectiveEvalError>
    where
        ObjFn: SecondOrderObjective,
    {
        if !self.fd_hessian_step.is_finite() || self.fd_hessian_step <= 0.0 {
            return Err(ObjectiveEvalError::fatal(
                "finite-difference Hessian step must be positive and finite",
            ));
        }
        let n = x.len();
        let mut hessian = Array2::<f64>::zeros((n, n));
        for j in 0..n {
            // Step scales with the coordinate's magnitude (relative step).
            let h = self.fd_hessian_step * (1.0 + x[j].abs());
            let column = match finite_difference_stencil(bounds, x, j, h) {
                FiniteDiffStencil::Central { h } => {
                    let mut xp = x.clone();
                    xp[j] += h;
                    let gp = sanitize_first_order_sample(obj_fn.eval_grad(&xp)?)?;
                    *func_evals += 1;
                    *grad_evals += 1;

                    let mut xm = x.clone();
                    xm[j] -= h;
                    let gm = sanitize_first_order_sample(obj_fn.eval_grad(&xm)?)?;
                    *func_evals += 1;
                    *grad_evals += 1;

                    (&gp.gradient - &gm.gradient) / (2.0 * h)
                }
                FiniteDiffStencil::Forward { h } => {
                    let mut xp = x.clone();
                    xp[j] += h;
                    let gp = sanitize_first_order_sample(obj_fn.eval_grad(&xp)?)?;
                    *func_evals += 1;
                    *grad_evals += 1;
                    (&gp.gradient - center_gradient) / h
                }
                FiniteDiffStencil::Backward { h } => {
                    let mut xm = x.clone();
                    xm[j] -= h;
                    let gm = sanitize_first_order_sample(obj_fn.eval_grad(&xm)?)?;
                    *func_evals += 1;
                    *grad_evals += 1;
                    (center_gradient - &gm.gradient) / h
                }
                // Coordinate cannot be perturbed at all: zero column.
                FiniteDiffStencil::Fixed => Array1::zeros(n),
            };
            hessian.column_mut(j).assign(&column);
        }
        // FD columns are not exactly symmetric; average with the transpose.
        Ok(0.5 * (&hessian + &hessian.t().to_owned()))
    }

    /// Returns `(cost, gradient, Hessian)` at `x`, served from cache when
    /// `x` matches the previously evaluated point. A Hessian supplied by the
    /// objective counts towards `hess_evals`; when it is absent (or was
    /// dropped for being non-finite) one is synthesized by finite
    /// differences, which counts only gradient/function evaluations.
    fn eval_cost_grad_hessian<ObjFn>(
        &mut self,
        obj_fn: &mut ObjFn,
        x: &Array1<f64>,
        bounds: Option<&BoxSpec>,
        func_evals: &mut usize,
        grad_evals: &mut usize,
        hess_evals: &mut usize,
    ) -> Result<(f64, Array1<f64>, Array2<f64>), ObjectiveEvalError>
    where
        ObjFn: SecondOrderObjective,
    {
        if let (Some(last_x), Some(last_cost)) = (&self.last_x, self.last_cost)
            && self.have_last_sample
            && approx_point(last_x, x)
        {
            return Ok((
                last_cost,
                self.last_grad.clone(),
                self.last_hessian.as_array().clone(),
            ));
        }
        let sample = sanitize_second_order_sample(obj_fn.eval_hessian(x)?)?;
        *func_evals += 1;
        *grad_evals += 1;
        let hessian = match sample.hessian {
            Some(hessian) => {
                *hess_evals += 1;
                hessian
            }
            None => self.finite_difference_hessian(
                obj_fn,
                x,
                &sample.gradient,
                bounds,
                func_evals,
                grad_evals,
            )?,
        };
        self.last_x = Some(x.clone());
        self.last_cost = Some(sample.value);
        self.last_grad.assign(&sample.gradient);
        self.last_hessian = SymmetricMatrix::from_verified(hessian.clone());
        self.have_last_sample = true;
        Ok((sample.value, self.last_grad.clone(), hessian))
    }
}
1794
/// Failure modes of the Newton trust-region solver.
#[derive(Debug, thiserror::Error)]
pub enum NewtonTrustRegionError {
    /// The objective's Hessian did not match the problem dimension.
    #[error(
        "Objective returned a Hessian with shape {got_rows}x{got_cols}; expected {expected}x{expected}"
    )]
    HessianShapeMismatch {
        expected: usize,
        got_rows: usize,
        got_cols: usize,
    },
    #[error("Objective returned non-finite values.")]
    NonFiniteObjective,
    #[error("Objective evaluation failed: {message}")]
    ObjectiveFailed { message: String },
    #[error("Failed to form a positive-definite trust-region model Hessian.")]
    ModelHessianNotSpd,
    /// The iteration budget ran out; the best iterate found is carried along.
    #[error(
        "Maximum number of iterations reached without converging. The best solution found is returned."
    )]
    MaxIterationsReached { last_solution: Box<Solution> },
}

/// Objective-independent state and tuning knobs of the trust-region loop.
struct NewtonTrustRegionCore {
    x0: Array1<f64>,
    // Convergence threshold on the projected-gradient norm.
    tolerance: f64,
    max_iterations: usize,
    // Relative step for finite-difference Hessians.
    fd_hessian_step: f64,
    bounds: Option<BoxSpec>,
    // Initial trust-region radius and its upper cap.
    trust_radius: f64,
    trust_radius_max: f64,
    // Minimum actual/predicted decrease ratio for accepting a step.
    eta_accept: f64,
    // Whether to hand off to BFGS when the second-order path fails.
    fallback_policy: FallbackPolicy,
    // Maximum number of (s, y) pairs kept for warm-starting the fallback.
    history_cap: usize,
}

/// Public Newton trust-region solver: core settings paired with the
/// user objective.
pub struct NewtonTrustRegion<ObjFn> {
    core: NewtonTrustRegionCore,
    obj_fn: ObjFn,
}
1834
/// Failure modes of the ARC (cubic regularization) solver.
#[derive(Debug, thiserror::Error)]
pub enum ArcError {
    /// The objective's Hessian did not match the problem dimension.
    #[error(
        "Objective returned a Hessian with shape {got_rows}x{got_cols}; expected {expected}x{expected}"
    )]
    HessianShapeMismatch {
        expected: usize,
        got_rows: usize,
        got_cols: usize,
    },
    #[error("Objective returned non-finite values.")]
    NonFiniteObjective,
    #[error("Objective evaluation failed: {message}")]
    ObjectiveFailed { message: String },
    #[error("ARC subproblem solver failed to produce a usable step.")]
    SubproblemFailed,
    /// The iteration budget ran out; the best iterate found is carried along.
    #[error(
        "Maximum number of iterations reached without converging. The best solution found is returned."
    )]
    MaxIterationsReached { last_solution: Box<Solution> },
}

/// Objective-independent state and tuning knobs of the ARC loop.
struct ArcCore {
    x0: Array1<f64>,
    tolerance: f64,
    max_iterations: usize,
    // Relative step for finite-difference Hessians.
    fd_hessian_step: f64,
    bounds: Option<BoxSpec>,
    // NOTE(review): theta is set by the profiles but its use is outside this
    // chunk — confirm its role in the ARC acceptance logic.
    theta: f64,
    // Cubic regularization weight and its clamping interval.
    sigma: f64,
    sigma_min: f64,
    sigma_max: f64,
    // Acceptance thresholds (presumably on the actual/predicted decrease
    // ratio — confirm in the ARC main loop).
    eta1: f64,
    eta2: f64,
    // Multiplicative factors adjusting sigma; gamma2/gamma3 grow it on
    // subproblem failures (see `escalate_sigma_on_failure`).
    gamma1: f64,
    gamma2: f64,
    gamma3: f64,
    fallback_policy: FallbackPolicy,
    // Maximum number of (s, y) pairs kept for warm-starting the fallback.
    history_cap: usize,
    subproblem_max_iterations: usize,
}

/// Public ARC solver: core settings paired with the user objective.
/// NOTE: this `Arc` is the cubic-regularization solver type, not
/// `std::sync::Arc`.
pub struct Arc<ObjFn> {
    core: ArcCore,
    obj_fn: ObjFn,
}
1882
1883impl NewtonTrustRegionCore {
    /// Default configuration: matches the `Robust` profile, with a unit
    /// initial trust radius capped at 1e6.
    fn new(x0: Array1<f64>) -> Self {
        Self {
            x0,
            tolerance: 1e-5,
            max_iterations: 100,
            fd_hessian_step: 1e-4,
            bounds: None,
            trust_radius: 1.0,
            trust_radius_max: 1e6,
            eta_accept: 0.1,
            fallback_policy: FallbackPolicy::AutoBfgs,
            history_cap: 12,
        }
    }
1898
1899 fn apply_profile(&mut self, profile: Profile) {
1900 match profile {
1901 Profile::Robust => {
1902 self.eta_accept = 0.1;
1903 self.fallback_policy = FallbackPolicy::AutoBfgs;
1904 self.history_cap = 12;
1905 }
1906 Profile::Deterministic => {
1907 self.eta_accept = 0.1;
1908 self.fallback_policy = FallbackPolicy::Never;
1909 self.history_cap = 2;
1910 }
1911 Profile::Aggressive => {
1912 self.eta_accept = 0.05;
1913 self.fallback_policy = FallbackPolicy::AutoBfgs;
1914 self.history_cap = 20;
1915 }
1916 }
1917 }
1918
1919 #[inline]
1920 fn project_point(&self, x: &Array1<f64>) -> Array1<f64> {
1921 if let Some(bounds) = &self.bounds {
1922 bounds.project(x)
1923 } else {
1924 x.clone()
1925 }
1926 }
1927
1928 #[inline]
1929 fn projected_gradient(&self, x: &Array1<f64>, g: &Array1<f64>) -> Array1<f64> {
1930 if let Some(bounds) = &self.bounds {
1931 bounds.projected_gradient(x, g)
1932 } else {
1933 g.clone()
1934 }
1935 }
1936
1937 fn active_mask(&self, x: &Array1<f64>, g: &Array1<f64>) -> Vec<bool> {
1938 if let Some(bounds) = &self.bounds {
1939 bounds.active_mask(x, g)
1940 } else {
1941 vec![false; x.len()]
1942 }
1943 }
1944
1945 fn predicted_decrease(h_model: &Array2<f64>, g_proj: &Array1<f64>, step: &Array1<f64>) -> f64 {
1946 let hs = h_model.dot(step);
1947 -(g_proj.dot(step) + 0.5 * step.dot(&hs))
1948 }
1949
1950 fn boundary_tau(p: &Array1<f64>, d: &Array1<f64>, delta: f64) -> Option<f64> {
1951 let a = d.dot(d);
1952 if !a.is_finite() || a <= 0.0 {
1953 return None;
1954 }
1955 let b = 2.0 * p.dot(d);
1956 let c = p.dot(p) - delta * delta;
1957 let disc = b * b - 4.0 * a * c;
1958 if !disc.is_finite() || disc < 0.0 {
1959 return None;
1960 }
1961 let sqrt_disc = disc.sqrt();
1962 let t1 = (-b - sqrt_disc) / (2.0 * a);
1963 let t2 = (-b + sqrt_disc) / (2.0 * a);
1964 let mut tau = None;
1965 if t1.is_finite() && t1 >= 0.0 {
1966 tau = Some(t1);
1967 }
1968 if t2.is_finite() && t2 >= 0.0 {
1969 tau = Some(tau.map(|v| v.min(t2)).unwrap_or(t2));
1970 }
1971 tau
1972 }
1973
    /// Approximately minimizes the quadratic model inside the trust region
    /// with Steihaug–Toint truncated conjugate gradients, optionally
    /// restricted to the free (non-active) variables via `active`.
    ///
    /// Returns the step and its predicted model decrease, or `None` when no
    /// descent step could be produced. Small problems are routed to a dense
    /// direct solve instead of CG.
    fn steihaug_toint_step(
        &self,
        h_model: &Array2<f64>,
        g_proj: &Array1<f64>,
        trust_radius: f64,
        active: Option<&[bool]>,
    ) -> Option<(Array1<f64>, f64)> {
        let n = g_proj.len();
        let g_norm = g_proj.dot(g_proj).sqrt();
        if !g_norm.is_finite() || g_norm <= 0.0 {
            return None;
        }
        let active = active.unwrap_or(&[]);
        let use_mask = !active.is_empty();
        // With every variable pinned at a bound there is nothing to move.
        if use_mask && !any_free_variables(active) {
            return None;
        }
        if prefer_dense_direct(n) {
            return dense_trust_region_step(
                h_model,
                g_proj,
                trust_radius,
                if use_mask { Some(active) } else { None },
            );
        }

        // CG state: iterate p, residual r, search direction d (masked to the
        // free variables when bounds are active).
        let mut p = Array1::<f64>::zeros(n);
        let mut r = g_proj.clone();
        if use_mask {
            mask_vector_inplace(&mut r, active);
        }
        let mut d = r.mapv(|v| -v);
        if use_mask {
            mask_vector_inplace(&mut d, active);
        }
        let mut rtr = r.dot(&r);
        let cg_tol = (1e-6 * g_norm).max(1e-12);
        let max_iter = (2 * n).max(10);
        let mut bd = Array1::<f64>::zeros(n);

        for _ in 0..max_iter {
            if use_mask {
                masked_hv_inplace(h_model, &d, active, &mut bd);
            } else {
                bd.assign(&h_model.dot(&d));
            }
            let d_bd = d.dot(&bd);

            // (Near-)non-positive curvature along d: follow d to the
            // trust-region boundary instead of taking a CG step.
            if !d_bd.is_finite() || d_bd <= 1e-14 * d.dot(&d).max(1.0) {
                let tau = Self::boundary_tau(&p, &d, trust_radius)?;
                let mut p_nc = p.clone();
                p_nc.scaled_add(tau, &d);
                let pred = Self::predicted_decrease(h_model, g_proj, &p_nc);
                if pred.is_finite() && pred > 0.0 {
                    return Some((p_nc, pred));
                }
                break;
            }

            let alpha = rtr / d_bd;
            if !alpha.is_finite() || alpha <= 0.0 {
                break;
            }

            let mut p_next = p.clone();
            p_next.scaled_add(alpha, &d);
            let p_next_norm = p_next.dot(&p_next).sqrt();
            // The CG iterate left the region: stop at the boundary crossing.
            if p_next_norm >= trust_radius {
                let tau = Self::boundary_tau(&p, &d, trust_radius)?;
                let mut p_b = p.clone();
                p_b.scaled_add(tau, &d);
                let pred = Self::predicted_decrease(h_model, g_proj, &p_b);
                if pred.is_finite() && pred > 0.0 {
                    return Some((p_b, pred));
                }
                break;
            }

            r.scaled_add(alpha, &bd);
            let r_next_norm = r.dot(&r).sqrt();
            if !r_next_norm.is_finite() {
                break;
            }

            p = p_next;
            // Residual small enough: interior CG solution found.
            if r_next_norm <= cg_tol {
                let pred = Self::predicted_decrease(h_model, g_proj, &p);
                if pred.is_finite() && pred > 0.0 {
                    return Some((p, pred));
                }
                break;
            }

            let rtr_next = r.dot(&r);
            let beta = rtr_next / rtr;
            if !beta.is_finite() || beta < 0.0 {
                break;
            }
            d *= beta;
            d -= &r;
            if use_mask {
                mask_vector_inplace(&mut d, active);
            }
            rtr = rtr_next;
        }

        // Last resort: a boundary-length steepest-descent step, if it still
        // predicts a decrease.
        let g_norm2 = g_proj.dot(g_proj);
        if g_norm2.is_finite() && g_norm2 > 0.0 {
            let mut p_sd = g_proj.clone();
            p_sd *= -(trust_radius / g_norm2.sqrt());
            let pred = Self::predicted_decrease(h_model, g_proj, &p_sd);
            if pred.is_finite() && pred > 0.0 {
                return Some((p_sd, pred));
            }
        }
        None
    }
2093
    /// Builds an approximate inverse Hessian for warm-starting the BFGS
    /// fallback: starts from the identity scaled by the sy/yy ratio of the
    /// newest curvature pair (clamped to [1e-8, 1e8]), then replays every
    /// stored (s, y) pair through an inverse BFGS update, skipping pairs
    /// with insufficient curvature and rolling back updates that fail.
    fn warm_inverse_from_history(
        &self,
        n: usize,
        history: &VecDeque<(Array1<f64>, Array1<f64>)>,
    ) -> Array2<f64> {
        let mut h_inv = Array2::<f64>::eye(n);
        let mut backup = Array2::<f64>::zeros((n, n));
        if let Some((s_last, y_last)) = history.back() {
            let sy = s_last.dot(y_last);
            let yy = y_last.dot(y_last);
            if sy.is_finite() && yy.is_finite() && sy > 1e-16 && yy > 1e-16 {
                let gamma = (sy / yy).clamp(1e-8, 1e8);
                h_inv = scaled_identity(n, gamma);
            }
        }
        for (s, y) in history {
            let sty = s.dot(y);
            // Skip pairs whose curvature is too small to be trustworthy.
            if !sty.is_finite() || sty <= 1e-12 {
                continue;
            }
            if !apply_inverse_bfgs_update_in_place(&mut h_inv, s, y, &mut backup) {
                h_inv.assign(&backup);
            }
        }
        h_inv
    }
2120
    /// Hands the remaining iteration budget to a BFGS solver that borrows
    /// the same objective, warm-started with an inverse-Hessian rebuilt from
    /// `history`. Line-search failure and iteration exhaustion inside BFGS
    /// are treated as best-effort results; other BFGS errors map onto
    /// trust-region errors. Evaluation counts accumulate on top of the
    /// counts already spent.
    fn run_bfgs_fallback<ObjFn>(
        &self,
        obj_fn: &mut ObjFn,
        x_start: Array1<f64>,
        history: &VecDeque<(Array1<f64>, Array1<f64>)>,
        iter_used: usize,
        mut func_evals: usize,
        mut grad_evals: usize,
    ) -> Result<Solution, NewtonTrustRegionError>
    where
        ObjFn: SecondOrderObjective,
    {
        eprintln!(
            "[OPT-TRACE] NewtonTrustRegion -> BFGS fallback (iter_used={}, dim={})",
            iter_used,
            x_start.len()
        );
        let n = x_start.len();
        let h0_inv = self.warm_inverse_from_history(n, history);
        let bounds = self.bounds.as_ref().map(|b| Bounds { spec: b.clone() });

        // Give BFGS whatever iteration budget is left (at least one).
        let mut bfgs = Bfgs::new(x_start, BorrowedSecondOrderAsFirstOrder::new(obj_fn))
            .with_tolerance(Tolerance::new(self.tolerance).expect("core tolerance must be valid"))
            .with_max_iterations(
                MaxIterations::new(self.max_iterations.saturating_sub(iter_used).max(1))
                    .expect("core max_iterations must be valid"),
            );
        bfgs.core.initial_b_inv = Some(SpdInverseHessian::from_verified(h0_inv).into_inner());

        if let Some(bounds) = bounds {
            bfgs = bfgs.with_bounds(bounds);
        }

        let fallback_sol = match bfgs.run() {
            Ok(sol) => sol,
            // Best-effort: keep whatever BFGS managed before stalling.
            Err(BfgsError::LineSearchFailed { last_solution, .. }) => *last_solution,
            Err(BfgsError::MaxIterationsReached { last_solution }) => *last_solution,
            Err(BfgsError::ObjectiveFailed { message }) => {
                return Err(NewtonTrustRegionError::ObjectiveFailed { message });
            }
            Err(_) => return Err(NewtonTrustRegionError::ModelHessianNotSpd),
        };
        func_evals += fallback_sol.func_evals;
        grad_evals += fallback_sol.grad_evals;
        Ok(Solution {
            iterations: iter_used + fallback_sol.iterations,
            func_evals,
            grad_evals,
            ..fallback_sol
        })
    }
2172
    /// Main trust-region loop.
    ///
    /// Per iteration: test projected-gradient convergence, symmetrize the
    /// Hessian if needed, compute a (possibly active-set-masked)
    /// Steihaug–Toint step, project the trial point into the bounds, then
    /// accept/reject on the ratio `rho` of actual to predicted decrease,
    /// growing or shrinking the radius accordingly. Recoverable objective
    /// failures shrink the radius (or, on the very first evaluation, trigger
    /// the BFGS fallback when the policy allows); exhausting the budget
    /// returns the best iterate inside `MaxIterationsReached`.
    fn run<ObjFn>(&mut self, obj_fn: &mut ObjFn) -> Result<Solution, NewtonTrustRegionError>
    where
        ObjFn: SecondOrderObjective,
    {
        let n = self.x0.len();
        let mut x_k = self.project_point(&self.x0);
        let mut func_evals = 0usize;
        let mut grad_evals = 0usize;
        let mut hess_evals = 0usize;
        let mut oracle = SecondOrderCache::new(n, self.fd_hessian_step);
        let initial = oracle.eval_cost_grad_hessian(
            obj_fn,
            &x_k,
            self.bounds.as_ref(),
            &mut func_evals,
            &mut grad_evals,
            &mut hess_evals,
        );
        // Accepted (s, y) pairs, kept for warm-starting the BFGS fallback.
        let mut history: VecDeque<(Array1<f64>, Array1<f64>)> =
            VecDeque::with_capacity(self.history_cap.max(2));
        let (mut f_k, mut g_k, mut h_k) = match initial {
            Ok(sample) => sample,
            Err(ObjectiveEvalError::Recoverable { .. }) => {
                if matches!(self.fallback_policy, FallbackPolicy::AutoBfgs) {
                    return self.run_bfgs_fallback(
                        obj_fn,
                        x_k.clone(),
                        &history,
                        0,
                        func_evals,
                        grad_evals,
                    );
                }
                return Err(NewtonTrustRegionError::NonFiniteObjective);
            }
            Err(ObjectiveEvalError::Fatal { message }) => {
                return Err(NewtonTrustRegionError::ObjectiveFailed { message });
            }
        };
        if h_k.nrows() != n || h_k.ncols() != n {
            return Err(NewtonTrustRegionError::HessianShapeMismatch {
                expected: n,
                got_rows: h_k.nrows(),
                got_cols: h_k.ncols(),
            });
        }
        let mut trust_radius = self.trust_radius.max(1e-8);
        let mut g_proj_k = self.projected_gradient(&x_k, &g_k);
        // Scratch matrix reused for symmetrized Hessian copies.
        let mut h_model_workspace = Array2::<f64>::zeros((n, n));

        for k in 0..self.max_iterations {
            // Convergence test on the projected-gradient norm.
            let g_norm = g_proj_k.dot(&g_proj_k).sqrt();
            if g_norm.is_finite() && g_norm <= self.tolerance {
                return Ok(Solution::gradient_based(
                    x_k,
                    f_k,
                    g_k,
                    g_norm,
                    Some(h_k),
                    k,
                    func_evals,
                    grad_evals,
                    hess_evals,
                ));
            }

            // Use the Hessian directly when it is already (near) symmetric;
            // otherwise symmetrize into the workspace.
            let h_model = if hessian_is_effectively_symmetric(&h_k) {
                &h_k
            } else {
                symmetrize_into(&mut h_model_workspace, &h_k);
                &h_model_workspace
            };
            let active = self.active_mask(&x_k, &g_k);
            let any_active = active.iter().copied().any(|v| v);
            let (trial_step, pred_dec_free) = if any_active {
                if !any_free_variables(&active) {
                    trust_radius = (trust_radius * 0.5).max(1e-12);
                    continue;
                }
                match self.steihaug_toint_step(h_model, &g_proj_k, trust_radius, Some(&active)) {
                    Some(v) => v,
                    None => {
                        trust_radius = (trust_radius * 0.5).max(1e-12);
                        continue;
                    }
                }
            } else {
                match self.steihaug_toint_step(h_model, &g_proj_k, trust_radius, None) {
                    Some(v) => v,
                    None => {
                        trust_radius = (trust_radius * 0.5).max(1e-12);
                        continue;
                    }
                }
            };

            // Project the raw trial point; the effective step may differ
            // from the model step when bounds clip it.
            let x_trial_raw = &x_k + &trial_step;
            let x_trial = self.project_point(&x_trial_raw);
            let s_trial = &x_trial - &x_k;
            let s_norm = s_trial.dot(&s_trial).sqrt();
            if !s_norm.is_finite() || s_norm <= 1e-16 {
                trust_radius = (trust_radius * 0.5).max(1e-12);
                continue;
            }
            // If projection changed the step noticeably, recompute the
            // predicted decrease for the clipped step.
            let pred_dec = if (&s_trial - &trial_step)
                .dot(&(&s_trial - &trial_step))
                .sqrt()
                > 1e-8 * (1.0 + trial_step.dot(&trial_step).sqrt())
            {
                Self::predicted_decrease(h_model, &g_proj_k, &s_trial)
            } else {
                pred_dec_free
            };
            if !pred_dec.is_finite() || pred_dec <= 0.0 {
                trust_radius = (trust_radius * 0.5).max(1e-12);
                continue;
            }

            let (f_trial, g_trial, h_trial) = match oracle.eval_cost_grad_hessian(
                obj_fn,
                &x_trial,
                self.bounds.as_ref(),
                &mut func_evals,
                &mut grad_evals,
                &mut hess_evals,
            ) {
                Ok(sample) => sample,
                // Non-finite trial evaluation: shrink harder and retry.
                Err(ObjectiveEvalError::Recoverable { .. }) => {
                    trust_radius = (trust_radius * 0.2).max(1e-12);
                    continue;
                }
                Err(ObjectiveEvalError::Fatal { message }) => {
                    return Err(NewtonTrustRegionError::ObjectiveFailed { message });
                }
            };
            // Classic trust-region radius update based on rho.
            let act_dec = f_k - f_trial;
            let rho = act_dec / pred_dec;
            if rho > 0.75 && s_norm > 0.99 * trust_radius {
                trust_radius = (trust_radius * 2.0).min(self.trust_radius_max.max(1.0));
            } else if rho < 0.25 {
                trust_radius = (trust_radius * 0.5).max(1e-12);
            }

            if rho > self.eta_accept {
                if h_trial.nrows() != n || h_trial.ncols() != n {
                    return Err(NewtonTrustRegionError::HessianShapeMismatch {
                        expected: n,
                        got_rows: h_trial.nrows(),
                        got_cols: h_trial.ncols(),
                    });
                }
                x_k = x_trial;
                f_k = f_trial;
                // Record the curvature pair for the fallback warm start.
                let y_k = &g_trial - &g_k;
                if s_trial.dot(&s_trial).sqrt() > 1e-14 && y_k.dot(&y_k).sqrt() > 1e-14 {
                    if history.len() == self.history_cap.max(2) {
                        history.pop_front();
                    }
                    history.push_back((s_trial.clone(), y_k));
                }
                g_k = g_trial;
                h_k = h_trial;
                g_proj_k = self.projected_gradient(&x_k, &g_k);
            }
        }

        let g_norm = g_proj_k.dot(&g_proj_k).sqrt();
        Err(NewtonTrustRegionError::MaxIterationsReached {
            last_solution: Box::new(Solution::gradient_based(
                x_k,
                f_k,
                g_k,
                g_norm,
                Some(h_k),
                self.max_iterations,
                func_evals,
                grad_evals,
                hess_evals,
            )),
        })
    }
2354}
2355
2356impl ArcCore {
    /// Default configuration: matches the `Robust` profile, with sigma
    /// starting at 1.0 and clamped to [1e-10, 1e12].
    fn new(x0: Array1<f64>) -> Self {
        Self {
            x0,
            tolerance: 1e-5,
            max_iterations: 100,
            fd_hessian_step: 1e-4,
            bounds: None,
            theta: 1.0,
            sigma: 1.0,
            sigma_min: 1e-10,
            sigma_max: 1e12,
            eta1: 0.1,
            eta2: 0.9,
            gamma1: 0.1,
            gamma2: 2.0,
            gamma3: 2.0,
            fallback_policy: FallbackPolicy::AutoBfgs,
            history_cap: 12,
            subproblem_max_iterations: 80,
        }
    }
2380
2381 fn apply_profile(&mut self, profile: Profile) {
2382 match profile {
2383 Profile::Robust => {
2384 self.theta = 1.0;
2385 self.eta1 = 0.1;
2386 self.eta2 = 0.9;
2387 self.gamma1 = 0.1;
2388 self.gamma2 = 2.0;
2389 self.gamma3 = 2.0;
2390 self.fallback_policy = FallbackPolicy::AutoBfgs;
2391 self.history_cap = 12;
2392 self.subproblem_max_iterations = 80;
2393 }
2394 Profile::Deterministic => {
2395 self.theta = 1.0;
2396 self.eta1 = 0.1;
2397 self.eta2 = 0.9;
2398 self.gamma1 = 0.1;
2399 self.gamma2 = 2.0;
2400 self.gamma3 = 2.0;
2401 self.fallback_policy = FallbackPolicy::Never;
2402 self.history_cap = 2;
2403 self.subproblem_max_iterations = 80;
2404 }
2405 Profile::Aggressive => {
2406 self.theta = 1.25;
2407 self.eta1 = 0.05;
2408 self.eta2 = 0.8;
2409 self.gamma1 = 0.2;
2410 self.gamma2 = 1.5;
2411 self.gamma3 = 2.5;
2412 self.fallback_policy = FallbackPolicy::AutoBfgs;
2413 self.history_cap = 20;
2414 self.subproblem_max_iterations = 120;
2415 }
2416 }
2417 }
2418
2419 #[inline]
2420 fn project_point(&self, x: &Array1<f64>) -> Array1<f64> {
2421 if let Some(bounds) = &self.bounds {
2422 bounds.project(x)
2423 } else {
2424 x.clone()
2425 }
2426 }
2427
2428 #[inline]
2429 fn projected_gradient(&self, x: &Array1<f64>, g: &Array1<f64>) -> Array1<f64> {
2430 if let Some(bounds) = &self.bounds {
2431 bounds.projected_gradient(x, g)
2432 } else {
2433 g.clone()
2434 }
2435 }
2436
2437 fn active_mask(&self, x: &Array1<f64>, g: &Array1<f64>) -> Vec<bool> {
2438 if let Some(bounds) = &self.bounds {
2439 bounds.active_mask(x, g)
2440 } else {
2441 vec![false; x.len()]
2442 }
2443 }
2444
    /// Builds an approximate inverse Hessian for warm-starting the BFGS
    /// fallback: identity scaled by the sy/yy ratio of the newest curvature
    /// pair (clamped to [1e-8, 1e8]), then every stored (s, y) pair replayed
    /// through an inverse BFGS update, skipping low-curvature pairs and
    /// rolling back failed updates. Mirrors the Newton trust-region version.
    fn warm_inverse_from_history(
        &self,
        n: usize,
        history: &VecDeque<(Array1<f64>, Array1<f64>)>,
    ) -> Array2<f64> {
        let mut h_inv = Array2::<f64>::eye(n);
        let mut backup = Array2::<f64>::zeros((n, n));
        if let Some((s_last, y_last)) = history.back() {
            let sy = s_last.dot(y_last);
            let yy = y_last.dot(y_last);
            if sy.is_finite() && yy.is_finite() && sy > 1e-16 && yy > 1e-16 {
                let gamma = (sy / yy).clamp(1e-8, 1e8);
                h_inv = scaled_identity(n, gamma);
            }
        }
        for (s, y) in history {
            let sty = s.dot(y);
            // Skip pairs whose curvature is too small to be trustworthy.
            if !sty.is_finite() || sty <= 1e-12 {
                continue;
            }
            if !apply_inverse_bfgs_update_in_place(&mut h_inv, s, y, &mut backup) {
                h_inv.assign(&backup);
            }
        }
        h_inv
    }
2471
    /// Hands the remaining iteration budget to a BFGS solver that borrows
    /// the same objective, warm-started from `history`. Line-search failure
    /// and iteration exhaustion inside BFGS are treated as best-effort
    /// results; other BFGS errors map onto ARC errors. Evaluation counts
    /// accumulate on top of the counts already spent.
    fn run_bfgs_fallback<ObjFn>(
        &self,
        obj_fn: &mut ObjFn,
        x_start: Array1<f64>,
        history: &VecDeque<(Array1<f64>, Array1<f64>)>,
        iter_used: usize,
        mut func_evals: usize,
        mut grad_evals: usize,
    ) -> Result<Solution, ArcError>
    where
        ObjFn: SecondOrderObjective,
    {
        eprintln!(
            "[OPT-TRACE] ARC -> BFGS fallback (iter_used={}, dim={})",
            iter_used,
            x_start.len()
        );
        let n = x_start.len();
        let h0_inv = self.warm_inverse_from_history(n, history);
        let bounds = self.bounds.as_ref().map(|b| Bounds { spec: b.clone() });

        // Give BFGS whatever iteration budget is left (at least one).
        let mut bfgs = Bfgs::new(x_start, BorrowedSecondOrderAsFirstOrder::new(obj_fn))
            .with_tolerance(Tolerance::new(self.tolerance).expect("core tolerance must be valid"))
            .with_max_iterations(
                MaxIterations::new(self.max_iterations.saturating_sub(iter_used).max(1))
                    .expect("core max_iterations must be valid"),
            );
        bfgs.core.initial_b_inv = Some(SpdInverseHessian::from_verified(h0_inv).into_inner());

        if let Some(bounds) = bounds {
            bfgs = bfgs.with_bounds(bounds);
        }

        let fallback_sol = match bfgs.run() {
            Ok(sol) => sol,
            // Best-effort: keep whatever BFGS managed before stalling.
            Err(BfgsError::LineSearchFailed { last_solution, .. }) => *last_solution,
            Err(BfgsError::MaxIterationsReached { last_solution }) => *last_solution,
            Err(BfgsError::ObjectiveFailed { message }) => {
                return Err(ArcError::ObjectiveFailed { message });
            }
            Err(_) => return Err(ArcError::SubproblemFailed),
        };
        func_evals += fallback_sol.func_evals;
        grad_evals += fallback_sol.grad_evals;
        Ok(Solution {
            iterations: iter_used + fallback_sol.iterations,
            func_evals,
            grad_evals,
            ..fallback_sol
        })
    }
2523
    /// Evaluates the cubic model delta m(s) = gᵀs + ½ sᵀHs + (σ/3)·||s||³
    /// along with ||s|| and the model gradient ∇m = g + Hs + σ||s||·s.
    /// When an active-set mask is supplied, both the Hessian product and the
    /// returned model gradient are restricted to the free variables.
    fn arc_model_value(
        &self,
        g: &Array1<f64>,
        h: &Array2<f64>,
        sigma: f64,
        s: &Array1<f64>,
        active: Option<&[bool]>,
    ) -> (f64, f64, Array1<f64>) {
        let mut hs = Array1::<f64>::zeros(s.len());
        if let Some(active) = active {
            masked_hv_inplace(h, s, active, &mut hs);
        } else {
            hs.assign(&h.dot(s));
        }
        let s_norm = s.dot(s).sqrt();
        let cubic = (sigma / 3.0) * s_norm.powi(3);
        let model_delta = g.dot(s) + 0.5 * s.dot(&hs) + cubic;
        let mut grad_m = g + &hs + &(s * (sigma * s_norm));
        if let Some(active) = active {
            mask_vector_inplace(&mut grad_m, active);
        }
        (model_delta, s_norm, grad_m)
    }
2551
    /// Cauchy-type step for the cubic model: walks along the (masked)
    /// steepest-descent direction with an analytically chosen step length,
    /// then halves the step up to 8 times until the model value is
    /// non-positive. Returns a zero step for a zero gradient and `None` when
    /// no model decrease is found.
    fn cauchy_arc_step(
        &self,
        g: &Array1<f64>,
        h: &Array2<f64>,
        sigma: f64,
        active: Option<&[bool]>,
    ) -> Option<Array1<f64>> {
        let g_norm = g.dot(g).sqrt();
        if !g_norm.is_finite() || g_norm <= 0.0 {
            return Some(Array1::<f64>::zeros(g.len()));
        }
        let mut d = -g.clone();
        if let Some(active) = active {
            mask_vector_inplace(&mut d, active);
        }
        let g2 = g.dot(g);
        let mut hd = Array1::<f64>::zeros(d.len());
        if let Some(active) = active {
            masked_hv_inplace(h, &d, active, &mut hd);
        } else {
            hd.assign(&h.dot(&d));
        }
        let d_hd = d.dot(&hd);
        let c = sigma * g_norm.powi(3);
        // Step length from the positive root of the 1-D model along d; fall
        // back to a curvature step, then a gradient-scaled guess.
        let mut alpha = if c > 1e-16 {
            let disc = d_hd * d_hd + 4.0 * c * g2;
            let sqrt_disc = disc.max(0.0).sqrt();
            (-d_hd + sqrt_disc) / (2.0 * c)
        } else if d_hd > 1e-16 {
            g2 / d_hd
        } else {
            1.0 / g_norm.max(1.0)
        };
        if !alpha.is_finite() || alpha <= 0.0 {
            alpha = 1.0 / g_norm.max(1.0);
        }
        let mut s = d * alpha;
        let mut m = self.arc_model_value(g, h, sigma, &s, active).0;
        // Backtrack until the cubic model predicts a decrease.
        for _ in 0..8 {
            if m <= 0.0 {
                return Some(s);
            }
            s *= 0.5;
            m = self.arc_model_value(g, h, sigma, &s, active).0;
        }
        if m <= 0.0 { Some(s) } else { None }
    }
2599
2600 #[inline]
2601 fn escalate_sigma_on_failure(&mut self, failure_streak: &mut usize) {
2602 *failure_streak += 1;
2606 let growth = if *failure_streak >= 3 {
2607 self.gamma3
2608 } else {
2609 self.gamma2
2610 };
2611 self.sigma = (self.sigma * growth).min(self.sigma_max);
2612 }
2613
    /// Solve the ARC subproblem min_s g's + 0.5 s'Hs + (sigma/3)||s||^3.
    ///
    /// Outer loop is a Levenberg-style iteration on the shift `lambda`
    /// (which at the exact ARC solution satisfies lambda = sigma*||s||):
    /// each pass solves (H + lambda*I) s = -g either by a direct dense
    /// solve (small n), a hand-rolled masked CG (bound-constrained case),
    /// or plain CG, then nudges lambda toward sigma*||s||. A step is
    /// accepted when it decreases the model, the model gradient satisfies
    /// ||grad m|| <= theta*||s||^2, and lambda is close to its target.
    /// Falls back to the best iterate seen, then to the Cauchy step.
    fn solve_arc_subproblem(
        &self,
        h: &Array2<f64>,
        g: &Array1<f64>,
        sigma: f64,
        active: Option<&[bool]>,
    ) -> Option<Array1<f64>> {
        let g_norm = g.dot(g).sqrt();
        if !g_norm.is_finite() {
            return None;
        }
        // Already at (numerical) stationarity: zero step.
        if g_norm <= 1e-16 {
            return Some(Array1::<f64>::zeros(g.len()));
        }

        let rhs = -g.clone();
        let n = g.len();
        let cg_base_iter = (n / 2).clamp(25, 120);
        let active_opt = active;
        let active = active.unwrap_or(&[]);
        let use_mask = !active.is_empty();
        // Entire space constrained: nothing to optimize.
        if use_mask && !any_free_variables(active) {
            return Some(Array1::<f64>::zeros(g.len()));
        }
        let direct_small_dense = prefer_dense_direct(n);
        let (effective_h, effective_rhs) = if direct_small_dense {
            build_masked_subproblem_system(h, &rhs, if use_mask { Some(active) } else { None })
        } else {
            // Placeholders; unused on the CG paths.
            (Array2::<f64>::zeros((0, 0)), Array1::<f64>::zeros(0))
        };
        // Initial shift heuristic, floored away from zero.
        let mut lambda = (sigma * g_norm.sqrt()).max(1e-8);
        // Best iterate by merit = ||grad m|| / (theta*||s||^2).
        let mut best: Option<(f64, Array1<f64>)> = None;
        let mut hs = Array1::<f64>::zeros(n);

        for _ in 0..self.subproblem_max_iterations {
            let mut s = if direct_small_dense {
                match dense_solve_shifted(&effective_h, &effective_rhs, lambda) {
                    Some(v) => v,
                    None => {
                        // Factorization failed: stiffen the shift and retry.
                        lambda = (2.0 * lambda).max(1e-8);
                        continue;
                    }
                }
            } else if use_mask {
                // Hand-rolled CG on the masked shifted system
                // (H + lambda*I) s = -g, re-masking after each update.
                let mut s = Array1::<f64>::zeros(n);
                let mut r = rhs.clone();
                mask_vector_inplace(&mut r, active);
                let mut p = r.clone();
                let mut rtr = r.dot(&r);
                if !rtr.is_finite() {
                    return None;
                }
                for _ in 0..cg_base_iter {
                    masked_hv_inplace(h, &p, active, &mut hs);
                    hs.scaled_add(lambda, &p);
                    let denom = p.dot(&hs);
                    // Indefinite / near-singular curvature: poison s so the
                    // outer loop raises lambda and retries.
                    if !denom.is_finite() || denom <= 1e-14 * p.dot(&p).max(1.0) {
                        s.fill(f64::NAN);
                        break;
                    }
                    let alpha = rtr / denom;
                    if !alpha.is_finite() || alpha <= 0.0 {
                        s.fill(f64::NAN);
                        break;
                    }
                    s.scaled_add(alpha, &p);
                    r.scaled_add(-alpha, &hs);
                    mask_vector_inplace(&mut s, active);
                    mask_vector_inplace(&mut r, active);
                    let rtr_next = r.dot(&r);
                    if !rtr_next.is_finite() {
                        s.fill(f64::NAN);
                        break;
                    }
                    // Residual small relative to the gradient scale: done.
                    if rtr_next.sqrt() <= 1e-10 * g_norm.max(1.0) {
                        break;
                    }
                    let beta = rtr_next / rtr.max(1e-32);
                    if !beta.is_finite() || beta < 0.0 {
                        s.fill(f64::NAN);
                        break;
                    }
                    p *= beta;
                    p += &r;
                    mask_vector_inplace(&mut p, active);
                    rtr = rtr_next;
                }
                s
            } else {
                match cg_solve_adaptive(h, &rhs, cg_base_iter, 1e-10, lambda) {
                    Some(v) => v,
                    None => {
                        lambda = (2.0 * lambda).max(1e-8);
                        continue;
                    }
                }
            };
            if use_mask {
                mask_vector_inplace(&mut s, active);
            }
            // NaN-poisoned or otherwise broken solve: stiffen and retry.
            if s.iter().any(|v| !v.is_finite()) {
                lambda = (2.0 * lambda).max(1e-8);
                continue;
            }

            let (m_delta, s_norm, grad_m) =
                self.arc_model_value(g, h, sigma, &s, if use_mask { Some(active) } else { None });
            if !m_delta.is_finite() || !s_norm.is_finite() {
                lambda = (2.0 * lambda).max(1e-8);
                continue;
            }
            let grad_norm = grad_m.dot(&grad_m).sqrt();
            // Inner-solve accuracy target: ||grad m(s)|| <= theta*||s||^2.
            let target = self.theta * s_norm * s_norm;
            let merit = if target > 0.0 {
                grad_norm / target
            } else {
                grad_norm
            };
            if best.as_ref().map(|(bm, _)| merit < *bm).unwrap_or(true) {
                best = Some((merit, s.clone()));
            }

            // At the exact ARC solution lambda = sigma*||s||; require the
            // current shift to be within 25% of that target.
            let lambda_target = (sigma * s_norm).max(1e-12);
            let rel_lam_gap = (lambda - lambda_target).abs() / lambda.max(1.0);
            if m_delta <= 0.0 && grad_norm <= target.max(1e-14) && rel_lam_gap <= 0.25 {
                return Some(s);
            }

            if m_delta > 0.0 {
                // Model increased: shift much harder.
                lambda = (2.0 * lambda.max(lambda_target)).max(1e-8);
            } else {
                // Model decreased: relax lambda halfway toward its target,
                // with the per-step ratio clamped to [0.25, 4].
                let ratio = (lambda_target / lambda.max(1e-16)).clamp(0.25, 4.0);
                let lambda_next = lambda * ratio;
                let mixed = 0.5 * lambda + 0.5 * lambda_next;
                lambda = mixed.max(1e-12);
            }
        }

        // Budget exhausted: accept the best iterate if it meets the target
        // (without the lambda-gap requirement).
        if let Some((_, s)) = best {
            let (m_delta, s_norm, grad_m) =
                self.arc_model_value(g, h, sigma, &s, if use_mask { Some(active) } else { None });
            let grad_norm = grad_m.dot(&grad_m).sqrt();
            let target = self.theta * s_norm * s_norm;
            if m_delta <= 0.0 && grad_norm <= target.max(1e-14) {
                return Some(s);
            }
        }
        // Last resort: safeguarded Cauchy step.
        // NOTE(review): when use_mask is false, active_opt may still be
        // Some(&[]) — confirm cauchy_arc_step treats an empty mask as
        // "no mask".
        self.cauchy_arc_step(
            g,
            h,
            sigma,
            if use_mask { Some(active) } else { active_opt },
        )
    }
2776
    /// Main ARC (Adaptive Regularization with Cubics) iteration loop.
    ///
    /// Each iteration: test projected-gradient convergence, symmetrize the
    /// Hessian if needed, solve the cubic-regularized subproblem (restricted
    /// to free variables when bounds are active), evaluate the trial point,
    /// and accept/reject via rho = actual/predicted decrease, adapting the
    /// regularization weight `sigma`. Steps bent by the bound projection take
    /// a separate monotone-acceptance path. Curvature pairs (s, y) are kept
    /// in `history` for a possible BFGS fallback.
    fn run<ObjFn>(&mut self, obj_fn: &mut ObjFn) -> Result<Solution, ArcError>
    where
        ObjFn: SecondOrderObjective,
    {
        let n = self.x0.len();
        let mut x_k = self.project_point(&self.x0);
        let mut func_evals = 0usize;
        let mut grad_evals = 0usize;
        let mut hess_evals = 0usize;
        let mut oracle = SecondOrderCache::new(n, self.fd_hessian_step);
        let initial = oracle.eval_cost_grad_hessian(
            obj_fn,
            &x_k,
            self.bounds.as_ref(),
            &mut func_evals,
            &mut grad_evals,
            &mut hess_evals,
        );
        // Recent (s, y) curvature pairs for a potential BFGS warm start.
        let mut history: VecDeque<(Array1<f64>, Array1<f64>)> =
            VecDeque::with_capacity(self.history_cap.max(2));
        let (mut f_k, mut g_k, mut h_k) = match initial {
            Ok(sample) => sample,
            Err(ObjectiveEvalError::Recoverable { .. }) => {
                // Recoverable failure at the very first point: optionally
                // hand off to the first-order BFGS fallback.
                if matches!(self.fallback_policy, FallbackPolicy::AutoBfgs) {
                    return self.run_bfgs_fallback(
                        obj_fn,
                        x_k.clone(),
                        &history,
                        0,
                        func_evals,
                        grad_evals,
                    );
                }
                return Err(ArcError::NonFiniteObjective);
            }
            Err(ObjectiveEvalError::Fatal { message }) => {
                return Err(ArcError::ObjectiveFailed { message });
            }
        };
        if h_k.nrows() != n || h_k.ncols() != n {
            return Err(ArcError::HessianShapeMismatch {
                expected: n,
                got_rows: h_k.nrows(),
                got_cols: h_k.ncols(),
            });
        }
        let mut model_failure_streak = 0usize;
        // Scratch buffer so symmetrization does not reallocate per iteration.
        let mut h_model_workspace = Array2::<f64>::zeros((n, n));

        for k in 0..self.max_iterations {
            let g_proj_k = self.projected_gradient(&x_k, &g_k);
            let g_norm = g_proj_k.dot(&g_proj_k).sqrt();
            if g_norm.is_finite() && g_norm <= self.tolerance {
                return Ok(Solution::gradient_based(
                    x_k,
                    f_k,
                    g_k,
                    g_norm,
                    Some(h_k),
                    k,
                    func_evals,
                    grad_evals,
                    hess_evals,
                ));
            }

            // Use a symmetrized copy only when the oracle's Hessian is
            // noticeably asymmetric.
            let h_model = if hessian_is_effectively_symmetric(&h_k) {
                &h_k
            } else {
                symmetrize_into(&mut h_model_workspace, &h_k);
                &h_model_workspace
            };
            let active = self.active_mask(&x_k, &g_k);
            let any_active = active.iter().copied().any(|v| v);
            let step = if any_active {
                // Every variable pinned at a bound: no free subspace.
                if !any_free_variables(&active) {
                    self.escalate_sigma_on_failure(&mut model_failure_streak);
                    continue;
                }
                match self.solve_arc_subproblem(h_model, &g_proj_k, self.sigma, Some(&active)) {
                    Some(s) => s,
                    None => {
                        self.escalate_sigma_on_failure(&mut model_failure_streak);
                        continue;
                    }
                }
            } else {
                match self.solve_arc_subproblem(h_model, &g_proj_k, self.sigma, None) {
                    Some(s) => s,
                    None => {
                        self.escalate_sigma_on_failure(&mut model_failure_streak);
                        continue;
                    }
                }
            };

            let x_trial_raw = &x_k + &step;
            let x_trial = self.project_point(&x_trial_raw);
            let s_trial = &x_trial - &x_k;
            let s_norm = s_trial.dot(&s_trial).sqrt();
            if !s_norm.is_finite() || s_norm <= 1e-16 {
                self.escalate_sigma_on_failure(&mut model_failure_streak);
                continue;
            }
            // Did the bound projection materially bend the step?
            let step_distortion = (&s_trial - &step).dot(&(&s_trial - &step)).sqrt();
            let step_norm_ref = step.dot(&step).sqrt();
            let proj_changed = step_distortion > 1e-8 * (1.0 + step_norm_ref);
            if proj_changed {
                // The cubic model's predicted decrease no longer applies to
                // the bent step, so fall back to a simple monotone accept:
                // f did not increase and the projected gradient is not worse.
                let projected = oracle.eval_cost_grad_hessian(
                    obj_fn,
                    &x_trial,
                    self.bounds.as_ref(),
                    &mut func_evals,
                    &mut grad_evals,
                    &mut hess_evals,
                );
                let (f_trial, g_trial, h_trial) = match projected {
                    Ok(sample) => sample,
                    Err(ObjectiveEvalError::Recoverable { .. }) => {
                        self.escalate_sigma_on_failure(&mut model_failure_streak);
                        continue;
                    }
                    Err(ObjectiveEvalError::Fatal { message }) => {
                        return Err(ArcError::ObjectiveFailed { message });
                    }
                };
                if h_trial.nrows() != n || h_trial.ncols() != n {
                    return Err(ArcError::HessianShapeMismatch {
                        expected: n,
                        got_rows: h_trial.nrows(),
                        got_cols: h_trial.ncols(),
                    });
                }
                let g_proj_trial = self.projected_gradient(&x_trial, &g_trial);
                let g_proj_trial_norm = g_proj_trial.dot(&g_proj_trial).sqrt();
                if f_trial <= f_k
                    && (g_proj_trial_norm <= g_norm || g_proj_trial_norm <= self.tolerance)
                {
                    // Record the curvature pair if both s and y are nontrivial.
                    let y_k = &g_trial - &g_k;
                    if s_norm > 1e-14 && y_k.dot(&y_k).sqrt() > 1e-14 {
                        if history.len() == self.history_cap.max(2) {
                            history.pop_front();
                        }
                        history.push_back((s_trial.clone(), y_k));
                    }
                    x_k = x_trial;
                    f_k = f_trial;
                    g_k = g_trial;
                    h_k = h_trial;
                    model_failure_streak = 0;
                    // NOTE(review): sigma is grown (gamma2) even on
                    // acceptance here — presumably to stay conservative
                    // after a projection-bent step; confirm intent.
                    self.sigma = (self.sigma * self.gamma2).min(self.sigma_max);
                } else {
                    self.escalate_sigma_on_failure(&mut model_failure_streak);
                }
                continue;
            }
            // Unbent step: re-check the inner-solve acceptance conditions
            // before paying for an objective evaluation.
            let (m_delta_trial, _, grad_m_trial) =
                self.arc_model_value(&g_proj_k, h_model, self.sigma, &s_trial, Some(&active));

            let grad_m_norm = grad_m_trial.dot(&grad_m_trial).sqrt();
            let target_m = self.theta * s_norm * s_norm;
            if !m_delta_trial.is_finite()
                || !grad_m_norm.is_finite()
                || m_delta_trial > 0.0
                || grad_m_norm > target_m.max(1e-14)
            {
                self.escalate_sigma_on_failure(&mut model_failure_streak);
                continue;
            }

            // Predicted decrease; positive by the checks above, but guard
            // against FP surprises.
            let denom = -m_delta_trial;
            if !denom.is_finite() || denom <= 0.0 {
                self.escalate_sigma_on_failure(&mut model_failure_streak);
                continue;
            }

            let (f_trial, g_trial, h_trial) = match oracle.eval_cost_grad_hessian(
                obj_fn,
                &x_trial,
                self.bounds.as_ref(),
                &mut func_evals,
                &mut grad_evals,
                &mut hess_evals,
            ) {
                Ok(sample) => sample,
                Err(ObjectiveEvalError::Recoverable { .. }) => {
                    self.escalate_sigma_on_failure(&mut model_failure_streak);
                    continue;
                }
                Err(ObjectiveEvalError::Fatal { message }) => {
                    return Err(ArcError::ObjectiveFailed { message });
                }
            };
            // Ratio of actual to predicted decrease.
            let rho = (f_k - f_trial) / denom;
            model_failure_streak = 0;
            if rho >= self.eta1 {
                // Successful step: validate the new Hessian shape, record
                // the curvature pair, and move to the trial point.
                if h_trial.nrows() != n || h_trial.ncols() != n {
                    return Err(ArcError::HessianShapeMismatch {
                        expected: n,
                        got_rows: h_trial.nrows(),
                        got_cols: h_trial.ncols(),
                    });
                }
                let y_k = &g_trial - &g_k;
                if s_norm > 1e-14 && y_k.dot(&y_k).sqrt() > 1e-14 {
                    if history.len() == self.history_cap.max(2) {
                        history.pop_front();
                    }
                    history.push_back((s_trial.clone(), y_k));
                }
                x_k = x_trial;
                f_k = f_trial;
                g_k = g_trial;
                h_k = h_trial;
            }

            // Sigma schedule: shrink on very successful steps, hold on
            // merely successful ones, grow on rejection, and grow hard on a
            // non-finite rho.
            if rho >= self.eta2 {
                self.sigma = (self.sigma * self.gamma1).max(self.sigma_min);
            } else if rho >= self.eta1 {
                self.sigma = self.sigma.max(self.sigma_min);
            } else if rho.is_finite() {
                self.sigma = (self.sigma * self.gamma2).min(self.sigma_max);
            } else {
                self.sigma = (self.sigma * self.gamma3).min(self.sigma_max);
            }
        }

        // Iteration budget exhausted: report the best-known point inside
        // the error so callers can still use it.
        let g_proj_k = self.projected_gradient(&x_k, &g_k);
        let g_norm = g_proj_k.dot(&g_proj_k).sqrt();
        Err(ArcError::MaxIterationsReached {
            last_solution: Box::new(Solution::gradient_based(
                x_k,
                f_k,
                g_k,
                g_norm,
                Some(h_k),
                self.max_iterations,
                func_evals,
                grad_evals,
                hess_evals,
            )),
        })
    }
3043}
3044
/// Mutable state and tuning knobs for the BFGS driver (wrapped by the
/// public `Bfgs` type below).
struct BfgsCore {
    /// Starting point; projected onto `bounds` at the beginning of `run`.
    x0: Array1<f64>,
    /// Convergence threshold on the projected-gradient norm.
    tolerance: f64,
    max_iterations: usize,
    /// Baseline Armijo (c1) and curvature (c2) constants; the `_adapt`
    /// copies below are the per-run adaptive versions actually used.
    c1: f64,
    c2: f64,
    /// Noise-tolerance scales fed to `eps_f` / `eps_g`.
    tau_f: f64,
    tau_g: f64,
    /// Optional box constraints.
    bounds: Option<BoxSpec>,
    /// Policy for steps whose objective change is within noise.
    flat_step_policy: FlatStepPolicy,
    /// xorshift* PRNG state (see `next_rand_sym`).
    rng_state: u64,
    flat_accept_streak: usize,
    /// Rescue strategy when progress stalls (disabled in local mode).
    rescue_policy: RescuePolicy,
    stall_policy: StallPolicy,
    stall_noimprove_streak: usize,
    /// Multiplier on the slack term in the approximate curvature test.
    curv_slack_scale: f64,
    /// Gradient-norm drop factor for the GradDrop acceptance path.
    grad_drop_factor: f64,
    /// Relative f-improvement below which an iteration counts as no-improve.
    tol_f_rel: f64,
    max_no_improve: usize,
    no_improve_streak: usize,
    /// Nonmonotone (GLL) window of recent objective values.
    gll: GllWindow,
    c1_adapt: f64,
    c2_adapt: f64,
    wolfe_fail_streak: usize,
    primary_strategy: LineSearchStrategy,
    trust_radius: f64,
    /// Best (f, x, g) probe observed so far across the run.
    global_best: Option<ProbeBest>,
    nonfinite_seen: bool,
    wolfe_clean_successes: usize,
    bt_clean_successes: usize,
    ls_failures_in_row: usize,
    /// Consecutive curvature-solve failures; two in a row trigger a
    /// scaled-identity reset of B^{-1} (see `try_trust_region_step`).
    chol_fail_iters: usize,
    spd_fail_seen: bool,
    /// Optional warm-start inverse-Hessian approximation (validated in `run`).
    initial_b_inv: Option<Array2<f64>>,
    /// Projected-gradient norm at the start point; baseline for
    /// `refresh_local_mode`.
    initial_grad_norm: f64,
    /// When true, switch to strict Strong-Wolfe / monotone behavior near a
    /// solution (relaxed acceptors and rescue are disabled).
    local_mode: bool,
}
3089
/// Public BFGS optimizer: pairs the solver state (`BfgsCore`) with the
/// user-supplied objective.
pub struct Bfgs<ObjFn> {
    core: BfgsCore,
    obj_fn: ObjFn,
}
3095
3096impl BfgsCore {
    /// Consecutive-failure count after which a fallback strategy kicks in.
    /// NOTE(review): consumed by logic outside this excerpt — confirm usage
    /// at the call sites.
    const FALLBACK_THRESHOLD: usize = 3;
3098
3099 fn projected_gradient(&self, x: &Array1<f64>, g: &Array1<f64>) -> Array1<f64> {
3100 if let Some(bounds) = &self.bounds {
3101 bounds.projected_gradient(x, g)
3102 } else {
3103 g.clone()
3104 }
3105 }
3106
3107 fn active_mask(&self, x: &Array1<f64>, g: &Array1<f64>) -> Vec<bool> {
3108 if let Some(bounds) = &self.bounds {
3109 bounds.active_mask(x, g)
3110 } else {
3111 vec![false; x.len()]
3112 }
3113 }
3114
3115 fn project_with_step(
3116 &self,
3117 x: &Array1<f64>,
3118 d: &Array1<f64>,
3119 alpha: f64,
3120 ) -> (Array1<f64>, Array1<f64>, bool) {
3121 let trial = x + alpha * d;
3122 let x_new = self.project_point(&trial);
3123 let kinked = (&x_new - &trial)
3124 .iter()
3125 .zip(trial.iter())
3126 .any(|(dv, tv)| dv.abs() > 1e-12 * (1.0 + tv.abs()));
3127 let s = &x_new - x;
3128 (x_new, s, kinked)
3129 }
3130
3131 #[inline]
3132 fn step_tolerance(&self, x: &Array1<f64>) -> f64 {
3133 1e-12 * (1.0 + x.dot(x).sqrt()) + 1e-16
3134 }
3135
3136 #[inline]
3137 fn feasible_step_small(&self, x_prev: &Array1<f64>, x_next: &Array1<f64>) -> bool {
3138 let s = x_next - x_prev;
3139 self.projected_step_small(x_prev, &s)
3140 }
3141
3142 #[inline]
3143 fn projected_step_small(&self, x_prev: &Array1<f64>, s: &Array1<f64>) -> bool {
3144 s.dot(s).sqrt() <= self.step_tolerance(x_prev)
3145 }
3146
3147 #[inline]
3148 fn stagnation_converged(
3149 &self,
3150 x_prev: &Array1<f64>,
3151 x_next: &Array1<f64>,
3152 g_proj_next: &Array1<f64>,
3153 ) -> bool {
3154 let gnorm = g_proj_next.dot(g_proj_next).sqrt();
3155 gnorm < self.tolerance || self.feasible_step_small(x_prev, x_next)
3156 }
3157
3158 #[inline]
3159 fn update_no_improve_streak(&mut self, rel_impr: f64) -> bool {
3160 if rel_impr <= self.tol_f_rel {
3161 self.no_improve_streak += 1;
3162 } else {
3163 self.no_improve_streak = 0;
3164 }
3165 self.no_improve_streak >= self.max_no_improve
3166 }
3167
3168 fn try_trust_region_step<ObjFn>(
3171 &mut self,
3172 obj_fn: &mut ObjFn,
3173 oracle: &mut FirstOrderCache,
3174 b_inv: &mut Array2<f64>,
3175 x_k: &Array1<f64>,
3176 f_k: f64,
3177 g_k: &Array1<f64>,
3178 func_evals: &mut usize,
3179 grad_evals: &mut usize,
3180 ) -> Option<(Array1<f64>, f64, Array1<f64>)>
3181 where
3182 ObjFn: FirstOrderObjective,
3183 {
3184 let n = b_inv.nrows();
3185 let mut b_inv_backup = Array2::<f64>::zeros((n, n));
3186 let delta = self.trust_radius;
3187 let g_proj_k = self.projected_gradient(x_k, g_k);
3188 let active = self.active_mask(x_k, g_k);
3189 let active_before = active.clone();
3190 let active_opt = if active.iter().copied().any(|v| v) {
3191 if !any_free_variables(&active) {
3192 self.trust_radius = (delta * 0.5).max(1e-12);
3193 return None;
3194 }
3195 Some(active.as_slice())
3196 } else {
3197 None
3198 };
3199 let (p_tr, pred_dec_tr) = self.trust_region_dogleg(b_inv, &g_proj_k, delta, active_opt)?;
3200 let raw_try = x_k + &p_tr;
3201 let x_try = self.project_point(&raw_try);
3202 let s_tr = &x_try - x_k;
3203 let g_old = g_k.clone();
3204 let (f_try, g_try) =
3205 bfgs_eval_cost_grad(oracle, obj_fn, &x_try, func_evals, grad_evals).ok()?;
3206 let act_dec = f_k - f_try;
3207 let p_diff = &s_tr - &p_tr;
3208 let p_diff_norm = p_diff.dot(&p_diff).sqrt();
3209 let p_norm = p_tr.dot(&p_tr).sqrt();
3210 let proj_changed = p_diff_norm > 1e-6 * (1.0 + p_norm);
3211 if proj_changed {
3212 let descent_ok = g_proj_k.dot(&s_tr) <= -eps_g(&g_proj_k, &s_tr, self.tau_g);
3214 if !descent_ok {
3215 self.trust_radius = (delta * 0.5).max(1e-12);
3216 return None;
3217 }
3218 }
3219 let pred_dec = if proj_changed {
3220 self.trust_region_predicted_decrease(b_inv, &g_proj_k, &s_tr, active_opt)?
3221 } else {
3222 pred_dec_tr
3223 };
3224 if !pred_dec.is_finite() || pred_dec <= 0.0 {
3225 self.trust_radius = (delta * 0.5).max(1e-12);
3226 return None;
3227 }
3228 let rho = act_dec / pred_dec;
3229 if rho > 0.75 && s_tr.dot(&s_tr).sqrt() > 0.99 * delta {
3230 self.trust_radius = (delta * 2.0).min(1e6);
3231 } else if rho < 0.25 {
3232 self.trust_radius = (delta * 0.5).max(1e-12);
3233 }
3234 if rho <= 0.1 || !f_try.is_finite() || g_try.iter().any(|v| !v.is_finite()) {
3235 return None;
3236 }
3237 self.gll.push(f_try);
3240 let maybe_f = self.global_best.as_ref().map(|b| b.f);
3241 if let Some(bf) = maybe_f {
3242 if f_try < bf - eps_f(bf, self.tau_f) {
3243 self.global_best = Some(ProbeBest {
3244 f: f_try,
3245 x: x_try.clone(),
3246 g: g_try.clone(),
3247 });
3248 }
3249 } else {
3250 self.global_best = Some(ProbeBest::new(&x_try, f_try, &g_try));
3251 }
3252
3253 let poor_model = rho <= 0.25;
3255 let mut s_update = s_tr.clone();
3256 let mut y_update = &g_try - &g_old;
3257 if let Some(bounds) = &self.bounds {
3258 let active_after = bounds.active_mask(&x_try, &g_try);
3259 for i in 0..n {
3260 let tiny_step = s_update[i].abs() <= 1e-14 * (1.0 + x_k[i].abs());
3261 if (active_before[i] && active_after[i]) || tiny_step {
3262 s_update[i] = 0.0;
3263 y_update[i] = 0.0;
3264 }
3265 }
3266 }
3267 let s_norm_tr = s_update.dot(&s_update).sqrt();
3268 let mut update_status = "applied";
3269 if !poor_model && s_norm_tr > 1e-14 {
3270 let mean_diag = (0..n).map(|i| b_inv[[i, i]].abs()).sum::<f64>() / (n as f64);
3271 let ridge = (1e-10 * mean_diag).max(1e-16);
3272 if let Some(h_s) = cg_solve_adaptive(b_inv, &s_update, 25, 1e-10, ridge) {
3274 let s_h_s = s_update.dot(&h_s);
3275 let sy_tr = s_update.dot(&y_update);
3276 let denom_raw = s_h_s - sy_tr;
3277 let denom = if denom_raw <= 0.0 { 1e-16 } else { denom_raw };
3278 let theta_raw = if sy_tr < 0.2 * s_h_s {
3279 (0.8 * s_h_s) / denom
3280 } else {
3281 1.0
3282 };
3283 let theta = theta_raw.clamp(0.0, 1.0);
3284 let mut y_tilde = &y_update * theta + &h_s * (1.0 - theta);
3285 let mut sty = s_update.dot(&y_tilde);
3286 let mut y_norm = y_tilde.dot(&y_tilde).sqrt();
3287 let kappa = 1e-4;
3288 let min_curv = kappa * s_norm_tr * y_norm;
3289 if sty < min_curv {
3290 let beta = (min_curv - sty) / (s_norm_tr * s_norm_tr);
3291 y_tilde = &y_tilde + &s_update * beta;
3292 sty = s_update.dot(&y_tilde);
3293 y_norm = y_tilde.dot(&y_tilde).sqrt();
3294 }
3295 let rel = if s_norm_tr > 0.0 && y_norm > 0.0 {
3296 sty / (s_norm_tr * y_norm)
3297 } else {
3298 0.0
3299 };
3300 if !sty.is_finite() || rel < 1e-8 {
3301 update_status = "skipped";
3302 for i in 0..n {
3303 b_inv[[i, i]] *= 1.0 + 1e-3;
3304 }
3305 } else {
3306 if !apply_inverse_bfgs_update_in_place(
3307 b_inv,
3308 &s_update,
3309 &y_tilde,
3310 &mut b_inv_backup,
3311 ) {
3312 b_inv.assign(&b_inv_backup);
3313 for i in 0..n {
3314 b_inv[[i, i]] += 1e-6;
3315 }
3316 update_status = "reverted";
3317 }
3318 }
3319 if !has_finite_positive_diagonal(b_inv) {
3320 for i in 0..n {
3321 b_inv[[i, i]] += 1e-12;
3322 }
3323 }
3324 } else {
3325 self.spd_fail_seen = true;
3326 self.chol_fail_iters = self.chol_fail_iters + 1;
3327 update_status = "skipped";
3328 }
3329 if self.spd_fail_seen && self.chol_fail_iters >= 2 {
3330 let sy = s_update.dot(&y_update);
3331 let yy = y_update.dot(&y_update);
3332 let mut lambda = if yy > 0.0 { (sy / yy).abs() } else { 1.0 };
3333 lambda = lambda.clamp(1e-6, 1e6);
3334 *b_inv = scaled_identity(n, lambda);
3335 self.chol_fail_iters = 0;
3336 update_status = "reverted";
3337 }
3338 } else {
3339 update_status = "skipped";
3340 }
3341 log::info!(
3342 "[BFGS] step accepted via {:?}; inverse update {}",
3343 AcceptKind::TrustRegion,
3344 update_status
3345 );
3346 Some((x_try, f_try, g_try))
3347 }
3348
    /// Construct a solver core at `x0` with the default settings (these
    /// match the `Profile::Robust` knobs; see `apply_profile`).
    fn new(x0: Array1<f64>) -> Self {
        Self {
            x0,
            // Convergence / budget.
            tolerance: 1e-5,
            max_iterations: 100,
            // Wolfe constants and noise-tolerance scales.
            c1: 1e-4,
            c2: 0.9,
            tau_f: 1e3,
            tau_g: 1e2,
            bounds: None,
            // Flat-step handling and a fixed deterministic RNG seed.
            flat_step_policy: FlatStepPolicy::MidpointWithJiggle { scale: 1e-3 },
            rng_state: 0xB5F0_D00D_1234_5678u64,
            flat_accept_streak: 0,
            rescue_policy: RescuePolicy::CoordinateHybrid {
                pool_mult: 4.0,
                heads: 2,
            },
            stall_policy: StallPolicy::On { window: 3 },
            stall_noimprove_streak: 0,
            curv_slack_scale: 1.0,
            grad_drop_factor: 0.9,
            tol_f_rel: 1e-8,
            max_no_improve: 5,
            no_improve_streak: 0,
            gll: GllWindow::new(8),
            // Adaptive copies of c1/c2 (reset again at the start of `run`).
            c1_adapt: 1e-4,
            c2_adapt: 0.9,
            wolfe_fail_streak: 0,
            primary_strategy: LineSearchStrategy::StrongWolfe,
            trust_radius: 1.0,
            global_best: None,
            nonfinite_seen: false,
            wolfe_clean_successes: 0,
            bt_clean_successes: 0,
            ls_failures_in_row: 0,
            chol_fail_iters: 0,
            spd_fail_seen: false,
            initial_b_inv: None,
            initial_grad_norm: 0.0,
            local_mode: false,
        }
    }
3392
3393 fn apply_profile(&mut self, profile: Profile) {
3394 match profile {
3395 Profile::Robust => {
3396 self.tau_f = 1e3;
3397 self.tau_g = 1e2;
3398 self.flat_step_policy = FlatStepPolicy::MidpointWithJiggle { scale: 1e-3 };
3399 self.rescue_policy = RescuePolicy::CoordinateHybrid {
3400 pool_mult: 4.0,
3401 heads: 2,
3402 };
3403 self.stall_policy = StallPolicy::On { window: 3 };
3404 self.curv_slack_scale = 1.0;
3405 self.tol_f_rel = 1e-8;
3406 self.max_no_improve = 5;
3407 }
3408 Profile::Deterministic => {
3409 self.tau_f = 1e2;
3410 self.tau_g = 1e2;
3411 self.flat_step_policy = FlatStepPolicy::Strict;
3412 self.rescue_policy = RescuePolicy::Off;
3413 self.stall_policy = StallPolicy::On { window: 3 };
3414 self.curv_slack_scale = 1.0;
3415 self.tol_f_rel = 1e-8;
3416 self.max_no_improve = 5;
3417 }
3418 Profile::Aggressive => {
3419 self.tau_f = 1e4;
3420 self.tau_g = 1e3;
3421 self.flat_step_policy = FlatStepPolicy::MidpointWithJiggle { scale: 1e-3 };
3422 self.rescue_policy = RescuePolicy::CoordinateHybrid {
3423 pool_mult: 6.0,
3424 heads: 4,
3425 };
3426 self.stall_policy = StallPolicy::Off;
3427 self.curv_slack_scale = 2.0;
3428 self.tol_f_rel = 1e-10;
3429 self.max_no_improve = 10;
3430 }
3431 }
3432 }
3433
3434 #[inline]
3435 fn accept_armijo(&self, f_k: f64, gk_ts: f64, f_i: f64) -> bool {
3436 let c1 = self.c1_adapt;
3437 let epsf_k = eps_f(f_k, self.tau_f);
3438 f_i <= f_k + c1 * gk_ts + epsf_k
3439 }
3440
3441 #[inline]
3442 fn accept_gll_nonmonotone(&self, fmax: f64, gk_ts: f64, f_i: f64) -> bool {
3443 !self.local_mode && {
3444 let c1 = self.c1_adapt;
3445 let epsf_max = eps_f(fmax, self.tau_f);
3446 f_i <= fmax + c1 * gk_ts + epsf_max
3447 }
3448 }
3449
    /// Relaxed acceptance paths (approximate Wolfe, gradient drop) are only
    /// allowed while globally exploring; local mode requires strict tests.
    #[inline]
    fn relaxed_acceptors_enabled(&self) -> bool {
        !self.local_mode
    }
3454
3455 #[inline]
3456 fn jiggle_enabled(&self) -> bool {
3457 matches!(
3458 self.flat_step_policy,
3459 FlatStepPolicy::MidpointWithJiggle { .. }
3460 ) && !self.local_mode
3461 }
3462
3463 #[inline]
3464 fn jiggle_scale(&self) -> f64 {
3465 match self.flat_step_policy {
3466 FlatStepPolicy::MidpointWithJiggle { scale } => scale,
3467 FlatStepPolicy::Strict => 0.0,
3468 }
3469 }
3470
3471 #[inline]
3472 fn rescue_enabled(&self) -> bool {
3473 !matches!(self.rescue_policy, RescuePolicy::Off) && !self.local_mode
3474 }
3475
3476 #[inline]
3477 fn refresh_local_mode(&mut self, g_norm: f64) {
3478 let baseline = self.initial_grad_norm.max(self.tolerance).max(1e-16);
3479 let gradient_small = g_norm <= 1e-2 * baseline;
3480 let clean_successes = self.wolfe_clean_successes + self.bt_clean_successes;
3481 self.local_mode = gradient_small || clean_successes >= 5;
3482 if self.local_mode {
3483 self.primary_strategy = LineSearchStrategy::StrongWolfe;
3484 self.c1_adapt = self.c1;
3485 self.c2_adapt = self.c2;
3486 self.flat_accept_streak = 0;
3487 self.curv_slack_scale = 1.0;
3488 self.grad_drop_factor = 0.9;
3489 self.gll.set_cap(1);
3490 }
3491 }
3492
    /// Dogleg solve of the trust-region subproblem using the inverse
    /// Hessian approximation `b_inv` (products with B are obtained by
    /// CG-solving B^{-1} v = x, ridge-stabilized).
    ///
    /// Returns the dogleg step and its predicted model decrease, or `None`
    /// when the subspace is fully constrained or the linear algebra fails.
    fn trust_region_dogleg(
        &self,
        b_inv: &Array2<f64>,
        g: &Array1<f64>,
        delta: f64,
        active: Option<&[bool]>,
    ) -> Option<(Array1<f64>, f64)> {
        let n = b_inv.nrows();
        let active = active.unwrap_or(&[]);
        let use_mask = !active.is_empty();
        if use_mask && !any_free_variables(active) {
            return None;
        }
        let mean_diag = (0..n).map(|i| b_inv[[i, i]].abs()).sum::<f64>() / (n as f64);
        let ridge = (1e-10 * mean_diag).max(1e-16);
        // z ≈ B*g (solve B^{-1} z = g).
        let z = if use_mask {
            cg_solve_masked_adaptive(b_inv, g, active, 50, 1e-10, ridge)?
        } else {
            cg_solve_adaptive(b_inv, g, 50, 1e-10, ridge)?
        };
        let gnorm2 = g.dot(g);
        if !gnorm2.is_finite() || gnorm2 <= 0.0 {
            return None;
        }
        let gHg = g.dot(&z).max(1e-16);
        // Cauchy (unconstrained steepest-descent) point p_u = -(g'g/g'Bg) g.
        let tau = gnorm2 / gHg;
        let p_u = -&(g * tau);
        // Quasi-Newton point p_b = -B^{-1} g.
        let mut h_g = Array1::<f64>::zeros(n);
        if use_mask {
            masked_hv_inplace(b_inv, g, active, &mut h_g);
        } else {
            h_g.assign(&b_inv.dot(g));
        }
        let p_b = -h_g;
        let p_b_norm = p_b.dot(&p_b).sqrt();
        // Case 1: the Newton point lies inside the region — take it.
        if p_b_norm <= delta {
            let pred_dec = self.trust_region_predicted_decrease(
                b_inv,
                g,
                &p_b,
                if use_mask { Some(active) } else { None },
            )?;
            return Some((p_b, pred_dec));
        }
        let p_u_norm = p_u.dot(&p_u).sqrt();
        // Case 2: even the Cauchy point leaves the region — take the
        // boundary point along -g.
        if p_u_norm >= delta {
            let p = -g * (delta / gnorm2.sqrt());
            let pred_dec = self.trust_region_predicted_decrease(
                b_inv,
                g,
                &p,
                if use_mask { Some(active) } else { None },
            )?;
            return Some((p, pred_dec));
        }
        // Case 3: dogleg segment p(t) = p_u + t*(p_b - p_u); find t in (0,1)
        // where ||p(t)|| = delta by solving a*t^2 + b*t + c = 0.
        let s = &p_b - &p_u;
        let a = s.dot(&s);
        let b = 2.0 * p_u.dot(&s);
        let c = p_u.dot(&p_u) - delta * delta;
        let disc = b * b - 4.0 * a * c;
        if !disc.is_finite() || disc < 0.0 {
            return None;
        }
        let sqrt_disc = disc.sqrt();
        let t1 = (-b - sqrt_disc) / (2.0 * a);
        let t2 = (-b + sqrt_disc) / (2.0 * a);
        let mut candidates: Vec<f64> = vec![];
        if t1.is_finite() && t1 > 0.0 && t1 < 1.0 {
            candidates.push(t1);
        }
        if t2.is_finite() && t2 > 0.0 && t2 < 1.0 {
            candidates.push(t2);
        }
        // Smallest valid crossing; midpoint fallback when neither root is
        // usable.
        let t: f64 = if !candidates.is_empty() {
            candidates.into_iter().fold(1.0, f64::min)
        } else {
            0.5
        };
        let mut p = &p_u + &(s * t);
        // Final safety clip back to the boundary.
        let p_norm = p.dot(&p).sqrt();
        if p_norm.is_finite() && p_norm > delta && delta.is_finite() && delta > 0.0 {
            p = p * (delta / p_norm);
        }
        let pred_dec = self.trust_region_predicted_decrease(
            b_inv,
            g,
            &p,
            if use_mask { Some(active) } else { None },
        )?;
        Some((p, pred_dec))
    }
3589
3590 fn trust_region_predicted_decrease(
3591 &self,
3592 b_inv: &Array2<f64>,
3593 g: &Array1<f64>,
3594 s: &Array1<f64>,
3595 active: Option<&[bool]>,
3596 ) -> Option<f64> {
3597 let n = b_inv.nrows();
3598 let mean_diag = (0..n).map(|i| b_inv[[i, i]].abs()).sum::<f64>() / (n as f64);
3599 let ridge = (1e-10 * mean_diag).max(1e-16);
3600 let hs = if let Some(active) = active {
3601 cg_solve_masked_adaptive(b_inv, s, active, 50, 1e-10, ridge)?
3602 } else {
3603 cg_solve_adaptive(b_inv, s, 50, 1e-10, ridge)?
3604 };
3605 let pred = g.dot(s) + 0.5 * s.dot(&hs);
3606 let pred_dec = -pred;
3607 if pred_dec.is_finite() && pred_dec > 0.0 {
3608 Some(pred_dec)
3609 } else {
3610 None
3611 }
3612 }
3613
3614 fn project_point(&self, x: &Array1<f64>) -> Array1<f64> {
3615 if let Some(bounds) = &self.bounds {
3616 bounds.project(x)
3617 } else {
3618 x.clone()
3619 }
3620 }
3621
3622 fn next_rand_sym(&mut self) -> f64 {
3624 let mut x = self.rng_state;
3625 x ^= x >> 12;
3627 x ^= x << 25;
3628 x ^= x >> 27;
3629 x = x.wrapping_mul(0x2545F4914F6CDD1Du64);
3630 self.rng_state = x;
3631 let u = ((x >> 11) as f64) * (1.0 / (1u64 << 53) as f64);
3633 2.0 * u - 1.0
3634 }
3635
3636 fn run<ObjFn>(&mut self, obj_fn: &mut ObjFn) -> Result<Solution, BfgsError>
3637 where
3638 ObjFn: FirstOrderObjective,
3639 {
3640 let n = self.x0.len();
3641 let mut x_k = self.project_point(&self.x0);
3642 let mut oracle = FirstOrderCache::new(x_k.len());
3643 let mut func_evals = 0;
3644 let mut grad_evals = 0;
3645 let mut b_inv_backup = Array2::<f64>::zeros((n, n));
3646 let initial = oracle
3647 .eval_cost_grad(obj_fn, &x_k, &mut func_evals, &mut grad_evals)
3648 .map_err(|err| match err {
3649 ObjectiveEvalError::Recoverable { message }
3650 | ObjectiveEvalError::Fatal { message } => BfgsError::ObjectiveFailed { message },
3651 })?;
3652 let (mut f_k, mut g_k) = initial;
3653 if !f_k.is_finite() || g_k.iter().any(|v| !v.is_finite()) {
3654 return Err(BfgsError::GradientIsNaN);
3655 }
3656 let mut g_proj_k = self.projected_gradient(&x_k, &g_k);
3657 let mut active_mask = if let Some(bounds) = &self.bounds {
3658 bounds.active_mask(&x_k, &g_k)
3659 } else {
3660 vec![false; n]
3661 };
3662
3663 if !matches!(self.primary_strategy, LineSearchStrategy::StrongWolfe)
3664 && self.wolfe_fail_streak != 0
3665 {
3666 return Err(BfgsError::InternalInvariant {
3667 message: "primary strategy mismatch with fail streak".to_string(),
3668 });
3669 }
3670 if !self.gll.buf.is_empty() && self.gll.buf.len() > self.gll.cap {
3671 return Err(BfgsError::InternalInvariant {
3672 message: "GLL window exceeded capacity".to_string(),
3673 });
3674 }
3675 if !self.trust_radius.is_finite() {
3676 return Err(BfgsError::InternalInvariant {
3677 message: "trust radius is non-finite".to_string(),
3678 });
3679 }
3680 self.wolfe_fail_streak = 0;
3681 self.wolfe_clean_successes = 0;
3682 self.bt_clean_successes = 0;
3683 self.ls_failures_in_row = 0;
3684 self.nonfinite_seen = false;
3685 self.chol_fail_iters = 0;
3686 self.spd_fail_seen = false;
3687 self.flat_accept_streak = 0;
3688
3689 let mut b_inv = if let Some(h0) = self.initial_b_inv.clone() {
3690 if h0.nrows() == n && h0.ncols() == n && h0.iter().all(|v| v.is_finite()) {
3691 h0
3692 } else {
3693 Array2::<f64>::eye(n)
3694 }
3695 } else {
3696 Array2::<f64>::eye(n)
3697 };
3698
3699 self.gll.clear();
3701 self.gll.push(f_k);
3702 self.global_best = Some(ProbeBest::new(&x_k, f_k, &g_k));
3703 self.c1_adapt = self.c1;
3704 self.c2_adapt = self.c2;
3705 self.primary_strategy = LineSearchStrategy::StrongWolfe;
3706 self.wolfe_fail_streak = 0;
3707 let g0_norm = g_proj_k.dot(&g_proj_k).sqrt();
3709 self.initial_grad_norm = g0_norm;
3710 self.local_mode = false;
3711 let delta0 = if g0_norm.is_finite() && g0_norm > 0.0 {
3712 (10.0 / g0_norm).min(1.0)
3713 } else {
3714 1.0
3715 };
3716 self.trust_radius = delta0;
3717
3718 let mut f_last_accepted = f_k;
3719 for k in 0..self.max_iterations {
3720 self.nonfinite_seen = false;
3722 self.chol_fail_iters = 0;
3723 self.spd_fail_seen = false;
3724 g_proj_k = self.projected_gradient(&x_k, &g_k);
3725 let g_norm = g_proj_k.dot(&g_proj_k).sqrt();
3726 if !g_norm.is_finite() {
3727 log::warn!(
3728 "[BFGS] Non-finite gradient norm at iter {}: g_norm={:?}",
3729 k,
3730 g_norm
3731 );
3732 return Err(BfgsError::GradientIsNaN);
3733 }
3734 self.refresh_local_mode(g_norm);
3735 if g_norm < self.tolerance {
3736 let sol = Solution::gradient_based(
3737 x_k, f_k, g_k, g_norm, None, k, func_evals, grad_evals, 0,
3738 );
3739 log::info!(
3740 "[BFGS] Converged by gradient: iters={}, f={:.6e}, ||g||={:.3e}, fe={}, ge={}, Δ={:.3e}",
3741 k,
3742 sol.final_value,
3743 sol.final_gradient_norm
3744 .expect("gradient-based solution must report gradient norm"),
3745 sol.func_evals,
3746 sol.grad_evals,
3747 self.trust_radius
3748 );
3749 return Ok(sol);
3750 }
3751
3752 let mut present_d_k = -b_inv.dot(&g_proj_k);
3753 if let Some(bounds) = &self.bounds {
3754 for (i, &active) in active_mask.iter().enumerate() {
3755 if active {
3756 present_d_k[i] = 0.0;
3757 }
3758 }
3759 for i in 0..present_d_k.len() {
3761 if present_d_k[i] < 0.0 && x_k[i] <= bounds.lower[i] + bounds.tol {
3762 present_d_k[i] = 0.0;
3763 }
3764 if present_d_k[i] > 0.0 && x_k[i] >= bounds.upper[i] - bounds.tol {
3765 present_d_k[i] = 0.0;
3766 }
3767 }
3768 }
3769 let gdotd = g_proj_k.dot(&present_d_k);
3771 let dnorm = present_d_k.dot(&present_d_k).sqrt();
3772 let tiny_d = dnorm <= 1e-14 * (1.0 + x_k.dot(&x_k).sqrt());
3773 let eps_dir = eps_g(&g_proj_k, &present_d_k, self.tau_g);
3774 if gdotd >= -eps_dir || tiny_d {
3775 log::warn!("[BFGS] Non-descent direction; resetting to -g and B_inv=I.");
3776 b_inv = Array2::eye(n);
3777 present_d_k = -g_proj_k.clone();
3778 if let Some(bounds) = &self.bounds {
3779 for (i, &active) in active_mask.iter().enumerate() {
3780 if active {
3781 present_d_k[i] = 0.0;
3782 }
3783 }
3784 for i in 0..present_d_k.len() {
3785 if present_d_k[i] < 0.0 && x_k[i] <= bounds.lower[i] + bounds.tol {
3786 present_d_k[i] = 0.0;
3787 }
3788 if present_d_k[i] > 0.0 && x_k[i] >= bounds.upper[i] - bounds.tol {
3789 present_d_k[i] = 0.0;
3790 }
3791 }
3792 }
3793 }
3794
3795 let active_before = active_mask.clone();
3797 let (alpha_k, mut f_next, mut g_next, f_evals, g_evals, mut accept_kind) = {
3798 let search_result = match self.primary_strategy {
3799 LineSearchStrategy::StrongWolfe => line_search(
3800 self,
3801 obj_fn,
3802 &mut oracle,
3803 &x_k,
3804 &present_d_k,
3805 f_k,
3806 &g_k,
3807 self.c1_adapt,
3808 self.c2_adapt,
3809 ),
3810 LineSearchStrategy::Backtracking => backtracking_line_search(
3811 self,
3812 obj_fn,
3813 &mut oracle,
3814 &x_k,
3815 &present_d_k,
3816 f_k,
3817 &g_k,
3818 ),
3819 };
3820
3821 match search_result {
3822 Ok(result) => {
3823 self.wolfe_fail_streak = 0;
3825 self.ls_failures_in_row = 0;
3826 if self.wolfe_clean_successes >= 2 || self.bt_clean_successes >= 2 {
3828 self.c1_adapt = self.c1;
3829 self.c2_adapt = self.c2;
3830 } else {
3831 self.c1_adapt = (self.c1_adapt * 0.9).max(self.c1);
3832 self.c2_adapt = (self.c2_adapt * 1.1).min(self.c2);
3833 }
3834 match self.primary_strategy {
3835 LineSearchStrategy::StrongWolfe => {
3836 self.wolfe_clean_successes += 1;
3837 self.bt_clean_successes = 0;
3838 if self.wolfe_clean_successes >= 3 {
3839 self.gll.set_cap(8);
3840 }
3841 }
3842 LineSearchStrategy::Backtracking => {
3843 self.bt_clean_successes += 1;
3844 self.wolfe_clean_successes = 0;
3845 }
3846 }
3847 result
3848 }
3849 Err(e) => {
3850 match e {
3852 LineSearchError::StepSizeTooSmall => {
3853 log::debug!("[BFGS] Line search failed: step size too small.");
3854 }
3855 LineSearchError::MaxAttempts(attempts) => {
3856 log::debug!(
3857 "[BFGS] Line search failed: max attempts reached ({attempts})."
3858 );
3859 }
3860 LineSearchError::ObjectiveFailed(message) => {
3861 return Err(BfgsError::ObjectiveFailed { message });
3862 }
3863 }
3864 if matches!(self.primary_strategy, LineSearchStrategy::StrongWolfe) {
3866 let streak = self.wolfe_fail_streak + 1;
3867 self.wolfe_fail_streak = streak;
3868 log::warn!(
3869 "[BFGS Adaptive] Strong Wolfe failed at iter {}. Falling back to Backtracking.",
3870 k
3871 );
3872 if streak == 1 {
3874 self.c2_adapt = 0.5;
3875 }
3876 if streak >= 2 {
3877 self.c2_adapt = 0.1;
3878 self.c1_adapt = 1e-3;
3879 }
3880 self.ls_failures_in_row += 1;
3881 if self.ls_failures_in_row >= 2 {
3882 self.gll.set_cap(10);
3883 }
3884 let fallback_result = backtracking_line_search(
3885 self,
3886 obj_fn,
3887 &mut oracle,
3888 &x_k,
3889 &present_d_k,
3890 f_k,
3891 &g_k,
3892 );
3893 if let Ok(result) = fallback_result {
3894 result
3896 } else {
3897 let (max_attempts, failure_reason) = match fallback_result {
3899 Err(LineSearchError::MaxAttempts(attempts)) => {
3900 (attempts, LineSearchFailureReason::MaxAttempts)
3901 }
3902 Err(LineSearchError::StepSizeTooSmall) => (
3903 BACKTRACKING_MAX_ATTEMPTS,
3904 LineSearchFailureReason::StepSizeTooSmall,
3905 ),
3906 Err(LineSearchError::ObjectiveFailed(message)) => {
3907 return Err(BfgsError::ObjectiveFailed { message });
3908 }
3909 Ok(_) => unreachable!(
3910 "entered fallback failure branch with Ok line-search result"
3911 ),
3912 };
3913 if let Some(b) = self.global_best.clone() {
3915 let epsF = eps_f(f_k, self.tau_f);
3916 let gk_norm = g_proj_k.dot(&g_proj_k).sqrt();
3917 let gb_proj = self.projected_gradient(&b.x, &b.g);
3918 let gb_norm = gb_proj.dot(&gb_proj).sqrt();
3919 let drop_factor = self.grad_drop_factor;
3920 if (b.f <= f_k + epsF && gb_norm <= drop_factor * gk_norm)
3921 || (b.f < f_k - epsF)
3922 {
3923 let rel_impr = (f_k - b.f).abs() / (1.0 + f_k.abs());
3924 if self.update_no_improve_streak(rel_impr)
3925 && self.stagnation_converged(&x_k, &b.x, &gb_proj)
3926 {
3927 return Ok(Solution::gradient_based(
3928 b.x.clone(),
3929 b.f,
3930 b.g.clone(),
3931 gb_norm,
3932 None,
3933 k,
3934 func_evals,
3935 grad_evals,
3936 0,
3937 ));
3938 }
3939 x_k = self.project_point(&b.x);
3940 f_k = b.f;
3941 g_k = b.g.clone();
3942 g_proj_k = gb_proj;
3943 if let Some(bounds) = &self.bounds {
3944 active_mask = bounds.active_mask(&x_k, &g_k);
3945 }
3946 for i in 0..n {
3947 b_inv[[i, i]] *= 1.0 + 1e-3;
3948 }
3949 continue;
3950 }
3951 }
3952 if let Some((x_new, f_new, g_new)) = self.try_trust_region_step(
3954 obj_fn,
3955 &mut oracle,
3956 &mut b_inv,
3957 &x_k,
3958 f_k,
3959 &g_k,
3960 &mut func_evals,
3961 &mut grad_evals,
3962 ) {
3963 let g_proj_new = self.projected_gradient(&x_new, &g_new);
3964 let rel_impr = (f_k - f_new).abs() / (1.0 + f_k.abs());
3965 if self.update_no_improve_streak(rel_impr)
3966 && self.stagnation_converged(&x_k, &x_new, &g_proj_new)
3967 {
3968 return Ok(Solution::gradient_based(
3969 x_new,
3970 f_new,
3971 g_new,
3972 g_proj_new.dot(&g_proj_new).sqrt(),
3973 None,
3974 k + 1,
3975 func_evals,
3976 grad_evals,
3977 0,
3978 ));
3979 }
3980 x_k = x_new;
3981 f_k = f_new;
3982 g_k = g_new;
3983 g_proj_k = g_proj_new;
3984 if let Some(bounds) = &self.bounds {
3985 active_mask = bounds.active_mask(&x_k, &g_k);
3986 }
3987 self.ls_failures_in_row = 0;
3988 continue;
3989 }
3990 self.trust_radius = (self.trust_radius * 0.7).max(1e-12);
3991 if self.nonfinite_seen {
3992 let mut ls = Solution::gradient_based(
3993 x_k.clone(),
3994 f_k,
3995 g_k.clone(),
3996 g_norm,
3997 None,
3998 k,
3999 func_evals,
4000 grad_evals,
4001 0,
4002 );
4003 if let Some(b) = self.global_best.as_ref()
4004 && b.f < f_k - eps_f(f_k, self.tau_f)
4005 {
4006 let gb_proj = self.projected_gradient(&b.x, &b.g);
4007 ls = Solution::gradient_based(
4008 b.x.clone(),
4009 b.f,
4010 b.g.clone(),
4011 gb_proj.dot(&gb_proj).sqrt(),
4012 None,
4013 k,
4014 func_evals,
4015 grad_evals,
4016 0,
4017 );
4018 }
4019 log::warn!(
4020 "[BFGS] Line search failed at iter {} (nonfinite seen), fe={}, ge={}, Δ={:.3e}",
4021 k,
4022 func_evals,
4023 grad_evals,
4024 self.trust_radius
4025 );
4026 return Err(BfgsError::LineSearchFailed {
4027 last_solution: Box::new(ls),
4028 max_attempts,
4029 failure_reason,
4030 });
4031 }
4032 if self.ls_failures_in_row >= 2 {
4033 let ls = Solution::gradient_based(
4034 x_k.clone(),
4035 f_k,
4036 g_k.clone(),
4037 g_norm,
4038 None,
4039 k,
4040 func_evals,
4041 grad_evals,
4042 0,
4043 );
4044 return Err(BfgsError::LineSearchFailed {
4045 last_solution: Box::new(ls),
4046 max_attempts,
4047 failure_reason,
4048 });
4049 }
4050 continue;
4051 }
4052 } else {
4053 self.ls_failures_in_row += 1;
4056 log::error!(
4057 "[BFGS Adaptive] CRITICAL: Backtracking failed at iter {}. Resetting Hessian.",
4058 k
4059 );
4060 b_inv = Array2::<f64>::eye(n);
4061 present_d_k = -g_k.clone();
4062 let fallback_result = backtracking_line_search(
4063 self,
4064 obj_fn,
4065 &mut oracle,
4066 &x_k,
4067 &present_d_k,
4068 f_k,
4069 &g_k,
4070 );
4071 if let Ok(result) = fallback_result {
4072 result
4073 } else {
4074 let (max_attempts, failure_reason) = match fallback_result {
4075 Err(LineSearchError::MaxAttempts(attempts)) => {
4076 (attempts, LineSearchFailureReason::MaxAttempts)
4077 }
4078 Err(LineSearchError::StepSizeTooSmall) => (
4079 BACKTRACKING_MAX_ATTEMPTS,
4080 LineSearchFailureReason::StepSizeTooSmall,
4081 ),
4082 Err(LineSearchError::ObjectiveFailed(message)) => {
4083 return Err(BfgsError::ObjectiveFailed { message });
4084 }
4085 Ok(_) => unreachable!(
4086 "entered fallback failure branch with Ok line-search result"
4087 ),
4088 };
4089 if let Some((x_new, f_new, g_new)) = self.try_trust_region_step(
4091 obj_fn,
4092 &mut oracle,
4093 &mut b_inv,
4094 &x_k,
4095 f_k,
4096 &g_k,
4097 &mut func_evals,
4098 &mut grad_evals,
4099 ) {
4100 let g_proj_new = self.projected_gradient(&x_new, &g_new);
4101 let rel_impr = (f_k - f_new).abs() / (1.0 + f_k.abs());
4102 if self.update_no_improve_streak(rel_impr)
4103 && self.stagnation_converged(&x_k, &x_new, &g_proj_new)
4104 {
4105 return Ok(Solution::gradient_based(
4106 x_new,
4107 f_new,
4108 g_new,
4109 g_proj_new.dot(&g_proj_new).sqrt(),
4110 None,
4111 k + 1,
4112 func_evals,
4113 grad_evals,
4114 0,
4115 ));
4116 }
4117 x_k = x_new;
4118 f_k = f_new;
4119 g_k = g_new;
4120 g_proj_k = g_proj_new;
4121 if let Some(bounds) = &self.bounds {
4122 active_mask = bounds.active_mask(&x_k, &g_k);
4123 }
4124 self.ls_failures_in_row = 0;
4125 continue;
4126 }
4127 if let Some(b) = self.global_best.clone() {
4128 let epsF = eps_f(f_k, self.tau_f);
4129 let gk_norm = g_proj_k.dot(&g_proj_k).sqrt();
4130 let gb_proj = self.projected_gradient(&b.x, &b.g);
4131 let gb_norm = gb_proj.dot(&gb_proj).sqrt();
4132 let drop_factor = self.grad_drop_factor;
4133 if (b.f <= f_k + epsF && gb_norm <= drop_factor * gk_norm)
4134 || (b.f < f_k - epsF)
4135 {
4136 let rel_impr = (f_k - b.f).abs() / (1.0 + f_k.abs());
4137 if self.update_no_improve_streak(rel_impr)
4138 && self.stagnation_converged(&x_k, &b.x, &gb_proj)
4139 {
4140 return Ok(Solution::gradient_based(
4141 b.x.clone(),
4142 b.f,
4143 b.g.clone(),
4144 gb_norm,
4145 None,
4146 k,
4147 func_evals,
4148 grad_evals,
4149 0,
4150 ));
4151 }
4152 x_k = self.project_point(&b.x);
4153 f_k = b.f;
4154 g_k = b.g.clone();
4155 g_proj_k = gb_proj;
4156 if let Some(bounds) = &self.bounds {
4157 active_mask = bounds.active_mask(&x_k, &g_k);
4158 }
4159 for i in 0..n {
4160 b_inv[[i, i]] *= 1.0 + 1e-3;
4161 }
4162 continue;
4163 }
4164 }
4165 self.trust_radius = (self.trust_radius * 0.7).max(1e-12);
4166 if self.nonfinite_seen {
4167 let mut ls = Solution::gradient_based(
4168 x_k.clone(),
4169 f_k,
4170 g_k.clone(),
4171 g_norm,
4172 None,
4173 k,
4174 func_evals,
4175 grad_evals,
4176 0,
4177 );
4178 if let Some(b) = self.global_best.as_ref()
4179 && b.f < f_k - eps_f(f_k, self.tau_f)
4180 {
4181 let b_proj = self.projected_gradient(&b.x, &b.g);
4182 ls = Solution::gradient_based(
4183 b.x.clone(),
4184 b.f,
4185 b.g.clone(),
4186 b_proj.dot(&b_proj).sqrt(),
4187 None,
4188 k,
4189 func_evals,
4190 grad_evals,
4191 0,
4192 );
4193 }
4194 log::warn!(
4195 "[BFGS] Line search failed at iter {} (nonfinite seen), fe={}, ge={}, Δ={:.3e}",
4196 k,
4197 func_evals,
4198 grad_evals,
4199 self.trust_radius
4200 );
4201 return Err(BfgsError::LineSearchFailed {
4202 last_solution: Box::new(ls),
4203 max_attempts,
4204 failure_reason,
4205 });
4206 }
4207 if self.ls_failures_in_row >= 2 {
4208 let ls = Solution::gradient_based(
4209 x_k.clone(),
4210 f_k,
4211 g_k.clone(),
4212 g_norm,
4213 None,
4214 k,
4215 func_evals,
4216 grad_evals,
4217 0,
4218 );
4219 return Err(BfgsError::LineSearchFailed {
4220 last_solution: Box::new(ls),
4221 max_attempts,
4222 failure_reason,
4223 });
4224 }
4225 continue;
4226 }
4227 }
4228 }
4229 }
4230 };
4231
4232 let mut s_override: Option<Array1<f64>> = None;
4234 let mut rescued = false;
4235 if self.rescue_enabled() {
4236 let epsF_iter = eps_f(f_k, self.tau_f);
4237 let flat_now = (f_next - f_k).abs() <= epsF_iter;
4238 if flat_now && self.flat_accept_streak >= 2 {
4239 let x_base = self.project_point(&(&x_k + &(alpha_k * &present_d_k)));
4240 let g_proj_base = self.projected_gradient(&x_base, &g_next);
4241 let gnext_norm0 = g_proj_base.iter().map(|v| v * v).sum::<f64>().sqrt();
4242 let delta = self.trust_radius;
4243 let eta = (0.2 * delta).min(1.0 / (1.0 + gnext_norm0));
4244 if eta.is_finite() && eta > 0.0 {
4245 let n = x_k.len();
4246 let mut best_x = None;
4247 let mut best_f = f_next;
4248 let mut best_g = g_next.clone();
4249 let k = n.min(8);
4251 let mut idx: Vec<usize> = (0..n).collect();
4252 idx.sort_by(|&i, &j| {
4253 g_next[i]
4254 .abs()
4255 .partial_cmp(&g_next[j].abs())
4256 .unwrap_or(std::cmp::Ordering::Equal)
4257 .reverse()
4258 });
4259 let (use_hybrid, pool_mult, rescue_heads) = match self.rescue_policy {
4260 RescuePolicy::Off => (false, 1.0, 0),
4261 RescuePolicy::CoordinateHybrid { pool_mult, heads } => {
4262 (true, pool_mult, heads)
4263 }
4264 };
4265 let m = (pool_mult * (k as f64)).round() as usize;
4266 let m = m.min(n).max(k);
4267 let heads = rescue_heads.min(k).min(m);
4268 let mut chosen: Vec<usize> = Vec::new();
4269 for &i in idx.iter().take(heads) {
4271 chosen.push(i);
4272 }
4273 if use_hybrid {
4274 let mut pool: Vec<usize> =
4276 idx.iter().cloned().skip(heads).take(m - heads).collect();
4277 while chosen.len() < k && !pool.is_empty() {
4278 let r = (self.rng_state >> 1) as usize;
4280 let t = r % pool.len();
4281 let pick = pool.swap_remove(t);
4282 chosen.push(pick);
4283 let _ = self.next_rand_sym();
4285 }
4286 } else {
4287 for &i in idx.iter().skip(heads).take(k - heads) {
4288 chosen.push(i);
4289 }
4290 }
4291 for &i in &chosen {
4292 for &sgn in &[-1.0, 1.0] {
4293 let mut x_try = x_base.clone();
4294 x_try[i] += sgn * eta; x_try = self.project_point(&x_try);
4296 let (f_try, g_try) = match bfgs_eval_cost_grad(
4297 &mut oracle,
4298 obj_fn,
4299 &x_try,
4300 &mut func_evals,
4301 &mut grad_evals,
4302 ) {
4303 Ok(sample) => sample,
4304 Err(ObjectiveEvalError::Recoverable { .. }) => continue,
4305 Err(ObjectiveEvalError::Fatal { message }) => {
4306 return Err(BfgsError::ObjectiveFailed { message });
4307 }
4308 };
4309 if !f_try.is_finite() || g_try.iter().any(|v| !v.is_finite()) {
4310 continue;
4311 }
4312 let g_proj_try = self.projected_gradient(&x_try, &g_try);
4313 let g_try_norm = g_proj_try.dot(&g_proj_try).sqrt();
4314 let f_thresh = f_k.min(f_next) + epsF_iter;
4315 let s_trial = &x_try - &x_k;
4316 let descent_ok = g_proj_k.dot(&s_trial)
4317 <= -eps_g(&g_proj_k, &s_trial, self.tau_g);
4318 let f_ok = f_try <= f_thresh;
4319 let g_ok = g_try_norm <= self.grad_drop_factor * gnext_norm0;
4320 if (f_ok || g_ok) && descent_ok && f_try <= best_f {
4321 best_f = f_try;
4322 best_x = Some(x_try.clone());
4323 best_g = g_try.clone();
4324 }
4325 }
4326 }
4327 if let Some(xb) = best_x {
4328 let mut s_tmp = &xb - &x_k;
4330 let s_norm = s_tmp.dot(&s_tmp).sqrt();
4331 let delta = self.trust_radius;
4332 if s_norm.is_finite()
4333 && s_norm > delta
4334 && delta.is_finite()
4335 && delta > 0.0
4336 {
4337 let scale = delta / s_norm;
4338 let x_scaled = &x_k + &(s_tmp.mapv(|v| v * scale));
4339 let x_scaled = self.project_point(&x_scaled);
4340 let (f_s, g_s) = match bfgs_eval_cost_grad(
4341 &mut oracle,
4342 obj_fn,
4343 &x_scaled,
4344 &mut func_evals,
4345 &mut grad_evals,
4346 ) {
4347 Ok(sample) => sample,
4348 Err(ObjectiveEvalError::Recoverable { .. }) => {
4349 (f64::NAN, Array1::zeros(x_scaled.len()))
4350 }
4351 Err(ObjectiveEvalError::Fatal { message }) => {
4352 return Err(BfgsError::ObjectiveFailed { message });
4353 }
4354 };
4355 if f_s.is_finite() && g_s.iter().all(|v| v.is_finite()) {
4356 s_tmp = &x_scaled - &x_k;
4357 f_next = f_s;
4358 g_next = g_s;
4359 } else {
4360 f_next = best_f;
4362 g_next = best_g.clone();
4363 }
4364 } else {
4365 f_next = best_f;
4366 g_next = best_g.clone();
4367 }
4368 s_override = Some(s_tmp);
4369 rescued = true;
4370 accept_kind = AcceptKind::Rescue;
4371 self.flat_accept_streak = 0;
4372 }
4373 }
4374 }
4375 }
4376
4377 if self.wolfe_fail_streak >= Self::FALLBACK_THRESHOLD {
4379 log::warn!(
4380 "[BFGS Adaptive] Fallback streak ({}) reached. Switching primary to Backtracking.",
4381 self.wolfe_fail_streak
4382 );
4383 self.primary_strategy = LineSearchStrategy::Backtracking;
4384 self.wolfe_fail_streak = 0;
4385 }
4386 if matches!(self.primary_strategy, LineSearchStrategy::Backtracking)
4388 && self.bt_clean_successes >= 3
4389 && self.wolfe_fail_streak == 0
4390 {
4391 log::info!(
4392 "[BFGS Adaptive] Backtracking succeeded cleanly ({} iters); switching back to StrongWolfe.",
4393 self.bt_clean_successes
4394 );
4395 self.primary_strategy = LineSearchStrategy::StrongWolfe;
4396 self.bt_clean_successes = 0;
4397 self.gll.set_cap(8);
4398 }
4399
4400 func_evals += f_evals;
4401 grad_evals += g_evals;
4402
4403 let mut s_k = if let Some(ref s) = s_override {
4404 s.clone()
4405 } else {
4406 alpha_k * &present_d_k
4407 };
4408 let x_next = self.project_point(&(x_k.clone() + &s_k));
4409 s_k = &x_next - &x_k;
4410 let g_proj_next = self.projected_gradient(&x_next, &g_next);
4411 let active_after = if let Some(bounds) = &self.bounds {
4412 bounds.active_mask(&x_next, &g_next)
4413 } else {
4414 vec![false; n]
4415 };
4416 let step_len = s_k.dot(&s_k).sqrt();
4417 if step_len.is_finite() && step_len > 0.0 {
4418 if step_len >= 0.9 * self.trust_radius {
4419 self.trust_radius = (self.trust_radius * 1.5).min(1e6);
4420 } else {
4421 self.trust_radius = (self.trust_radius * 1.1).min(1e6);
4422 }
4423 }
4424
4425 let rel_impr = (f_last_accepted - f_next).abs() / (1.0 + f_last_accepted.abs());
4426 if self.update_no_improve_streak(rel_impr)
4427 && self.stagnation_converged(&x_k, &x_next, &g_proj_next)
4428 {
4429 return Ok(Solution::gradient_based(
4430 x_next.clone(),
4431 f_next,
4432 g_next.clone(),
4433 g_proj_next.dot(&g_proj_next).sqrt(),
4434 None,
4435 k + 1,
4436 func_evals,
4437 grad_evals,
4438 0,
4439 ));
4440 }
4441
4442 let f_ok_flat = (f_next - f_k).abs() <= eps_f(f_k, self.tau_f)
4444 || (f_next - f_k).abs() <= self.tol_f_rel * (1.0 + f_k.abs());
4445 if f_ok_flat {
4446 self.flat_accept_streak += 1;
4447 } else {
4448 self.flat_accept_streak = 0;
4449 }
4450 if self.flat_accept_streak >= 2 {
4451 self.curv_slack_scale = (self.curv_slack_scale * 0.5).max(0.1);
4452 self.grad_drop_factor = 0.95;
4453 } else {
4454 self.curv_slack_scale = 1.0;
4455 self.grad_drop_factor = 0.9;
4456 }
4457
4458 let mut y_k = &g_next - &g_k;
4459
4460 if self.bounds.is_some() {
4461 for i in 0..n {
4462 let tiny_step = s_k[i].abs() <= 1e-14 * (1.0 + x_k[i].abs());
4463 if (active_before[i] && active_after[i]) || tiny_step {
4464 s_k[i] = 0.0;
4465 y_k[i] = 0.0;
4466 }
4467 }
4468 }
4469
4470 let sy = s_k.dot(&y_k);
4472 let mut update_status = "applied";
4473
4474 if k == 0 {
4475 let yy = y_k.dot(&y_k);
4477 let mut scale = if sy > 1e-12 && yy > 0.0 { sy / yy } else { 1.0 };
4478 if !scale.is_finite() {
4479 scale = 1.0;
4480 }
4481 scale = scale.clamp(1e-3, 1e3);
4482 b_inv = Array2::eye(n) * scale;
4483 }
4484
4485 let s_norm = s_k.dot(&s_k).sqrt();
4487 if s_norm > 1e-14 {
4488 if !rescued {
4489 let mean_diag = (0..n).map(|i| b_inv[[i, i]].abs()).sum::<f64>() / (n as f64);
4491 let ridge = (1e-10 * mean_diag).max(1e-16);
4492 if let Some(h_s) = cg_solve_adaptive(&b_inv, &s_k, 25, 1e-10, ridge) {
4493 let s_h_s = s_k.dot(&h_s);
4494 let denom_raw = s_h_s - sy;
4495 let denom = if denom_raw <= 0.0 { 1e-16 } else { denom_raw };
4496 let theta_raw = if sy < 0.2 * s_h_s {
4498 (0.8 * s_h_s) / denom
4499 } else {
4500 1.0
4501 };
4502 let theta = theta_raw.clamp(0.0, 1.0);
4503 let mut y_tilde = &y_k * theta + &h_s * (1.0 - theta);
4504 let mut sty = s_k.dot(&y_tilde);
4505 let mut y_norm = y_tilde.dot(&y_tilde).sqrt();
4506 let s_norm2 = s_norm * s_norm;
4507 let kappa = 1e-4;
4508 let min_curv = kappa * s_norm * y_norm;
4509 if sty < min_curv {
4510 let beta = (min_curv - sty) / s_norm2;
4511 y_tilde = &y_tilde + &s_k * beta;
4512 sty = s_k.dot(&y_tilde);
4513 y_norm = y_tilde.dot(&y_tilde).sqrt();
4514 }
4515 let rel = if s_norm > 0.0 && y_norm > 0.0 {
4516 sty / (s_norm * y_norm)
4517 } else {
4518 0.0
4519 };
4520 if !sty.is_finite() || rel < 1e-8 {
4521 log::warn!(
4522 "[BFGS] s^T y_tilde non-positive/tiny; skipping update and inflating diag."
4523 );
4524 update_status = "skipped";
4525 self.chol_fail_iters = self.chol_fail_iters + 1;
4526 for i in 0..n {
4527 b_inv[[i, i]] *= 1.0 + 1e-3;
4528 }
4529 } else {
4530 if !apply_inverse_bfgs_update_in_place(
4531 &mut b_inv,
4532 &s_k,
4533 &y_tilde,
4534 &mut b_inv_backup,
4535 ) {
4536 b_inv.assign(&b_inv_backup);
4537 for i in 0..n {
4538 b_inv[[i, i]] += 1e-6;
4539 }
4540 update_status = "reverted";
4541 }
4542 }
4543 } else {
4544 self.chol_fail_iters = self.chol_fail_iters + 1;
4545 self.spd_fail_seen = true;
4546 log::warn!("[BFGS] B_inv not SPD after ridge; skipping update this iter.");
4547 update_status = "skipped";
4548 }
4549 } else {
4550 log::info!("[BFGS] Coordinate rescue used; skipping inverse update this iter.");
4551 update_status = "skipped";
4552 }
4553
4554 for i in 0..n {
4556 for j in (i + 1)..n {
4557 let a = b_inv[[i, j]];
4558 let b = b_inv[[j, i]];
4559 let v = 0.5 * (a + b);
4560 b_inv[[i, j]] = v;
4561 b_inv[[j, i]] = v;
4562 }
4563 }
4564 let mut diag_min = f64::INFINITY;
4565 for i in 0..n {
4566 diag_min = diag_min.min(b_inv[[i, i]]);
4567 }
4568 if !diag_min.is_finite() || diag_min <= 0.0 {
4569 let mut trace = 0.0;
4570 for i in 0..n {
4571 trace += b_inv[[i, i]].abs();
4572 }
4573 let delta = 1e-12 * trace.max(1.0);
4574 for i in 0..n {
4575 b_inv[[i, i]] += delta;
4576 }
4577 }
4578
4579 if self.spd_fail_seen && self.chol_fail_iters >= 2 {
4580 let sy = s_k.dot(&y_k);
4581 let yy = y_k.dot(&y_k);
4582 let mut lambda = if yy > 0.0 { (sy / yy).abs() } else { 1.0 };
4583 lambda = lambda.clamp(1e-6, 1e6);
4584 b_inv = scaled_identity(n, lambda);
4585 self.chol_fail_iters = 0;
4586 update_status = "reverted";
4587 }
4588 } else {
4589 update_status = "skipped";
4590 }
4591
4592 log::info!(
4593 "[BFGS] step accepted via {:?}; inverse update {}",
4594 accept_kind,
4595 update_status
4596 );
4597
4598 let step_ok = self.feasible_step_small(&x_k, &x_next);
4600 let f_ok = (f_next - f_k).abs() <= eps_f(f_k, self.tau_f);
4601 let gnext_finite = f_next.is_finite() && g_next.iter().all(|v| v.is_finite());
4602 let gnext_norm = g_proj_next.dot(&g_proj_next).sqrt();
4603 if step_ok && f_ok && gnext_finite && gnext_norm < self.tolerance {
4604 let sol = Solution::gradient_based(
4605 x_next.clone(),
4606 f_next,
4607 g_next.clone(),
4608 gnext_norm,
4609 None,
4610 k + 1,
4611 func_evals,
4612 grad_evals,
4613 0,
4614 );
4615 log::info!(
4616 "[BFGS] Converged by small step/flat f: iters={}, f={:.6e}, ||g||={:.3e}, fe={}, ge={}, Δ={:.3e}",
4617 sol.iterations,
4618 sol.final_value,
4619 sol.final_gradient_norm
4620 .expect("gradient-based solution must report gradient norm"),
4621 sol.func_evals,
4622 sol.grad_evals,
4623 self.trust_radius
4624 );
4625 return Ok(sol);
4626 }
4627
4628 if let StallPolicy::On { window } = self.stall_policy {
4630 let g_inf = g_proj_k.iter().fold(0.0, |acc, &v| f64::max(acc, v.abs()));
4631 let x_inf = x_k.iter().fold(0.0, |acc, &v| f64::max(acc, v.abs()));
4632 let rel_g_ok = g_inf <= self.tolerance * (1.0 + x_inf);
4633 let rel_f_ok = (f_k - f_last_accepted).abs() <= eps_f(f_last_accepted, self.tau_f);
4634 if rel_g_ok && rel_f_ok {
4635 self.stall_noimprove_streak += 1;
4636 } else {
4637 self.stall_noimprove_streak = 0;
4638 }
4639 if self.stall_noimprove_streak >= window {
4640 let sol = Solution::gradient_based(
4641 x_k.clone(),
4642 f_k,
4643 g_k.clone(),
4644 g_inf,
4645 None,
4646 k + 1,
4647 func_evals,
4648 grad_evals,
4649 0,
4650 );
4651 log::info!(
4652 "[BFGS] Converged (flat/stalled): iters={}, f={:.6e}, ||g||={:.3e}",
4653 sol.iterations,
4654 sol.final_value,
4655 sol.final_gradient_norm
4656 .expect("gradient-based solution must report gradient norm")
4657 );
4658 return Ok(sol);
4659 }
4660 }
4661
4662 x_k = x_next;
4663 f_k = f_next;
4664 g_k = g_next;
4665 g_proj_k = g_proj_next;
4666 active_mask = active_after;
4667 self.gll.push(f_k);
4669 f_last_accepted = f_k;
4670 let maybe_f = self.global_best.as_ref().map(|b| b.f);
4671 match maybe_f {
4672 Some(bf) => {
4673 if f_k < bf - eps_f(bf, self.tau_f) {
4674 self.global_best = Some(ProbeBest {
4675 f: f_k,
4676 x: x_k.clone(),
4677 g: g_k.clone(),
4678 });
4679 }
4680 }
4681 None => {
4682 self.global_best = Some(ProbeBest::new(&x_k, f_k, &g_k));
4683 }
4684 }
4685
4686 }
4690
4691 let final_g_norm = g_proj_k.dot(&g_proj_k).sqrt();
4693 let last_solution = Box::new(Solution::gradient_based(
4694 x_k,
4695 f_k,
4696 g_k,
4697 final_g_norm,
4698 None,
4699 self.max_iterations,
4700 func_evals,
4701 grad_evals,
4702 0,
4703 ));
4704 log::warn!(
4705 "[BFGS] Max iterations reached: iters={}, f={:.6e}, ||g||={:.3e}, fe={}, ge={}, Δ={:.3e}",
4706 self.max_iterations,
4707 last_solution.final_value,
4708 last_solution
4709 .final_gradient_norm
4710 .expect("gradient-based solution must report gradient norm"),
4711 last_solution.func_evals,
4712 last_solution.grad_evals,
4713 self.trust_radius
4714 );
4715 Err(BfgsError::MaxIterationsReached { last_solution })
4716 }
4717}
4718
impl<ObjFn> Bfgs<ObjFn>
where
    ObjFn: FirstOrderObjective,
{
    /// Creates a BFGS solver starting at `x0`, using the core's default
    /// tolerance, iteration cap, and line-search configuration.
    pub fn new(x0: Array1<f64>, obj_fn: ObjFn) -> Self {
        Self {
            core: BfgsCore::new(x0),
            obj_fn,
        }
    }

    /// Sets the projected-gradient-norm convergence tolerance (builder style).
    pub fn with_tolerance(mut self, tolerance: Tolerance) -> Self {
        self.core.tolerance = tolerance.get();
        self
    }

    /// Sets the maximum number of outer iterations (builder style).
    pub fn with_max_iterations(mut self, max_iterations: MaxIterations) -> Self {
        self.core.max_iterations = max_iterations.get();
        self
    }

    /// Installs box bounds: registers them with the objective via
    /// `set_finite_difference_bounds` (so finite-difference probes stay
    /// feasible, per the setter name) and stores the box spec on the core.
    pub fn with_bounds(mut self, bounds: Bounds) -> Self {
        self.obj_fn.set_finite_difference_bounds(Some(&bounds));
        self.core.bounds = Some(bounds.spec);
        self
    }

    /// Applies a preset tuning profile to the core's parameters.
    pub fn with_profile(mut self, profile: Profile) -> Self {
        self.core.apply_profile(profile);
        self
    }

    /// Runs the optimization to completion, delegating to the core loop.
    ///
    /// # Errors
    /// Propagates any [`BfgsError`] produced by the core (line-search
    /// failure, non-finite gradients, iteration cap, objective failure, ...).
    pub fn run(&mut self) -> Result<Solution, BfgsError> {
        self.core.run(&mut self.obj_fn)
    }

    // Test-only access to the core's deterministic RNG, used by the
    // coordinate-rescue machinery.
    #[cfg(test)]
    fn next_rand_sym(&mut self) -> f64 {
        self.core.next_rand_sym()
    }
}
4772
impl<ObjFn> NewtonTrustRegion<ObjFn>
where
    ObjFn: SecondOrderObjective,
{
    /// Creates a Newton trust-region solver starting at `x0` with the
    /// core's default settings.
    pub fn new(x0: Array1<f64>, obj_fn: ObjFn) -> Self {
        Self {
            core: NewtonTrustRegionCore::new(x0),
            obj_fn,
        }
    }

    /// Sets the convergence tolerance (builder style).
    pub fn with_tolerance(mut self, tolerance: Tolerance) -> Self {
        self.core.tolerance = tolerance.get();
        self
    }

    /// Sets the maximum number of outer iterations (builder style).
    pub fn with_max_iterations(mut self, max_iterations: MaxIterations) -> Self {
        self.core.max_iterations = max_iterations.get();
        self
    }

    /// Sets the step size used when the Hessian is approximated by finite
    /// differences. NOTE(review): no validation here — presumably expected
    /// to be positive and finite; confirm in the core.
    pub fn with_fd_hessian_step(mut self, fd_hessian_step: f64) -> Self {
        self.core.fd_hessian_step = fd_hessian_step;
        self
    }

    /// Installs box bounds: registers them with the objective via
    /// `set_finite_difference_bounds` and stores the box spec on the core.
    pub fn with_bounds(mut self, bounds: Bounds) -> Self {
        self.obj_fn.set_finite_difference_bounds(Some(&bounds));
        self.core.bounds = Some(bounds.spec);
        self
    }

    /// Applies a preset tuning profile to the core's parameters.
    pub fn with_profile(mut self, profile: Profile) -> Self {
        self.core.apply_profile(profile);
        self
    }

    /// Runs the optimization to completion, delegating to the core loop.
    ///
    /// # Errors
    /// Propagates any [`NewtonTrustRegionError`] produced by the core.
    pub fn run(&mut self) -> Result<Solution, NewtonTrustRegionError> {
        self.core.run(&mut self.obj_fn)
    }
}
4823
// NOTE: `Arc` here is this crate's own solver type (an inherent impl on a
// foreign type like `std::sync::Arc` would not compile), presumably
// Adaptive Regularization with Cubics — confirm against its declaration.
impl<ObjFn> Arc<ObjFn>
where
    ObjFn: SecondOrderObjective,
{
    /// Creates an ARC solver starting at `x0` with the core's default
    /// settings.
    pub fn new(x0: Array1<f64>, obj_fn: ObjFn) -> Self {
        Self {
            core: ArcCore::new(x0),
            obj_fn,
        }
    }

    /// Sets the convergence tolerance (builder style).
    pub fn with_tolerance(mut self, tolerance: Tolerance) -> Self {
        self.core.tolerance = tolerance.get();
        self
    }

    /// Sets the maximum number of outer iterations (builder style).
    pub fn with_max_iterations(mut self, max_iterations: MaxIterations) -> Self {
        self.core.max_iterations = max_iterations.get();
        self
    }

    /// Sets the step size used when the Hessian is approximated by finite
    /// differences. NOTE(review): unvalidated, same as the Newton builder.
    pub fn with_fd_hessian_step(mut self, fd_hessian_step: f64) -> Self {
        self.core.fd_hessian_step = fd_hessian_step;
        self
    }

    /// Installs box bounds: registers them with the objective via
    /// `set_finite_difference_bounds` and stores the box spec on the core.
    pub fn with_bounds(mut self, bounds: Bounds) -> Self {
        self.obj_fn.set_finite_difference_bounds(Some(&bounds));
        self.core.bounds = Some(bounds.spec);
        self
    }

    /// Applies a preset tuning profile to the core's parameters.
    pub fn with_profile(mut self, profile: Profile) -> Self {
        self.core.apply_profile(profile);
        self
    }

    /// Runs the optimization to completion, delegating to the core loop.
    ///
    /// # Errors
    /// Propagates any [`ArcError`] produced by the core.
    pub fn run(&mut self) -> Result<Solution, ArcError> {
        self.core.run(&mut self.obj_fn)
    }
}
4880
/// Errors produced by the fixed-point iteration driver.
#[derive(Debug, thiserror::Error)]
pub enum FixedPointError {
    /// The objective's `eval_step` (or cost recovery) failed; both
    /// recoverable and fatal evaluation errors are mapped here.
    #[error("Objective evaluation failed: {message}")]
    ObjectiveFailed { message: String },
    /// The objective returned a step whose length differs from the
    /// dimension of the current iterate.
    #[error("Fixed-point objective returned a step with length {got}; expected {expected}")]
    StepDimensionMismatch { expected: usize, got: usize },
    /// The returned step (or its projected norm) contained NaN/inf.
    #[error("Fixed-point objective returned a non-finite step")]
    NonFiniteStep,
    /// Iteration cap hit; the last iterate is carried in `last_solution`.
    #[error(
        "Maximum number of iterations reached without converging. The best solution found is returned."
    )]
    MaxIterationsReached { last_solution: Box<Solution> },
}
4894
/// State and configuration for the fixed-point iteration driver.
struct FixedPointCore {
    /// Starting point (projected into bounds before the first iteration).
    x0: Array1<f64>,
    /// Convergence threshold on the Euclidean norm of the applied step.
    tolerance: f64,
    /// Maximum number of iterations before giving up.
    max_iterations: usize,
    /// Optional box constraints; iterates are projected after each step.
    bounds: Option<BoxSpec>,
}
4901
impl FixedPointCore {
    /// Build a solver core starting at `x0` with default settings:
    /// step-norm tolerance 1e-5, at most 100 iterations, no bounds.
    fn new(x0: Array1<f64>) -> Self {
        Self {
            x0,
            tolerance: 1e-5,
            max_iterations: 100,
            bounds: None,
        }
    }

    /// Clamp `x` onto the configured box constraints; identity (a clone)
    /// when the problem is unbounded.
    fn project_point(&self, x: &Array1<f64>) -> Array1<f64> {
        if let Some(bounds) = &self.bounds {
            bounds.project(x)
        } else {
            x.clone()
        }
    }

    /// Run the projected fixed-point iteration x_{k+1} = P(x_k + step(x_k)).
    ///
    /// Terminates when:
    /// - the objective reports `FixedPointStatus::Stop` (step norm reported as 0.0),
    /// - the norm of the *applied* (post-projection) step drops to `tolerance`, or
    /// - the iteration budget is exhausted (`MaxIterationsReached`, carrying the
    ///   last iterate so callers can still inspect it).
    fn run<ObjFn>(&mut self, obj_fn: &mut ObjFn) -> Result<Solution, FixedPointError>
    where
        ObjFn: FixedPointObjective,
    {
        // Start from the projection of x0 so the first evaluation is feasible.
        let mut x_k = self.project_point(&self.x0);
        let mut func_evals = 0usize;
        let mut last_value = f64::INFINITY;
        let mut last_step_norm = 0.0;
        for k in 0..self.max_iterations {
            // Any evaluation failure (recoverable or fatal) aborts the run —
            // fixed-point iteration has no line-search fallback to recover with.
            let sample = match obj_fn.eval_step(&x_k) {
                Ok(sample) => sample,
                Err(ObjectiveEvalError::Recoverable { message })
                | Err(ObjectiveEvalError::Fatal { message }) => {
                    return Err(FixedPointError::ObjectiveFailed { message });
                }
            };
            func_evals += 1;
            // Non-finite cost values are mapped to an objective failure as well.
            let value = recover_on_nonfinite_cost(sample.value).map_err(|err| match err {
                ObjectiveEvalError::Recoverable { message }
                | ObjectiveEvalError::Fatal { message } => {
                    FixedPointError::ObjectiveFailed { message }
                }
            })?;
            // Validate the proposed step before applying it.
            if sample.step.len() != x_k.len() {
                return Err(FixedPointError::StepDimensionMismatch {
                    expected: x_k.len(),
                    got: sample.step.len(),
                });
            }
            if sample.step.iter().any(|value| !value.is_finite()) {
                return Err(FixedPointError::NonFiniteStep);
            }
            if matches!(sample.status, FixedPointStatus::Stop) {
                return Ok(Solution::fixed_point(x_k, value, 0.0, k, func_evals));
            }
            // Apply the step, project back into the box, and measure the step
            // that was actually taken (projection may shorten it).
            let x_next = self.project_point(&(&x_k + &sample.step));
            let applied_step = &x_next - &x_k;
            let step_norm = applied_step.dot(&applied_step).sqrt();
            if !step_norm.is_finite() {
                return Err(FixedPointError::NonFiniteStep);
            }
            last_value = value;
            last_step_norm = step_norm;
            x_k = x_next;
            // Converged: the applied step is below tolerance.
            if step_norm <= self.tolerance {
                return Ok(Solution::fixed_point(
                    x_k,
                    value,
                    step_norm,
                    k + 1,
                    func_evals,
                ));
            }
        }
        // Budget exhausted — hand back the last iterate inside the error.
        Err(FixedPointError::MaxIterationsReached {
            last_solution: Box::new(Solution::fixed_point(
                x_k,
                last_value,
                last_step_norm,
                self.max_iterations,
                func_evals,
            )),
        })
    }
}
4985
/// Builder-style front end for the fixed-point solver: owns the solver
/// configuration (`FixedPointCore`) together with the user objective.
pub struct FixedPoint<ObjFn> {
    /// Solver state/configuration: start point, tolerance, bounds, budget.
    core: FixedPointCore,
    /// User objective that proposes the per-iteration step.
    obj_fn: ObjFn,
}
4990
4991impl<ObjFn> FixedPoint<ObjFn>
4992where
4993 ObjFn: FixedPointObjective,
4994{
4995 pub fn new(x0: Array1<f64>, obj_fn: ObjFn) -> Self {
4996 Self {
4997 core: FixedPointCore::new(x0),
4998 obj_fn,
4999 }
5000 }
5001
5002 pub fn with_tolerance(mut self, tolerance: Tolerance) -> Self {
5003 self.core.tolerance = tolerance.get();
5004 self
5005 }
5006
5007 pub fn with_max_iterations(mut self, max_iterations: MaxIterations) -> Self {
5008 self.core.max_iterations = max_iterations.get();
5009 self
5010 }
5011
5012 pub fn with_bounds(mut self, bounds: Bounds) -> Self {
5013 self.core.bounds = Some(bounds.spec);
5014 self
5015 }
5016
5017 pub fn run(&mut self) -> Result<Solution, FixedPointError> {
5018 self.core.run(&mut self.obj_fn)
5019 }
5020}
5021
/// Strong-Wolfe line search along direction `d_k`, projection-aware for
/// box constraints, with several relaxed acceptance paths (see
/// `classify_line_search_accept`).
///
/// Strategy: start at `alpha = 1`, doubling while the curvature condition
/// is unmet; on an Armijo failure (or non-decrease vs. the previous trial)
/// hand the bracket to `zoom`; when the projection introduces a kink fall
/// back to `backtracking_line_search`; as a last resort sample interior
/// points via `probe_alphas`. Returns `(alpha, f, g, func_evals,
/// grad_evals, kind)` on success.
#[allow(clippy::too_many_arguments)]
fn line_search<ObjFn>(
    core: &mut BfgsCore,
    obj_fn: &mut ObjFn,
    oracle: &mut FirstOrderCache,
    x_k: &Array1<f64>,
    d_k: &Array1<f64>,
    f_k: f64,
    g_k: &Array1<f64>,
    c1: f64,
    c2: f64,
) -> LsResult
where
    ObjFn: FirstOrderObjective,
{
    // alpha_prev tracks the lower end of the expanding bracket.
    let mut alpha_i: f64 = 1.0; let mut alpha_prev = 0.0;

    let mut f_prev = f_k;
    // All descent/curvature tests use the gradient projected onto the
    // feasible set so that active bounds are respected.
    let g_proj_k = core.projected_gradient(x_k, g_k);
    let g_k_dot_d = g_proj_k.dot(d_k); if g_k_dot_d >= -eps_g(&g_proj_k, d_k, core.tau_g) {
        log::warn!(
            "[BFGS Wolfe] Non-descent direction detected (gᵀd = {:.2e} >= 0).",
            g_k_dot_d
        );
    }
    let mut g_prev_dot_d = g_k_dot_d;

    let max_attempts = WOLFE_MAX_ATTEMPTS;
    let mut func_evals = 0;
    let mut grad_evals = 0;
    // Floating-point slack used by the relaxed Armijo tests.
    let epsF = eps_f(f_k, core.tau_f);
    // Track the best finite point seen, so a failed search can still
    // publish progress through core.global_best.
    let mut best = ProbeBest::new(x_k, f_k, g_k);
    for _ in 0..max_attempts {
        let (x_new, s, kinked) = core.project_with_step(x_k, d_k, alpha_i);
        let step_ok = !core.projected_step_small(x_k, &s);
        if !step_ok {
            return Err(LineSearchError::StepSizeTooSmall);
        }
        // Cost-only evaluation first; the gradient is only requested once
        // the trial survives the cheap Armijo screen below.
        let mut f_i = match bfgs_eval_cost(oracle, obj_fn, &x_new, &mut func_evals) {
            Ok(f) => f,
            Err(ObjectiveEvalError::Recoverable { .. }) => f64::NAN,
            Err(ObjectiveEvalError::Fatal { message }) => {
                return Err(LineSearchError::ObjectiveFailed(message));
            }
        };

        if !f_i.is_finite() {
            // Non-finite cost: retreat toward the last finite alpha.
            core.nonfinite_seen = true;
            if alpha_prev == 0.0 {
                alpha_i *= 0.5;
            } else {
                alpha_i = 0.5 * (alpha_prev + alpha_i);
            }
            if alpha_i <= 1e-18 {
                // Step has collapsed; try a handful of interior probes
                // before giving up.
                if let Some((a, f, g, kind)) = probe_alphas(
                    core,
                    obj_fn,
                    oracle,
                    x_k,
                    d_k,
                    f_k,
                    g_k,
                    0.0,
                    alpha_i.max(f64::EPSILON),
                    core.tau_g,
                    core.grad_drop_factor,
                    &mut func_evals,
                    &mut grad_evals,
                ) {
                    return Ok((a, f, g, func_evals, grad_evals, kind));
                }
                return Err(LineSearchError::StepSizeTooSmall);
            }
            // Cap the number of non-finite-only retries.
            if func_evals >= 3 {
                return Err(LineSearchError::MaxAttempts(max_attempts));
            }
            continue;
        }

        // Armijo is tested against the actual (projected) step s, not alpha*d.
        let gkTs = g_proj_k.dot(&s);
        let armijo_strict = f_i > f_k + c1 * gkTs + epsF;
        let prev_worse = func_evals > 1 && f_i >= f_prev - epsF;
        if armijo_strict || prev_worse {
            // Bracketed a minimum between alpha_prev and alpha_i. If either
            // end sits on a projection kink, interpolation is unreliable —
            // use plain backtracking instead.
            let kink_lo = if alpha_prev > 0.0 {
                let (_, _, kink_prev) = core.project_with_step(x_k, d_k, alpha_prev);
                kink_prev
            } else {
                false
            };
            if kink_lo || kinked {
                let fallback = backtracking_line_search(core, obj_fn, oracle, x_k, d_k, f_k, g_k);
                return fallback.map(|(a, f, g, fe, ge, kind)| {
                    (a, f, g, fe + func_evals, ge + grad_evals, kind)
                });
            }
            // NaN marks the high-end derivative as unknown to zoom.
            let r = zoom(
                core,
                obj_fn,
                oracle,
                x_k,
                d_k,
                f_k,
                g_k,
                &g_proj_k,
                g_k_dot_d,
                c1,
                c2,
                alpha_prev,
                alpha_i,
                f_prev,
                f_i,
                g_prev_dot_d,
                f64::NAN,
                func_evals,
                grad_evals,
            );
            if r.is_err() {
                if best.f.is_finite() {
                    core.global_best = Some(best.clone());
                }
            }
            return r;
        }

        // Armijo screen passed: now fetch the gradient at the trial point.
        let (f_full, g_i) =
            match bfgs_eval_cost_grad(oracle, obj_fn, &x_new, &mut func_evals, &mut grad_evals) {
                Ok(sample) => sample,
                Err(ObjectiveEvalError::Recoverable { .. }) => {
                    core.nonfinite_seen = true;
                    if alpha_prev == 0.0 {
                        alpha_i *= 0.5;
                    } else {
                        alpha_i = 0.5 * (alpha_prev + alpha_i);
                    }
                    if alpha_i <= 1e-18 {
                        return Err(LineSearchError::StepSizeTooSmall);
                    }
                    continue;
                }
                Err(ObjectiveEvalError::Fatal { message }) => {
                    return Err(LineSearchError::ObjectiveFailed(message));
                }
            };
        f_i = f_full;
        if !f_i.is_finite() || g_i.iter().any(|v| !v.is_finite()) {
            core.nonfinite_seen = true;
            if alpha_prev == 0.0 {
                alpha_i *= 0.5;
            } else {
                alpha_i = 0.5 * (alpha_prev + alpha_i);
            }
            if alpha_i <= 1e-18 {
                return Err(LineSearchError::StepSizeTooSmall);
            }
            continue;
        }
        best.consider(&x_new, f_i, &g_i);

        // Re-test Armijo with the (possibly refreshed) full-evaluation cost.
        let armijo_strict = f_i > f_k + c1 * gkTs + epsF;
        let prev_worse = func_evals > 1 && f_i >= f_prev - epsF;
        if armijo_strict || prev_worse {
            let kink_lo = if alpha_prev > 0.0 {
                let (_, _, kink_prev) = core.project_with_step(x_k, d_k, alpha_prev);
                kink_prev
            } else {
                false
            };
            if kink_lo || kinked {
                let fallback = backtracking_line_search(core, obj_fn, oracle, x_k, d_k, f_k, g_k);
                return fallback.map(|(a, f, g, fe, ge, kind)| {
                    (a, f, g, fe + func_evals, ge + grad_evals, kind)
                });
            }
            // This time the trial-point derivative is known, so pass it on.
            let g_proj_i = core.projected_gradient(&x_new, &g_i);
            let g_i_dot_d = directional_derivative(&g_proj_i, &s, alpha_i, d_k);
            let r = zoom(
                core,
                obj_fn,
                oracle,
                x_k,
                d_k,
                f_k,
                g_k,
                &g_proj_k,
                g_k_dot_d,
                c1,
                c2,
                alpha_prev,
                alpha_i,
                f_prev,
                f_i,
                g_prev_dot_d,
                g_i_dot_d,
                func_evals,
                grad_evals,
            );
            if r.is_err() && best.f.is_finite() {
                core.global_best = Some(best.clone());
            }
            return r;
        }

        // Full acceptance check (strong Wolfe plus relaxed variants).
        let g_proj_i = core.projected_gradient(&x_new, &g_i);
        let g_i_dot_d = directional_derivative(&g_proj_i, &s, alpha_i, d_k);
        let g_k_dot_eff = directional_derivative(&g_proj_k, &s, alpha_i, d_k);
        let gi_norm = g_proj_i.dot(&g_proj_i).sqrt();
        let gk_norm = g_proj_k.dot(&g_proj_k).sqrt();
        let drop_factor = core.grad_drop_factor;
        let fmax = if core.gll.is_empty() {
            f_k
        } else {
            core.gll.fmax()
        };
        let epsG = eps_g(&g_proj_k, d_k, core.tau_g);
        if let Some(kind) = classify_line_search_accept(
            core,
            step_ok,
            f_k,
            fmax,
            f_i,
            gkTs,
            g_i_dot_d,
            g_k_dot_eff,
            gi_norm,
            gk_norm,
            drop_factor,
            epsF,
            epsG,
            c2,
        ) {
            // A genuine strong-Wolfe acceptance is rewarded by widening the
            // trust radius (capped at 1e6).
            if matches!(kind, AcceptKind::StrongWolfe) {
                let delta_now = core.trust_radius;
                core.trust_radius = (delta_now * 1.25).min(1e6);
            }
            return Ok((alpha_i, f_i, g_i, func_evals, grad_evals, kind));
        }

        // Slope turned non-negative: the minimum lies behind us, so zoom
        // with the bracket ends swapped (hi = previous alpha).
        if g_i_dot_d >= -eps_g(&g_proj_k, d_k, core.tau_g) {
            let r = zoom(
                core,
                obj_fn,
                oracle,
                x_k,
                d_k,
                f_k,
                g_k,
                &g_proj_k,
                g_k_dot_d,
                c1,
                c2,
                alpha_i,
                alpha_prev,
                f_i,
                f_prev,
                g_i_dot_d,
                g_prev_dot_d,
                func_evals,
                grad_evals,
            );
            if r.is_err() {
                if best.f.is_finite() {
                    core.global_best = Some(best.clone());
                }
            }
            return r;
        }

        // Still descending too steeply: expand the step and try again.
        alpha_prev = alpha_i;
        f_prev = f_i;
        g_prev_dot_d = g_i_dot_d;
        alpha_i *= 2.0;
    }

    // Attempts exhausted: publish the best point seen and make one last
    // interior-probe pass over [0, alpha_i].
    if best.f.is_finite() {
        core.global_best = Some(best);
    }
    if alpha_i > 0.0
        && let Some((a, f, g, kind)) = probe_alphas(
            core,
            obj_fn,
            oracle,
            x_k,
            d_k,
            f_k,
            g_k,
            0.0,
            alpha_i,
            core.tau_g,
            core.grad_drop_factor,
            &mut func_evals,
            &mut grad_evals,
        )
    {
        return Ok((a, f, g, func_evals, grad_evals, kind));
    }
    Err(LineSearchError::MaxAttempts(max_attempts))
}
5332
/// Robust fallback line search: shrink `alpha` by factor `rho` until a trial
/// passes `classify_line_search_accept`. Used when the Wolfe search or zoom
/// hits a projection kink. Includes anti-stall logic: after repeated
/// flat-cost trials it softens `rho`, expands once, and (optionally)
/// jiggles `alpha` randomly to escape plateaus.
fn backtracking_line_search<ObjFn>(
    core: &mut BfgsCore,
    obj_fn: &mut ObjFn,
    oracle: &mut FirstOrderCache,
    x_k: &Array1<f64>,
    d_k: &Array1<f64>,
    f_k: f64,
    g_k: &Array1<f64>,
) -> LsResult
where
    ObjFn: FirstOrderObjective,
{
    let mut alpha: f64 = 1.0;
    // rho is mutable: it is relaxed to 0.8 once the search stalls.
    let mut rho = 0.5;
    let max_attempts = BACKTRACKING_MAX_ATTEMPTS;

    let g_proj_k = core.projected_gradient(x_k, g_k);
    let g_k_dot_d = g_proj_k.dot(d_k);
    if g_k_dot_d >= -eps_g(&g_proj_k, d_k, core.tau_g) {
        log::warn!(
            "[BFGS Backtracking] Search started with a non-descent direction (gᵀd = {:.2e} > 0). This step will likely fail.",
            g_k_dot_d
        );
    }

    let mut func_evals = 0;
    let mut grad_evals = 0;
    // Best finite trial seen; published via core.global_best on failure.
    let mut best = ProbeBest::new(x_k, f_k, g_k);
    let epsF = eps_f(f_k, core.tau_f);
    // Counts consecutive trials whose cost did not change beyond noise.
    let mut no_change_count = 0usize;
    let mut expanded_once = false;
    let dnorm = d_k.dot(d_k).sqrt();
    for _ in 0..max_attempts {
        let (x_new, s, _) = core.project_with_step(x_k, d_k, alpha);
        let step_ok = !core.projected_step_small(x_k, &s);
        if !step_ok {
            return Err(LineSearchError::StepSizeTooSmall);
        }
        // Cost-only evaluation; the gradient is fetched only for promising
        // candidates (see candidate_for_gradient below).
        let mut f_new = match bfgs_eval_cost(oracle, obj_fn, &x_new, &mut func_evals) {
            Ok(f) => f,
            Err(ObjectiveEvalError::Recoverable { .. }) => f64::NAN,
            Err(ObjectiveEvalError::Fatal { message }) => {
                return Err(LineSearchError::ObjectiveFailed(message));
            }
        };

        if !f_new.is_finite() {
            // Non-finite cost: shrink and retry, with a small retry budget.
            core.nonfinite_seen = true;
            alpha *= rho;
            if alpha < 1e-16 {
                return Err(LineSearchError::StepSizeTooSmall);
            }
            if func_evals >= 3 {
                return Err(LineSearchError::MaxAttempts(max_attempts));
            }
            continue;
        }

        let gkTs = g_proj_k.dot(&s);
        let fmax = if core.gll.is_empty() {
            f_k
        } else {
            core.gll.fmax()
        };
        let armijo_accept = core.accept_armijo(f_k, gkTs, f_new);
        let gll_accept = core.accept_gll_nonmonotone(fmax, gkTs, f_new);
        // Only pay for a gradient when the point can plausibly be accepted.
        let candidate_for_gradient = armijo_accept
            || gll_accept
            || (core.relaxed_acceptors_enabled() && f_new <= f_k + epsF);
        let mut g_new_opt = None;
        if candidate_for_gradient {
            let (f_full, g_new) =
                match bfgs_eval_cost_grad(oracle, obj_fn, &x_new, &mut func_evals, &mut grad_evals)
                {
                    Ok(sample) => sample,
                    Err(ObjectiveEvalError::Recoverable { .. }) => {
                        core.nonfinite_seen = true;
                        alpha *= rho;
                        if alpha < 1e-16 {
                            return Err(LineSearchError::StepSizeTooSmall);
                        }
                        continue;
                    }
                    Err(ObjectiveEvalError::Fatal { message }) => {
                        return Err(LineSearchError::ObjectiveFailed(message));
                    }
                };
            f_new = f_full;
            if !f_new.is_finite() || g_new.iter().any(|v| !v.is_finite()) {
                core.nonfinite_seen = true;
                alpha *= rho;
                if alpha < 1e-16 {
                    return Err(LineSearchError::StepSizeTooSmall);
                }
                continue;
            }
            best.consider(&x_new, f_new, &g_new);
            g_new_opt = Some(g_new);
        }

        // No gradient was computed: this trial cannot be accepted; apply the
        // stall/shrink/jiggle policy and loop.
        let Some(g_new) = g_new_opt else {
            if (f_new - f_k).abs() <= epsF {
                no_change_count += 1;
            } else {
                no_change_count = 0;
                expanded_once = false;
            }
            if no_change_count >= 3 {
                rho = 0.8;
            }
            // One-time expansion after two flat trials, otherwise shrink.
            if no_change_count >= 2 && !expanded_once {
                alpha /= rho;
                expanded_once = true;
            } else {
                alpha *= rho;
            }
            if core.jiggle_enabled() && no_change_count >= 2 {
                let jiggle = 1.0 + core.jiggle_scale() * core.next_rand_sym();
                alpha = (alpha * jiggle).max(f64::EPSILON);
            }
            let tol_x = core.step_tolerance(x_k);
            if (alpha * dnorm) <= tol_x {
                return Err(LineSearchError::StepSizeTooSmall);
            }
            continue;
        };

        // Gradient in hand: run the shared acceptance classifier (note the
        // adaptive curvature constant core.c2_adapt is used here).
        let g_proj_new = core.projected_gradient(&x_new, &g_new);
        let gk_dot_eff = directional_derivative(&g_proj_k, &s, alpha, d_k);
        let gnew_norm = g_proj_new.dot(&g_proj_new).sqrt();
        let gk_norm = g_proj_k.dot(&g_proj_k).sqrt();
        let drop_factor = core.grad_drop_factor;
        let g_new_dot_d = directional_derivative(&g_proj_new, &s, alpha, d_k);
        let epsG = eps_g(&g_proj_k, d_k, core.tau_g);
        if let Some(kind) = classify_line_search_accept(
            core,
            step_ok,
            f_k,
            fmax,
            f_new,
            gkTs,
            g_new_dot_d,
            gk_dot_eff,
            gnew_norm,
            gk_norm,
            drop_factor,
            epsF,
            epsG,
            core.c2_adapt,
        ) {
            return Ok((alpha, f_new, g_new, func_evals, grad_evals, kind));
        }

        // Rejected despite gradient: same stall/shrink/jiggle policy.
        if (f_new - f_k).abs() <= epsF {
            no_change_count += 1;
        } else {
            no_change_count = 0;
            expanded_once = false;
        }
        if no_change_count >= 3 {
            rho = 0.8;
        }
        if no_change_count >= 2 && !expanded_once {
            alpha /= rho; expanded_once = true;
        } else {
            alpha *= rho;
        }
        if core.jiggle_enabled() && no_change_count >= 2 {
            let jiggle = 1.0 + core.jiggle_scale() * core.next_rand_sym();
            alpha = (alpha * jiggle).max(f64::EPSILON);
        }
        let tol_x = core.step_tolerance(x_k);
        if (alpha * dnorm) <= tol_x {
            return Err(LineSearchError::StepSizeTooSmall);
        }
    }

    // Attempts exhausted: final interior-probe pass over [0, alpha].
    if alpha > 0.0
        && let Some((a, f, g, kind)) = probe_alphas(
            core,
            obj_fn,
            oracle,
            x_k,
            d_k,
            f_k,
            g_k,
            0.0,
            alpha,
            core.tau_g,
            core.grad_drop_factor,
            &mut func_evals,
            &mut grad_evals,
        )
    {
        return Ok((a, f, g, func_evals, grad_evals, kind));
    }

    if best.f.is_finite() {
        core.global_best = Some(best);
    }
    Err(LineSearchError::MaxAttempts(max_attempts))
}
5545
/// Wolfe "zoom" phase: refine the bracket [alpha_lo, alpha_hi] (given with
/// cost values and, when finite, directional derivatives at each end) until
/// a trial step passes `classify_line_search_accept`.
///
/// The next trial is chosen by cubic interpolation with a bisection
/// fallback. Special handling exists for: projection kinks at the bracket
/// ends (fall back to backtracking), degenerate/flat brackets (pick the
/// end with the smaller slope, or bisect), and non-finite evaluations
/// (tighten the bracket and mark that end's derivative unknown).
/// NaN is used as the "derivative unknown" sentinel for g_*_dot_d.
#[allow(clippy::too_many_arguments)]
fn zoom<ObjFn>(
    core: &mut BfgsCore,
    obj_fn: &mut ObjFn,
    oracle: &mut FirstOrderCache,
    x_k: &Array1<f64>,
    d_k: &Array1<f64>,
    f_k: f64,
    g_k: &Array1<f64>,
    g_proj_k: &Array1<f64>,
    _g_k_dot_d: f64,
    c1: f64,
    c2: f64,
    mut alpha_lo: f64,
    mut alpha_hi: f64,
    mut f_lo: f64,
    mut f_hi: f64,
    mut g_lo_dot_d: f64,
    mut g_hi_dot_d: f64,
    mut func_evals: usize,
    mut grad_evals: usize,
) -> LsResult
where
    ObjFn: FirstOrderObjective,
{
    let max_zoom_attempts = 15;
    let min_alpha_step = 1e-12; let epsF = eps_f(f_k, core.tau_f);
    let mut best = ProbeBest::new(x_k, f_k, g_k);
    // NaN sentinels mean "no derivative information at this end".
    let mut lo_deriv_known = g_lo_dot_d.is_finite();
    let mut hi_deriv_known = g_hi_dot_d.is_finite();
    for _ in 0..max_zoom_attempts {
        // Kinked projection at either end invalidates interpolation:
        // delegate to backtracking (merging evaluation counters).
        let kink_lo = if alpha_lo > 0.0 {
            let (_, _, kink) = core.project_with_step(x_k, d_k, alpha_lo);
            kink
        } else {
            false
        };
        let kink_hi = if alpha_hi > 0.0 {
            let (_, _, kink) = core.project_with_step(x_k, d_k, alpha_hi);
            kink
        } else {
            false
        };
        if kink_lo || kink_hi {
            let fallback = backtracking_line_search(core, obj_fn, oracle, x_k, d_k, f_k, g_k);
            return fallback
                .map(|(a, f, g, fe, ge, kind)| (a, f, g, fe + func_evals, ge + grad_evals, kind));
        }
        // Degenerate-bracket diagnostics.
        let tiny_bracket = (alpha_hi - alpha_lo).abs() <= 1e-12;
        let flat_f = (f_hi - f_lo).abs() <= epsF;
        let similar_slope = lo_deriv_known
            && hi_deriv_known
            && (g_hi_dot_d.abs() - g_lo_dot_d.abs()).abs()
                <= core.curv_slack_scale * eps_g(g_proj_k, d_k, core.tau_g);
        if tiny_bracket || (flat_f && !similar_slope) {
            // Bracket is effectively a point (or flat with distinct slopes):
            // evaluate directly at the end with the flatter known slope.
            let (mut alpha_j, choose_lo) = match (lo_deriv_known, hi_deriv_known) {
                (true, true) => {
                    if g_lo_dot_d.abs() <= g_hi_dot_d.abs() {
                        (alpha_lo, true)
                    } else {
                        (alpha_hi, false)
                    }
                }
                (true, false) => (alpha_lo, true),
                (false, true) => (alpha_hi, false),
                (false, false) => ((alpha_lo + alpha_hi) / 2.0, false),
            };
            // Guard against a zero-length step; prefer the opposite end,
            // then the midpoint.
            if alpha_j <= f64::EPSILON {
                alpha_j = if choose_lo { alpha_hi } else { alpha_lo };
            }
            if alpha_j <= f64::EPSILON {
                alpha_j = 0.5 * (alpha_lo + alpha_hi);
            }
            let (x_j, s_j, kink_mid) = core.project_with_step(x_k, d_k, alpha_j);
            let step_ok = !core.projected_step_small(x_k, &s_j);
            if !step_ok {
                return Err(LineSearchError::StepSizeTooSmall);
            }
            if kink_mid {
                let fallback = backtracking_line_search(core, obj_fn, oracle, x_k, d_k, f_k, g_k);
                return fallback.map(|(a, f, g, fe, ge, kind)| {
                    (a, f, g, fe + func_evals, ge + grad_evals, kind)
                });
            }
            let (f_j, g_j) =
                match bfgs_eval_cost_grad(oracle, obj_fn, &x_j, &mut func_evals, &mut grad_evals) {
                    Ok(sample) => sample,
                    Err(ObjectiveEvalError::Recoverable { .. }) => {
                        (f64::NAN, Array1::zeros(x_j.len()))
                    }
                    Err(ObjectiveEvalError::Fatal { message }) => {
                        return Err(LineSearchError::ObjectiveFailed(message));
                    }
                };
            if !f_j.is_finite() || g_j.iter().any(|&v| !v.is_finite()) {
                // Non-finite: pull the chosen end toward the midpoint and
                // drop its derivative information.
                core.nonfinite_seen = true;
                if choose_lo {
                    alpha_lo = 0.5 * (alpha_lo + alpha_hi);
                    lo_deriv_known = false;
                } else {
                    alpha_hi = 0.5 * (alpha_lo + alpha_hi);
                    hi_deriv_known = false;
                }
                continue;
            }
            let g_proj_j = core.projected_gradient(&x_j, &g_j);
            let gkTs = g_proj_k.dot(&s_j);
            let gk_dot_d_eff = directional_derivative(g_proj_k, &s_j, alpha_j, d_k);
            let g_j_dot_d = directional_derivative(&g_proj_j, &s_j, alpha_j, d_k);
            let epsG = eps_g(g_proj_k, d_k, core.tau_g);
            let gj_norm = g_proj_j.iter().map(|v| v * v).sum::<f64>().sqrt();
            let gk_norm = g_proj_k.iter().map(|v| v * v).sum::<f64>().sqrt();
            let drop_factor = core.grad_drop_factor;
            let fmax = if core.gll.is_empty() {
                f_k
            } else {
                core.gll.fmax()
            };
            if let Some(kind) = classify_line_search_accept(
                core,
                step_ok,
                f_k,
                fmax,
                f_j,
                gkTs,
                g_j_dot_d,
                gk_dot_d_eff,
                gj_norm,
                gk_norm,
                drop_factor,
                epsF,
                epsG,
                c2,
            ) {
                return Ok((alpha_j, f_j, g_j, func_evals, grad_evals, kind));
            } else {
                // Rejected: replace whichever end the trial is closer to.
                let mid = 0.5 * (alpha_lo + alpha_hi);
                if alpha_j > mid {
                    alpha_hi = alpha_j;
                    f_hi = f_j;
                    g_hi_dot_d = g_j_dot_d;
                    hi_deriv_known = true;
                } else {
                    alpha_lo = alpha_j;
                    f_lo = f_j;
                    g_lo_dot_d = g_j_dot_d;
                    lo_deriv_known = true;
                }
                continue;
            }
        }
        if flat_f && similar_slope {
            // Flat bracket with matching slopes: bisect and test the midpoint.
            let alpha_mid = 0.5 * (alpha_lo + alpha_hi);
            let (x_mid, s_mid, kink_mid) = core.project_with_step(x_k, d_k, alpha_mid);
            let step_ok = !core.projected_step_small(x_k, &s_mid);
            if !step_ok {
                return Err(LineSearchError::StepSizeTooSmall);
            }
            if kink_mid {
                let fallback = backtracking_line_search(core, obj_fn, oracle, x_k, d_k, f_k, g_k);
                return fallback.map(|(a, f, g, fe, ge, kind)| {
                    (a, f, g, fe + func_evals, ge + grad_evals, kind)
                });
            }
            let (f_mid, g_mid) =
                match bfgs_eval_cost_grad(oracle, obj_fn, &x_mid, &mut func_evals, &mut grad_evals)
                {
                    Ok(sample) => sample,
                    Err(ObjectiveEvalError::Recoverable { .. }) => {
                        // Tighten from the steeper end on failure.
                        core.nonfinite_seen = true;
                        let tighten_lo = g_lo_dot_d.abs() > g_hi_dot_d.abs();
                        if tighten_lo {
                            alpha_lo = alpha_mid;
                            lo_deriv_known = false;
                        } else {
                            alpha_hi = alpha_mid;
                            hi_deriv_known = false;
                        }
                        continue;
                    }
                    Err(ObjectiveEvalError::Fatal { message }) => {
                        return Err(LineSearchError::ObjectiveFailed(message));
                    }
                };
            if f_mid.is_finite() && g_mid.iter().all(|v| v.is_finite()) {
                let g_proj_mid = core.projected_gradient(&x_mid, &g_mid);
                let g_mid_dot_d = directional_derivative(&g_proj_mid, &s_mid, alpha_mid, d_k);
                let gkTs = g_proj_k.dot(&s_mid);
                let gk_dot_d_eff = directional_derivative(g_proj_k, &s_mid, alpha_mid, d_k);
                let epsG = eps_g(g_proj_k, d_k, core.tau_g);
                let gmid_norm = g_proj_mid.iter().map(|v| v * v).sum::<f64>().sqrt();
                let gk_norm = g_proj_k.iter().map(|v| v * v).sum::<f64>().sqrt();
                let drop_factor = core.grad_drop_factor;
                let fmax = if core.gll.is_empty() {
                    f_k
                } else {
                    core.gll.fmax()
                };
                if let Some(kind) = classify_line_search_accept(
                    core,
                    step_ok,
                    f_k,
                    fmax,
                    f_mid,
                    gkTs,
                    g_mid_dot_d,
                    gk_dot_d_eff,
                    gmid_norm,
                    gk_norm,
                    drop_factor,
                    epsF,
                    epsG,
                    c2,
                ) {
                    return Ok((alpha_mid, f_mid, g_mid, func_evals, grad_evals, kind));
                }
                // Rejected midpoint: tighten from the steeper end, keeping
                // the fresh midpoint data at that end.
                let tighten_lo = g_lo_dot_d.abs() > g_hi_dot_d.abs();
                if tighten_lo {
                    alpha_lo = alpha_mid;
                    f_lo = f_mid;
                    g_lo_dot_d = g_mid_dot_d;
                    lo_deriv_known = true;
                } else {
                    alpha_hi = alpha_mid;
                    f_hi = f_mid;
                    g_hi_dot_d = g_mid_dot_d;
                    hi_deriv_known = true;
                }
                continue;
            } else {
                core.nonfinite_seen = true;
                let tighten_lo = g_lo_dot_d.abs() > g_hi_dot_d.abs();
                if tighten_lo {
                    alpha_lo = alpha_mid;
                    lo_deriv_known = false;
                } else {
                    alpha_hi = alpha_mid;
                    hi_deriv_known = false;
                }
                continue;
            }
        }
        // Both ends non-finite: nothing sensible to interpolate.
        if !f_lo.is_finite() && !f_hi.is_finite() {
            log::warn!("[BFGS Zoom] Line search bracketed an infinite region. Aborting.");
            return Err(LineSearchError::MaxAttempts(max_zoom_attempts));
        }
        // Choose the next trial by cubic interpolation over the ordered
        // bracket; fall back to bisection whenever data is missing or the
        // cubic minimizer is invalid / outside the bracket.
        let alpha_j = {
            let (alpha_lo_i, alpha_hi_i, f_lo_i, f_hi_i, g_lo_i, g_hi_i) = if alpha_lo <= alpha_hi {
                (alpha_lo, alpha_hi, f_lo, f_hi, g_lo_dot_d, g_hi_dot_d)
            } else {
                (alpha_hi, alpha_lo, f_hi, f_lo, g_hi_dot_d, g_lo_dot_d)
            };

            let alpha_diff = alpha_hi_i - alpha_lo_i;

            if alpha_diff < min_alpha_step
                || !f_lo_i.is_finite()
                || !f_hi_i.is_finite()
                || !lo_deriv_known
                || !hi_deriv_known
            {
                (alpha_lo + alpha_hi) / 2.0
            } else {
                // Standard cubic minimizer from endpoint values and slopes.
                let d1 = g_lo_i + g_hi_i - 3.0 * (f_hi_i - f_lo_i) / alpha_diff;
                let d2_sq = d1 * d1 - g_lo_i * g_hi_i;

                if d2_sq >= 0.0 && d2_sq.is_finite() {
                    let d2 = d2_sq.sqrt();
                    let trial =
                        alpha_hi_i - alpha_diff * (g_hi_i + d2 - d1) / (g_hi_i - g_lo_i + 2.0 * d2);

                    if !trial.is_finite() || trial < alpha_lo_i || trial > alpha_hi_i {
                        (alpha_lo + alpha_hi) / 2.0
                    } else {
                        trial
                    }
                } else {
                    (alpha_lo + alpha_hi) / 2.0
                }
            }
        };

        // Keep the trial strictly interior to the bracket.
        let alpha_j = if (alpha_j - alpha_lo).abs() < min_alpha_step
            || (alpha_j - alpha_hi).abs() < min_alpha_step
        {
            (alpha_lo + alpha_hi) / 2.0
        } else {
            alpha_j
        };

        let (x_j, s_j, kink_j) = core.project_with_step(x_k, d_k, alpha_j);
        let step_ok = !core.projected_step_small(x_k, &s_j);
        if !step_ok {
            return Err(LineSearchError::StepSizeTooSmall);
        }
        if kink_j {
            let fallback = backtracking_line_search(core, obj_fn, oracle, x_k, d_k, f_k, g_k);
            return fallback
                .map(|(a, f, g, fe, ge, kind)| (a, f, g, fe + func_evals, ge + grad_evals, kind));
        }
        // Cost-only first; gradient only if the sufficient-decrease tests pass.
        let mut f_j = match bfgs_eval_cost(oracle, obj_fn, &x_j, &mut func_evals) {
            Ok(f) => f,
            Err(ObjectiveEvalError::Recoverable { .. }) => f64::NAN,
            Err(ObjectiveEvalError::Fatal { message }) => {
                return Err(LineSearchError::ObjectiveFailed(message));
            }
        };

        if !f_j.is_finite() {
            // Move whichever end is closer to the failed trial.
            core.nonfinite_seen = true;
            let to_hi = (alpha_hi - alpha_j).abs() <= (alpha_j - alpha_lo).abs();
            if to_hi {
                alpha_hi = alpha_j;
                f_hi = f_j;
                hi_deriv_known = false;
            } else {
                alpha_lo = alpha_j;
                f_lo = f_j;
                lo_deriv_known = false;
            }
            continue;
        }

        let fmax = if core.gll.is_empty() {
            f_k
        } else {
            core.gll.fmax()
        };
        let gkTs = g_proj_k.dot(&s_j);
        let gk_dot_d_eff = directional_derivative(g_proj_k, &s_j, alpha_j, d_k);
        // Sufficient decrease vs. f_k (Armijo) or the nonmonotone reference.
        let armijo_ok = f_j <= f_k + c1 * gkTs + epsF;
        let armijo_gll_ok = f_j <= fmax + c1 * gkTs + epsF;
        if (!armijo_ok && !armijo_gll_ok) || f_j >= f_lo - epsF {
            // Trial is too high: it becomes the new upper end.
            alpha_hi = alpha_j;
            f_hi = f_j;
            hi_deriv_known = false;
        } else {
            let (f_full, g_j) =
                match bfgs_eval_cost_grad(oracle, obj_fn, &x_j, &mut func_evals, &mut grad_evals) {
                    Ok(sample) => sample,
                    Err(ObjectiveEvalError::Recoverable { .. }) => {
                        core.nonfinite_seen = true;
                        let to_hi = (alpha_hi - alpha_j).abs() <= (alpha_j - alpha_lo).abs();
                        if to_hi {
                            alpha_hi = alpha_j;
                            f_hi = f64::NAN;
                            hi_deriv_known = false;
                        } else {
                            alpha_lo = alpha_j;
                            f_lo = f64::NAN;
                            lo_deriv_known = false;
                        }
                        continue;
                    }
                    Err(ObjectiveEvalError::Fatal { message }) => {
                        return Err(LineSearchError::ObjectiveFailed(message));
                    }
                };
            f_j = f_full;
            if !f_j.is_finite() || g_j.iter().any(|&v| !v.is_finite()) {
                core.nonfinite_seen = true;
                let to_hi = (alpha_hi - alpha_j).abs() <= (alpha_j - alpha_lo).abs();
                if to_hi {
                    alpha_hi = alpha_j;
                    f_hi = f_j;
                    hi_deriv_known = false;
                } else {
                    alpha_lo = alpha_j;
                    f_lo = f_j;
                    lo_deriv_known = false;
                }
                continue;
            }
            best.consider(&x_j, f_j, &g_j);
            // Re-test decrease with the (possibly refreshed) full cost.
            let armijo_ok = f_j <= f_k + c1 * gkTs + epsF;
            let armijo_gll_ok = f_j <= fmax + c1 * gkTs + epsF;
            if (!armijo_ok && !armijo_gll_ok) || f_j >= f_lo - epsF {
                alpha_hi = alpha_j;
                f_hi = f_j;
                let g_proj_j = core.projected_gradient(&x_j, &g_j);
                g_hi_dot_d = directional_derivative(&g_proj_j, &s_j, alpha_j, d_k);
                hi_deriv_known = true;
                continue;
            }

            let g_proj_j = core.projected_gradient(&x_j, &g_j);
            let g_j_dot_d = directional_derivative(&g_proj_j, &s_j, alpha_j, d_k);
            let gj_norm = g_proj_j.dot(&g_proj_j).sqrt();
            let gk_norm = g_proj_k.dot(g_proj_k).sqrt();
            let drop_factor = core.grad_drop_factor;
            let epsG = eps_g(g_proj_k, d_k, core.tau_g);
            if let Some(kind) = classify_line_search_accept(
                core,
                step_ok,
                f_k,
                fmax,
                f_j,
                gkTs,
                g_j_dot_d,
                gk_dot_d_eff,
                gj_norm,
                gk_norm,
                drop_factor,
                epsF,
                epsG,
                c2,
            ) {
                return Ok((alpha_j, f_j, g_j, func_evals, grad_evals, kind));
            }

            if g_j_dot_d >= -eps_g(g_proj_k, d_k, core.tau_g) {
                // Slope at the trial is non-negative: the minimum lies on
                // the lo side, so the old lo becomes the new hi.
                alpha_hi = alpha_lo;
                f_hi = f_lo;
                g_hi_dot_d = g_lo_dot_d;
                hi_deriv_known = lo_deriv_known;

                alpha_lo = alpha_j;
                f_lo = f_j;
                g_lo_dot_d = g_j_dot_d;
                lo_deriv_known = true;
            } else {
                // Still descending: the trial replaces the lower end.
                alpha_lo = alpha_j;
                f_lo = f_j;
                g_lo_dot_d = g_j_dot_d;
                lo_deriv_known = true;
            }
        }
    }
    // Attempts exhausted: probe interior points of the final bracket, then
    // publish the best finite point seen.
    if let Some((a, f, g, kind)) = probe_alphas(
        core,
        obj_fn,
        oracle,
        x_k,
        d_k,
        f_k,
        g_k,
        alpha_lo.min(alpha_hi),
        alpha_lo.max(alpha_hi),
        core.tau_g,
        core.grad_drop_factor,
        &mut func_evals,
        &mut grad_evals,
    ) {
        return Ok((a, f, g, func_evals, grad_evals, kind));
    }
    if best.f.is_finite() {
        core.global_best = Some(best);
    }
    Err(LineSearchError::MaxAttempts(max_zoom_attempts))
}
6028
/// Last-resort sampler: evaluate the interior points at 20%, 50% and 80% of
/// [a_lo, a_hi] and return the lowest-cost candidate that passes
/// `classify_line_search_accept` (using the adaptive curvature constant
/// core.c2_adapt). Returns `(alpha, f, g, kind)` or `None` if no candidate
/// is acceptable. Evaluation counters are accumulated into `fe`/`ge`.
#[allow(clippy::too_many_arguments)]
fn probe_alphas<ObjFn>(
    core: &mut BfgsCore,
    obj_fn: &mut ObjFn,
    oracle: &mut FirstOrderCache,
    x_k: &Array1<f64>,
    d_k: &Array1<f64>,
    f_k: f64,
    g_k: &Array1<f64>,
    a_lo: f64,
    a_hi: f64,
    tau_g: f64,
    drop_factor: f64,
    fe: &mut usize,
    ge: &mut usize,
) -> Option<(f64, f64, Array1<f64>, AcceptKind)>
where
    ObjFn: FirstOrderObjective,
{
    let cands = [0.2, 0.5, 0.8].map(|t| a_lo + t * (a_hi - a_lo));
    let g_proj_k = core.projected_gradient(x_k, g_k);
    let gk_norm = g_proj_k.iter().map(|v| v * v).sum::<f64>().sqrt();
    let epsF = eps_f(f_k, core.tau_f);
    let epsG = eps_g(&g_proj_k, d_k, tau_g);
    // Best acceptable candidate so far, keyed by cost: (f, alpha, g, kind).
    let mut best: Option<(f64, f64, Array1<f64>, AcceptKind)> = None;
    for &a in &cands {
        if !a.is_finite() || a <= 0.0 {
            continue;
        }
        let (x, s, _) = core.project_with_step(x_k, d_k, a);
        let step_ok = !core.projected_step_small(x_k, &s);
        if !step_ok {
            continue;
        }
        // Cheap cost-only screen before paying for the gradient; the second
        // call below goes through the same cache/oracle.
        let f = match bfgs_eval_cost(oracle, obj_fn, &x, fe) {
            Ok(f) => f,
            Err(_) => continue,
        };
        if !f.is_finite() {
            continue;
        }
        let gkTs = g_proj_k.dot(&s);
        let (f, g) = match bfgs_eval_cost_grad(oracle, obj_fn, &x, fe, ge) {
            Ok(sample) => sample,
            Err(_) => continue,
        };
        if !f.is_finite() || g.iter().any(|v| !v.is_finite()) {
            continue;
        }
        let g_proj = core.projected_gradient(&x, &g);
        let gi_norm = g_proj.dot(&g_proj).sqrt();
        let g_trial_dot_d = directional_derivative(&g_proj, &s, a, d_k);
        let gk_dot_d_eff = directional_derivative(&g_proj_k, &s, a, d_k);
        let fmax = if core.gll.is_empty() {
            f_k
        } else {
            core.gll.fmax()
        };
        // Keep the candidate only if it is acceptable AND strictly better
        // (lower cost) than the best acceptable candidate so far.
        if let Some(kind) = classify_line_search_accept(
            core,
            step_ok,
            f_k,
            fmax,
            f,
            gkTs,
            g_trial_dot_d,
            gk_dot_d_eff,
            gi_norm,
            gk_norm,
            drop_factor,
            epsF,
            epsG,
            core.c2_adapt,
        ) && best.as_ref().map(|(fb, _, _, _)| f < *fb).unwrap_or(true)
        {
            best = Some((f, a, g, kind));
        }
    }
    best.map(|(f, a, g, kind)| (a, f, g, kind))
}
6109
6110#[cfg(test)]
6111mod tests {
6112 use super::{
6123 ArcError, AutoSecondOrderSolver, BACKTRACKING_MAX_ATTEMPTS, Bfgs, BfgsError, Bounds,
6124 FiniteDiffGradient, FirstOrderObjective, FirstOrderSample, FixedPoint, FixedPointObjective,
6125 FixedPointSample, FixedPointStatus, LineSearchFailureReason, MaxIterations,
6126 NewtonTrustRegion, ObjectiveEvalError, Problem, Profile, SecondOrderObjective,
6127 SecondOrderProblem, SecondOrderSample, Solution, Tolerance, ZerothOrderObjective, optimize,
6128 };
6129 use ndarray::{Array1, Array2, array};
6130 use spectral::prelude::*;
6131
6132 use std::path::{Path, PathBuf};
6134 use std::process::Command;
6135 use std::sync::OnceLock;
6136 use std::sync::{Arc, Mutex};
6137
    /// JSON result emitted by `optimization_harness.py` for a single
    /// reference-optimizer run; used to cross-check the Rust solvers.
    /// Optional fields are presumably absent on failed runs — confirm
    /// against the harness script's output schema.
    #[derive(serde::Deserialize)]
    struct PythonOptResult {
        /// Whether the Python optimizer reported success.
        success: bool,
        /// Final iterate, if the run produced one.
        final_point: Option<Vec<f64>>,
        /// Final objective value.
        final_value: Option<f64>,
        /// Norm of the gradient at the final iterate.
        final_gradient_norm: Option<f64>,
        /// Iteration count reported by the optimizer.
        iterations: Option<usize>,
        /// Objective-evaluation count.
        func_evals: Option<usize>,
        /// Gradient-evaluation count.
        grad_evals: Option<usize>,
        /// Human-readable status message from the optimizer.
        message: Option<String>,
        /// Error description when the harness itself failed.
        error: Option<String>,
    }
6150
6151 fn optimize_with_python(
6153 x0: &Array1<f64>,
6154 function_name: &str,
6155 tolerance: f64,
6156 max_iterations: usize,
6157 ) -> Result<PythonOptResult, String> {
6158 let python = ensure_python_deps()?;
6159 let crate_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
6160 let harness = crate_dir.join("optimization_harness.py");
6161 let input_json = serde_json::json!({
6162 "x0": x0.to_vec(),
6163 "function": function_name,
6164 "tolerance": tolerance,
6165 "max_iterations": max_iterations
6166 });
6167
6168 let output = Command::new(python)
6169 .arg(&harness)
6170 .arg(input_json.to_string())
6171 .current_dir(&crate_dir)
6172 .output()
6173 .map_err(|e| format!("Failed to execute Python script: {}", e))?;
6174
6175 if !output.status.success() {
6176 return Err(format!(
6177 "Python script failed: {}",
6178 String::from_utf8_lossy(&output.stderr)
6179 ));
6180 }
6181
6182 let result_str = String::from_utf8(output.stdout)
6183 .map_err(|e| format!("Invalid UTF-8 in Python output: {}", e))?;
6184
6185 serde_json::from_str(&result_str)
6186 .map_err(|e| format!("Failed to parse Python result: {}", e))
6187 }
6188
    /// Locate (and, if necessary, bootstrap) a Python interpreter with
    /// numpy/scipy available for the reference harness.
    ///
    /// Resolution order: the crate-local `.venv` interpreter if it exists,
    /// otherwise system `python3`; if the `import numpy, scipy` check fails,
    /// a `.venv` is created (when not already selected) and the packages are
    /// installed into it. The result — interpreter path or error — is cached
    /// process-wide in a `OnceLock` so the probe runs at most once.
    fn ensure_python_deps() -> Result<String, String> {
        static PYTHON_PATH: OnceLock<Result<String, String>> = OnceLock::new();
        PYTHON_PATH
            .get_or_init(|| {
                let crate_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
                let venv_python = crate_dir.join(".venv/bin/python");
                let venv_python_str = venv_python.display().to_string();
                // Prefer the crate-local venv interpreter when present.
                let python = if Path::new(&venv_python).exists() {
                    venv_python_str.clone()
                } else {
                    "python3".to_string()
                };

                // Fast path: interpreter already has the required packages.
                let check = Command::new(&python)
                    .arg("-c")
                    .arg("import numpy, scipy")
                    .output()
                    .map_err(|e| format!("Failed to execute Python: {}", e))?;

                if check.status.success() {
                    return Ok(python);
                }

                // Create the venv only if we are not already pointing at it.
                if python != venv_python_str {
                    let venv = Command::new("python3")
                        .arg("-m")
                        .arg("venv")
                        .arg(crate_dir.join(".venv"))
                        .current_dir(&crate_dir)
                        .output()
                        .map_err(|e| format!("Failed to create venv: {}", e))?;
                    if !venv.status.success() {
                        return Err(format!(
                            "Failed to create venv: {}",
                            String::from_utf8_lossy(&venv.stderr)
                        ));
                    }
                }

                // Install the scientific stack into the venv interpreter.
                let install = Command::new(&venv_python)
                    .arg("-m")
                    .arg("pip")
                    .arg("install")
                    .arg("numpy")
                    .arg("scipy")
                    .current_dir(&crate_dir)
                    .output()
                    .map_err(|e| format!("Failed to install numpy/scipy: {}", e))?;
                if !install.status.success() {
                    return Err(format!(
                        "Failed to install numpy/scipy: {}",
                        String::from_utf8_lossy(&install.stderr)
                    ));
                }

                Ok(venv_python_str)
            })
            .clone()
    }
6248
6249 fn quadratic(x: &Array1<f64>) -> (f64, Array1<f64>) {
6253 (x.dot(x), 2.0 * x)
6254 }
6255
    /// Adapts a `(value, gradient)` closure to the objective traits.
    struct FirstOrderFn<F> {
        inner: F,
    }
6259
6260 impl<F> FirstOrderFn<F> {
6261 fn new(inner: F) -> Self {
6262 Self { inner }
6263 }
6264 }
6265
6266 impl<F> ZerothOrderObjective for FirstOrderFn<F>
6267 where
6268 F: FnMut(&Array1<f64>) -> (f64, Array1<f64>),
6269 {
6270 fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
6271 Ok((self.inner)(x).0)
6272 }
6273 }
6274
6275 impl<F> FirstOrderObjective for FirstOrderFn<F>
6276 where
6277 F: FnMut(&Array1<f64>) -> (f64, Array1<f64>),
6278 {
6279 fn eval_grad(&mut self, x: &Array1<f64>) -> Result<FirstOrderSample, ObjectiveEvalError> {
6280 let (f, g) = (self.inner)(x);
6281 Ok(FirstOrderSample {
6282 value: f,
6283 gradient: g,
6284 })
6285 }
6286 }
6287
6288 fn bfgs_oracle<F>(fg: F) -> FirstOrderFn<F>
6289 where
6290 F: FnMut(&Array1<f64>) -> (f64, Array1<f64>),
6291 {
6292 FirstOrderFn::new(fg)
6293 }
6294
    /// Adapts a `(value, gradient, hessian)` closure to the objective traits.
    struct SecondOrderFn<F> {
        inner: F,
    }
6298
6299 impl<F> SecondOrderFn<F> {
6300 fn new(inner: F) -> Self {
6301 Self { inner }
6302 }
6303 }
6304
6305 impl<F> ZerothOrderObjective for SecondOrderFn<F>
6306 where
6307 F: FnMut(&Array1<f64>) -> (f64, Array1<f64>, Array2<f64>),
6308 {
6309 fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
6310 Ok((self.inner)(x).0)
6311 }
6312 }
6313
6314 impl<F> FirstOrderObjective for SecondOrderFn<F>
6315 where
6316 F: FnMut(&Array1<f64>) -> (f64, Array1<f64>, Array2<f64>),
6317 {
6318 fn eval_grad(&mut self, x: &Array1<f64>) -> Result<FirstOrderSample, ObjectiveEvalError> {
6319 let (f, g, _) = (self.inner)(x);
6320 Ok(FirstOrderSample {
6321 value: f,
6322 gradient: g,
6323 })
6324 }
6325 }
6326
6327 impl<F> SecondOrderObjective for SecondOrderFn<F>
6328 where
6329 F: FnMut(&Array1<f64>) -> (f64, Array1<f64>, Array2<f64>),
6330 {
6331 fn eval_hessian(
6332 &mut self,
6333 x: &Array1<f64>,
6334 ) -> Result<SecondOrderSample, ObjectiveEvalError> {
6335 let (f, g, h) = (self.inner)(x);
6336 Ok(SecondOrderSample {
6337 value: f,
6338 gradient: g,
6339 hessian: Some(h),
6340 })
6341 }
6342 }
6343
    /// Second-order objective wrapper that counts derivative requests through
    /// shared counters, so tests can assert how often each oracle path runs.
    struct CountingSecondOrder<F> {
        inner: F,
        // Incremented on every `eval_grad` call.
        first_order_calls: Arc<Mutex<usize>>,
        // Incremented on every `eval_hessian` call.
        second_order_calls: Arc<Mutex<usize>>,
    }
6349
6350 impl<F> CountingSecondOrder<F> {
6351 fn new(
6352 inner: F,
6353 first_order_calls: Arc<Mutex<usize>>,
6354 second_order_calls: Arc<Mutex<usize>>,
6355 ) -> Self {
6356 Self {
6357 inner,
6358 first_order_calls,
6359 second_order_calls,
6360 }
6361 }
6362 }
6363
6364 impl<F> ZerothOrderObjective for CountingSecondOrder<F>
6365 where
6366 F: FnMut(&Array1<f64>) -> (f64, Array1<f64>, Array2<f64>),
6367 {
6368 fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
6369 Ok((self.inner)(x).0)
6370 }
6371 }
6372
6373 impl<F> FirstOrderObjective for CountingSecondOrder<F>
6374 where
6375 F: FnMut(&Array1<f64>) -> (f64, Array1<f64>, Array2<f64>),
6376 {
6377 fn eval_grad(&mut self, x: &Array1<f64>) -> Result<FirstOrderSample, ObjectiveEvalError> {
6378 *self
6379 .first_order_calls
6380 .lock()
6381 .expect("lock first-order calls") += 1;
6382 let (f, g, _) = (self.inner)(x);
6383 Ok(FirstOrderSample {
6384 value: f,
6385 gradient: g,
6386 })
6387 }
6388 }
6389
6390 impl<F> SecondOrderObjective for CountingSecondOrder<F>
6391 where
6392 F: FnMut(&Array1<f64>) -> (f64, Array1<f64>, Array2<f64>),
6393 {
6394 fn eval_hessian(
6395 &mut self,
6396 x: &Array1<f64>,
6397 ) -> Result<SecondOrderSample, ObjectiveEvalError> {
6398 *self
6399 .second_order_calls
6400 .lock()
6401 .expect("lock second-order calls") += 1;
6402 let (f, g, h) = (self.inner)(x);
6403 Ok(SecondOrderSample {
6404 value: f,
6405 gradient: g,
6406 hessian: Some(h),
6407 })
6408 }
6409 }
6410
6411 fn gradient_norm(solution: &Solution) -> f64 {
6412 solution
6413 .final_gradient_norm
6414 .expect("gradient-based solution should carry a final gradient norm")
6415 }
6416
6417 fn step_norm(solution: &Solution) -> f64 {
6418 solution
6419 .final_step_norm
6420 .expect("fixed-point solution should carry a final step norm")
6421 }
6422
6423 fn tol(value: f64) -> Tolerance {
6424 Tolerance::new(value).unwrap()
6425 }
6426
6427 fn iters(value: usize) -> MaxIterations {
6428 MaxIterations::new(value).unwrap()
6429 }
6430
6431 fn bounds(lower: Array1<f64>, upper: Array1<f64>, tol: f64) -> Bounds {
6432 Bounds::new(lower, upper, tol).unwrap()
6433 }
6434
6435 fn rosenbrock(x: &Array1<f64>) -> (f64, Array1<f64>) {
6437 let a = 1.0;
6438 let b = 100.0;
6439 let f = (a - x[0]).powi(2) + b * (x[1] - x[0].powi(2)).powi(2);
6440 let g = array![
6441 -2.0 * (a - x[0]) - 4.0 * b * (x[1] - x[0].powi(2)) * x[0],
6442 2.0 * b * (x[1] - x[0].powi(2))
6443 ];
6444 (f, g)
6445 }
6446
6447 fn rosenbrock_with_hessian(x: &Array1<f64>) -> (f64, Array1<f64>, Array2<f64>) {
6448 let a = 1.0;
6449 let b = 100.0;
6450 let f = (a - x[0]).powi(2) + b * (x[1] - x[0].powi(2)).powi(2);
6451 let g = array![
6452 -2.0 * (a - x[0]) - 4.0 * b * (x[1] - x[0].powi(2)) * x[0],
6453 2.0 * b * (x[1] - x[0].powi(2))
6454 ];
6455 let h = array![
6456 [1200.0 * x[0] * x[0] - 400.0 * x[1] + 2.0, -400.0 * x[0]],
6457 [-400.0 * x[0], 200.0]
6458 ];
6459 (f, g, h)
6460 }
6461
6462 fn nonconvex_quartic_with_hessian(x: &Array1<f64>) -> (f64, Array1<f64>, Array2<f64>) {
6463 let f = x[0] * x[0] - x[1] * x[1] + 0.1 * x[1].powi(4);
6464 let g = array![2.0 * x[0], -2.0 * x[1] + 0.4 * x[1].powi(3)];
6465 let h = array![[2.0, 0.0], [0.0, -2.0 + 1.2 * x[1] * x[1]]];
6466 (f, g, h)
6467 }
6468
6469 fn non_convex_max(x: &Array1<f64>) -> (f64, Array1<f64>) {
6471 (-x.dot(x), -2.0 * x)
6472 }
6473
6474 #[test]
6475 fn probe_best_ignores_nonfinite() {
6476 let x0 = array![0.0];
6477 let g0 = array![1.0];
6478 let mut best = super::ProbeBest::new(&x0, 0.0, &g0);
6479 let x1 = array![1.0];
6480 let g1 = array![f64::NAN];
6481 best.consider(&x1, -1.0, &g1);
6482 assert!(best.f.is_finite());
6483 assert_eq!(best.x[0], 0.0);
6484 }
6485
    // The second-order cache must serve a repeated same-point request without
    // re-invoking the objective or bumping any evaluation counter.
    #[test]
    fn second_order_cache_reuses_same_point_full_sample() {
        let x = array![1.0, -2.0];
        let call_count = Arc::new(Mutex::new(0usize));
        let call_count_c = call_count.clone();
        let mut oracle = super::SecondOrderCache::new(x.len(), 1e-4);
        let mut func_evals = 0usize;
        let mut grad_evals = 0usize;
        let mut hess_evals = 0usize;
        // Quadratic f = ‖x‖² with constant Hessian 2I; the closure also counts
        // how many times it is actually invoked.
        let mut obj = SecondOrderFn::new(move |x: &Array1<f64>| {
            *call_count_c.lock().expect("lock call count") += 1;
            let f = x.dot(x);
            let g = 2.0 * x;
            let h = Array2::<f64>::eye(x.len()) * 2.0;
            (f, g, h)
        });

        let first = oracle
            .eval_cost_grad_hessian(
                &mut obj,
                &x,
                None,
                &mut func_evals,
                &mut grad_evals,
                &mut hess_evals,
            )
            .expect("initial full sample should succeed");
        // Identical point: everything must come from the cache.
        let second = oracle
            .eval_cost_grad_hessian(
                &mut obj,
                &x,
                None,
                &mut func_evals,
                &mut grad_evals,
                &mut hess_evals,
            )
            .expect("same-point derivative request should hit cache");

        // One underlying evaluation, one of each counter, identical values.
        assert_eq!(*call_count.lock().expect("lock call count"), 1);
        assert_eq!(func_evals, 1);
        assert_eq!(grad_evals, 1);
        assert_eq!(hess_evals, 1);
        assert_eq!(first.0, second.0);
    }
6530
    // A cost-only evaluation followed by a cost+gradient request at the same
    // point should need exactly one upgrade evaluation; a second cost+gradient
    // request must then be a pure cache hit.
    #[test]
    fn first_order_cache_merges_same_point_requests() {
        let x = array![0.5];
        let call_count = Arc::new(Mutex::new(0usize));
        let call_count_c = call_count.clone();
        let mut oracle = super::FirstOrderCache::new(x.len());
        let mut func_evals = 0usize;
        let mut grad_evals = 0usize;
        let mut obj = FirstOrderFn::new(move |x: &Array1<f64>| {
            *call_count_c.lock().expect("lock call count") += 1;
            let f = 0.5 * x[0] * x[0];
            let g = array![x[0]];
            (f, g)
        });

        let cost_only = oracle
            .eval_cost(&mut obj, &x, &mut func_evals)
            .expect("cost-only request should succeed");
        let full = oracle
            .eval_cost_grad(&mut obj, &x, &mut func_evals, &mut grad_evals)
            .expect("cost+grad request should succeed");
        let cached_grad = oracle
            .eval_cost_grad(&mut obj, &x, &mut func_evals, &mut grad_evals)
            .expect("merged same-point request should hit cache");

        // Two underlying evaluations: the cost-only call plus the gradient
        // upgrade; the third request is served from the cache.
        assert_eq!(*call_count.lock().expect("lock call count"), 2);
        assert_eq!(func_evals, 2);
        assert_eq!(grad_evals, 1);
        assert_eq!(cost_only, full.0);
        assert_eq!(full.0, cached_grad.0);
        assert_eq!(full.1, cached_grad.1);
    }
6563
    // When the analytic Hessian comes back non-finite, the cache must discard
    // it and rebuild the Hessian by finite differences over the gradient.
    #[test]
    fn second_order_cache_fd_fills_nonfinite_hessian() {
        // f(x) = (x - 1)²: correct value/gradient, but a NaN analytic Hessian.
        struct NonfiniteHessianObjective;

        impl ZerothOrderObjective for NonfiniteHessianObjective {
            fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
                Ok((x[0] - 1.0).powi(2))
            }
        }

        impl FirstOrderObjective for NonfiniteHessianObjective {
            fn eval_grad(
                &mut self,
                x: &Array1<f64>,
            ) -> Result<FirstOrderSample, ObjectiveEvalError> {
                Ok(FirstOrderSample {
                    value: (x[0] - 1.0).powi(2),
                    gradient: array![2.0 * (x[0] - 1.0)],
                })
            }
        }

        impl SecondOrderObjective for NonfiniteHessianObjective {
            fn eval_hessian(
                &mut self,
                x: &Array1<f64>,
            ) -> Result<SecondOrderSample, ObjectiveEvalError> {
                Ok(SecondOrderSample {
                    value: (x[0] - 1.0).powi(2),
                    gradient: array![2.0 * (x[0] - 1.0)],
                    hessian: Some(array![[f64::NAN]]),
                })
            }
        }

        let x = array![2.0];
        let mut oracle = super::SecondOrderCache::new(x.len(), 1e-4);
        let mut func_evals = 0usize;
        let mut grad_evals = 0usize;
        let mut hess_evals = 0usize;
        let mut obj = NonfiniteHessianObjective;
        let (value, gradient, hessian) = oracle
            .eval_cost_grad_hessian(
                &mut obj,
                &x,
                None,
                &mut func_evals,
                &mut grad_evals,
                &mut hess_evals,
            )
            .expect("non-finite Hessian should trigger internal finite differences");

        // f(2) = 1, f'(2) = 2, and the FD Hessian approximates f'' = 2.
        assert_eq!(value, 1.0);
        assert_eq!(gradient, array![2.0]);
        assert!((hessian[[0, 0]] - 2.0).abs() < 1e-6);
        // Extra cost/gradient probes are recorded; the rejected analytic
        // Hessian is not counted as a Hessian evaluation.
        assert_eq!(func_evals, 3);
        assert_eq!(grad_evals, 3);
        assert_eq!(hess_evals, 0);
    }
6623
    // Finite-difference probes that land on a non-finite cost must surface as
    // a recoverable error rather than a panic or a non-finite gradient.
    #[test]
    fn finite_diff_gradient_returns_recoverable_on_nonfinite_probe() {
        // Finite on (-0.5, 0.5), +inf outside — with step 1.0 every probe
        // from x = 0 lands on the wall.
        struct WallObjective;

        impl ZerothOrderObjective for WallObjective {
            fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
                if x[0].abs() >= 0.5 {
                    Ok(f64::INFINITY)
                } else {
                    Ok(x[0] * x[0])
                }
            }
        }

        let mut objective = FiniteDiffGradient::new(WallObjective).with_step(1.0);
        let err = objective
            .eval_grad(&array![0.0])
            .expect_err("non-finite finite-difference probes should be recoverable");
        assert!(matches!(err, ObjectiveEvalError::Recoverable { .. }));
    }
6644
    // With bounds attached the gradient stencil must stay feasible: at the
    // lower bound only a forward (one-sided) difference is possible.
    #[test]
    fn finite_diff_gradient_respects_bounds_with_one_sided_stencil() {
        // f(x) = x on [0, 1]; any sample outside is a recoverable error.
        struct LinearObjective;

        impl ZerothOrderObjective for LinearObjective {
            fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
                if x[0] < 0.0 || x[0] > 1.0 {
                    return Err(ObjectiveEvalError::recoverable(
                        "sample left the feasible interval",
                    ));
                }
                Ok(x[0])
            }
        }

        let mut objective = FiniteDiffGradient::new(LinearObjective)
            .with_step(1.0)
            .with_bounds(bounds(array![0.0], array![1.0], 1e-8));
        let sample = objective
            .eval_grad(&array![0.0])
            .expect("one-sided finite difference should stay feasible");
        // The objective is the identity, so any one-sided stencil is exact.
        assert!((sample.gradient[0] - 1.0).abs() < 1e-12);
    }
6668
    // Near the lower bound the stencil must consist of exactly the base point
    // and one forward probe — never a sample at or below the bound.
    #[test]
    fn finite_diff_gradient_prefers_one_sided_stencil_near_bounds() {
        // Records every abscissa the objective is evaluated at.
        struct TrackingObjective {
            seen: Arc<Mutex<Vec<f64>>>,
        }

        impl ZerothOrderObjective for TrackingObjective {
            fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
                self.seen.lock().expect("lock seen samples").push(x[0]);
                Ok(x[0] * x[0])
            }
        }

        let seen = Arc::new(Mutex::new(Vec::new()));
        let mut objective = FiniteDiffGradient::new(TrackingObjective { seen: seen.clone() })
            .with_step(0.1)
            .with_bounds(bounds(array![0.0], array![1.0], 1e-8));
        let x0 = 0.05f64;
        // Expected probe offset: the configured step scaled by (1 + x0); the
        // forward-difference assert below confirms this matches the solver.
        let h = 0.1 * (1.0 + x0);
        let sample = objective
            .eval_grad(&array![x0])
            .expect("near-bound gradient should use a feasible one-sided stencil");

        // Forward difference of x² between x0 and x0 + h.
        let expected = ((x0 + h) * (x0 + h) - x0 * x0) / h;
        assert!((sample.gradient[0] - expected).abs() < 1e-12);
        let seen = seen.lock().expect("lock seen samples");
        assert_eq!(seen.len(), 2);
        assert!(seen.iter().any(|&x| (x - x0).abs() < 1e-12));
        assert!(seen.iter().any(|&x| (x - (x0 + h)).abs() < 1e-12));
        // No probe at (or numerically below) the lower bound.
        assert!(!seen.iter().any(|&x| x <= 1e-12));
    }
6700
    // `Bfgs::with_bounds` must hand the bounds down to the wrapped
    // `FiniteDiffGradient` so its probes stay feasible automatically.
    #[test]
    fn bfgs_with_bounds_wires_finite_diff_gradient_bounds_automatically() {
        // f(x) = x on [0, 1]; samples outside are recoverable errors, so an
        // unbounded stencil with step 1.0 would fail immediately.
        struct LinearObjective;

        impl ZerothOrderObjective for LinearObjective {
            fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
                if x[0] < 0.0 || x[0] > 1.0 {
                    return Err(ObjectiveEvalError::recoverable(
                        "sample left the feasible interval",
                    ));
                }
                Ok(x[0])
            }
        }

        let result = Bfgs::new(
            array![0.0],
            FiniteDiffGradient::new(LinearObjective).with_step(1.0),
        )
        .with_bounds(bounds(array![0.0], array![1.0], 1e-8))
        .run();

        // Minimum of f(x) = x on [0, 1] is the lower bound itself.
        let solution = result.expect("solver should wire bounds into finite differences");
        assert!(solution.final_point[0].abs() < 1e-12);
        assert!(gradient_norm(&solution) <= 1e-12);
    }
6727
    // The `optimize(Problem…)` wrapper must perform the same bounds wiring
    // into `FiniteDiffGradient` as the solver builder does.
    #[test]
    fn optimize_problem_with_bounds_wires_finite_diff_gradient_automatically() {
        // f(x) = x on [0, 1]; samples outside are recoverable errors.
        struct LinearObjective;

        impl ZerothOrderObjective for LinearObjective {
            fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
                if x[0] < 0.0 || x[0] > 1.0 {
                    return Err(ObjectiveEvalError::recoverable(
                        "sample left the feasible interval",
                    ));
                }
                Ok(x[0])
            }
        }

        let mut solver = optimize(
            Problem::new(
                array![0.0],
                FiniteDiffGradient::new(LinearObjective).with_step(1.0),
            )
            .with_bounds(bounds(array![0.0], array![1.0], 1e-8)),
        );

        // Minimum of f(x) = x on [0, 1] is the lower bound itself.
        let solution = solver
            .run()
            .expect("problem wrapper should wire bounds into finite differences");
        assert!(solution.final_point[0].abs() < 1e-12);
        assert!(gradient_norm(&solution) <= 1e-12);
    }
6757
    // A finite-difference Hessian built at a bound must keep its probes inside
    // the feasible interval supplied through `bounds.spec`.
    #[test]
    fn second_order_cache_fd_hessian_respects_bounds() {
        // f(x) = (x - 0.25)² on [0, 1], with `hessian: None` so the cache is
        // forced down the finite-difference path.
        struct NoHessianObjective;

        impl ZerothOrderObjective for NoHessianObjective {
            fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
                if x[0] < 0.0 || x[0] > 1.0 {
                    return Err(ObjectiveEvalError::recoverable(
                        "sample left the feasible interval",
                    ));
                }
                Ok((x[0] - 0.25).powi(2))
            }
        }

        impl FirstOrderObjective for NoHessianObjective {
            fn eval_grad(
                &mut self,
                x: &Array1<f64>,
            ) -> Result<FirstOrderSample, ObjectiveEvalError> {
                if x[0] < 0.0 || x[0] > 1.0 {
                    return Err(ObjectiveEvalError::recoverable(
                        "sample left the feasible interval",
                    ));
                }
                Ok(FirstOrderSample {
                    value: (x[0] - 0.25).powi(2),
                    gradient: array![2.0 * (x[0] - 0.25)],
                })
            }
        }

        impl SecondOrderObjective for NoHessianObjective {
            fn eval_hessian(
                &mut self,
                x: &Array1<f64>,
            ) -> Result<SecondOrderSample, ObjectiveEvalError> {
                Ok(SecondOrderSample {
                    value: (x[0] - 0.25).powi(2),
                    gradient: array![2.0 * (x[0] - 0.25)],
                    hessian: None,
                })
            }
        }

        let x = array![0.0];
        let mut oracle = super::SecondOrderCache::new(x.len(), 1e-4);
        let mut func_evals = 0usize;
        let mut grad_evals = 0usize;
        let mut hess_evals = 0usize;
        let mut obj = NoHessianObjective;
        let bounds = bounds(array![0.0], array![1.0], 1e-8);

        let (value, gradient, hessian) = oracle
            .eval_cost_grad_hessian(
                &mut obj,
                &x,
                Some(&bounds.spec),
                &mut func_evals,
                &mut grad_evals,
                &mut hess_evals,
            )
            .expect("finite-difference Hessian should stay feasible near bounds");

        // f(0) = 0.0625, f'(0) = -0.5, f'' = 2; no analytic Hessian was used.
        assert!((value - 0.0625).abs() < 1e-12);
        assert!((gradient[0] + 0.5).abs() < 1e-12);
        assert!((hessian[[0, 0]] - 2.0).abs() < 1e-6);
        assert_eq!(hess_evals, 0);
    }
6827
    // When the base point sits closer to the bound than the FD step, the
    // Hessian stencil must switch to one-sided probes that stay inside the
    // band where the objective is defined.
    #[test]
    fn second_order_cache_fd_hessian_prefers_one_sided_stencil_near_bounds() {
        // Defined only on [0.01, 1.0]: a centered stencil around 0.05 with a
        // step of ~0.1 would dip below 0.01 and fail recoverably.
        struct NearWallObjective;

        impl ZerothOrderObjective for NearWallObjective {
            fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
                if x[0] < 0.01 || x[0] > 1.0 {
                    return Err(ObjectiveEvalError::recoverable(
                        "sample left the finite-difference band",
                    ));
                }
                Ok(x[0] * x[0])
            }
        }

        impl FirstOrderObjective for NearWallObjective {
            fn eval_grad(
                &mut self,
                x: &Array1<f64>,
            ) -> Result<FirstOrderSample, ObjectiveEvalError> {
                if x[0] < 0.01 || x[0] > 1.0 {
                    return Err(ObjectiveEvalError::recoverable(
                        "sample left the finite-difference band",
                    ));
                }
                Ok(FirstOrderSample {
                    value: x[0] * x[0],
                    gradient: array![2.0 * x[0]],
                })
            }
        }

        impl SecondOrderObjective for NearWallObjective {
            fn eval_hessian(
                &mut self,
                x: &Array1<f64>,
            ) -> Result<SecondOrderSample, ObjectiveEvalError> {
                Ok(SecondOrderSample {
                    value: x[0] * x[0],
                    gradient: array![2.0 * x[0]],
                    hessian: None,
                })
            }
        }

        let x = array![0.05];
        let mut oracle = super::SecondOrderCache::new(x.len(), 0.1);
        let mut func_evals = 0usize;
        let mut grad_evals = 0usize;
        let mut hess_evals = 0usize;
        let mut obj = NearWallObjective;
        let bounds = bounds(array![0.0], array![1.0], 1e-8);

        let (_, _, hessian) = oracle
            .eval_cost_grad_hessian(
                &mut obj,
                &x,
                Some(&bounds.spec),
                &mut func_evals,
                &mut grad_evals,
                &mut hess_evals,
            )
            .expect("near-bound Hessian should use a feasible one-sided stencil");

        // The gradient 2x is linear, so even a one-sided difference is exact.
        assert!((hessian[[0, 0]] - 2.0).abs() < 1e-12);
    }
6894
    // `NewtonTrustRegion::with_bounds` must forward the bounds to the internal
    // finite-difference Hessian so probes never leave the feasible interval.
    #[test]
    fn newton_trust_region_wires_fd_hessian_bounds_automatically() {
        // Linear objective on [0, 1] reporting `hessian: None`, forcing the
        // solver to build the Hessian by finite differences.
        struct NoHessianObjective;

        impl ZerothOrderObjective for NoHessianObjective {
            fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
                if x[0] < 0.0 || x[0] > 1.0 {
                    return Err(ObjectiveEvalError::recoverable(
                        "sample left the feasible interval",
                    ));
                }
                Ok(x[0])
            }
        }

        impl FirstOrderObjective for NoHessianObjective {
            fn eval_grad(
                &mut self,
                x: &Array1<f64>,
            ) -> Result<FirstOrderSample, ObjectiveEvalError> {
                if x[0] < 0.0 || x[0] > 1.0 {
                    return Err(ObjectiveEvalError::recoverable(
                        "sample left the feasible interval",
                    ));
                }
                Ok(FirstOrderSample {
                    value: x[0],
                    gradient: array![1.0],
                })
            }
        }

        impl SecondOrderObjective for NoHessianObjective {
            fn eval_hessian(
                &mut self,
                x: &Array1<f64>,
            ) -> Result<SecondOrderSample, ObjectiveEvalError> {
                Ok(SecondOrderSample {
                    value: x[0],
                    gradient: array![1.0],
                    hessian: None,
                })
            }
        }

        let result = NewtonTrustRegion::new(array![0.0], NoHessianObjective)
            .with_bounds(bounds(array![0.0], array![1.0], 1e-8))
            .run();

        // Minimum of f(x) = x on [0, 1] is the lower bound itself.
        let solution = result.expect("solver should wire bounds into Hessian finite differences");
        assert!(solution.final_point[0].abs() < 1e-12);
        assert!(gradient_norm(&solution) <= 1e-12);
    }
6948
    // The `optimize(SecondOrderProblem…)` wrapper must perform the same bounds
    // wiring for the finite-difference Hessian as the solver builder does.
    #[test]
    fn optimize_second_order_problem_with_bounds_wires_fd_hessian_automatically() {
        // Linear objective on [0, 1] reporting `hessian: None`.
        struct NoHessianObjective;

        impl ZerothOrderObjective for NoHessianObjective {
            fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
                if x[0] < 0.0 || x[0] > 1.0 {
                    return Err(ObjectiveEvalError::recoverable(
                        "sample left the feasible interval",
                    ));
                }
                Ok(x[0])
            }
        }

        impl FirstOrderObjective for NoHessianObjective {
            fn eval_grad(
                &mut self,
                x: &Array1<f64>,
            ) -> Result<FirstOrderSample, ObjectiveEvalError> {
                if x[0] < 0.0 || x[0] > 1.0 {
                    return Err(ObjectiveEvalError::recoverable(
                        "sample left the feasible interval",
                    ));
                }
                Ok(FirstOrderSample {
                    value: x[0],
                    gradient: array![1.0],
                })
            }
        }

        impl SecondOrderObjective for NoHessianObjective {
            fn eval_hessian(
                &mut self,
                x: &Array1<f64>,
            ) -> Result<SecondOrderSample, ObjectiveEvalError> {
                Ok(SecondOrderSample {
                    value: x[0],
                    gradient: array![1.0],
                    hessian: None,
                })
            }
        }

        let mut solver = optimize(
            SecondOrderProblem::new(array![0.0], NoHessianObjective).with_bounds(bounds(
                array![0.0],
                array![1.0],
                1e-8,
            )),
        );

        // Minimum of f(x) = x on [0, 1] is the lower bound itself.
        let solution = solver.run().expect(
            "second-order problem wrapper should wire bounds into Hessian finite differences",
        );
        assert!(solution.final_point[0].abs() < 1e-12);
        assert!(gradient_norm(&solution) <= 1e-12);
    }
7008
    // The ARC solver must also forward bounds into its finite-difference
    // Hessian machinery.
    #[test]
    fn arc_wires_fd_hessian_bounds_automatically() {
        // Linear objective on [0, 1] reporting `hessian: None`.
        struct NoHessianObjective;

        impl ZerothOrderObjective for NoHessianObjective {
            fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
                if x[0] < 0.0 || x[0] > 1.0 {
                    return Err(ObjectiveEvalError::recoverable(
                        "sample left the feasible interval",
                    ));
                }
                Ok(x[0])
            }
        }

        impl FirstOrderObjective for NoHessianObjective {
            fn eval_grad(
                &mut self,
                x: &Array1<f64>,
            ) -> Result<FirstOrderSample, ObjectiveEvalError> {
                if x[0] < 0.0 || x[0] > 1.0 {
                    return Err(ObjectiveEvalError::recoverable(
                        "sample left the feasible interval",
                    ));
                }
                Ok(FirstOrderSample {
                    value: x[0],
                    gradient: array![1.0],
                })
            }
        }

        impl SecondOrderObjective for NoHessianObjective {
            fn eval_hessian(
                &mut self,
                x: &Array1<f64>,
            ) -> Result<SecondOrderSample, ObjectiveEvalError> {
                Ok(SecondOrderSample {
                    value: x[0],
                    gradient: array![1.0],
                    hessian: None,
                })
            }
        }

        // `super::Arc` is the solver type, not `std::sync::Arc` (which is
        // imported unqualified in this module).
        let result = super::Arc::new(array![0.0], NoHessianObjective)
            .with_bounds(bounds(array![0.0], array![1.0], 1e-8))
            .run();

        // Minimum of f(x) = x on [0, 1] is the lower bound itself.
        let solution = result.expect("solver should wire bounds into Hessian finite differences");
        assert!(solution.final_point[0].abs() < 1e-12);
        assert!(gradient_norm(&solution) <= 1e-12);
    }
7062
    // The fixed-point solver must converge on a linear contraction whose step
    // halves the iterate every time.
    #[test]
    fn fixed_point_converges_on_linear_contraction() {
        struct LinearContraction;

        impl FixedPointObjective for LinearContraction {
            fn eval_step(
                &mut self,
                x: &Array1<f64>,
            ) -> Result<FixedPointSample, ObjectiveEvalError> {
                // step = -x/2 pulls the iterate halfway toward the origin.
                Ok(FixedPointSample {
                    value: x.dot(x),
                    step: -0.5 * x,
                    status: FixedPointStatus::Continue,
                })
            }
        }

        let mut solver = FixedPoint::new(array![2.0, -1.0], LinearContraction)
            .with_tolerance(tol(1e-8))
            .with_max_iterations(iters(128));
        let solution = solver
            .run()
            .expect("fixed-point solver should converge on a contraction");

        // Geometric contraction: the iterate and the step both shrink to ~0.
        assert!(solution.final_point.dot(&solution.final_point).sqrt() < 1e-6);
        assert!(step_norm(&solution) < 1e-8);
    }
7090
    // A `Stop` status must finalize at the *current* point: the proposed step
    // is discarded and the reported step norm is zero.
    #[test]
    fn fixed_point_stop_returns_current_point() {
        struct ImmediateStop;

        impl FixedPointObjective for ImmediateStop {
            fn eval_step(
                &mut self,
                _x: &Array1<f64>,
            ) -> Result<FixedPointSample, ObjectiveEvalError> {
                // Proposes a non-zero step, but asks the solver to stop.
                Ok(FixedPointSample {
                    value: 7.0,
                    step: array![1.0],
                    status: FixedPointStatus::Stop,
                })
            }
        }

        let solution = FixedPoint::new(array![3.0], ImmediateStop)
            .run()
            .expect("stop status should finalize immediately");

        // The starting point and the sample's value are reported unchanged.
        assert_eq!(solution.final_point, array![3.0]);
        assert_eq!(solution.final_value, 7.0);
        assert_eq!(step_norm(&solution), 0.0);
    }
7116
7117 #[test]
7118 fn dense_solve_shifted_solves_small_system() {
7119 let a = array![[4.0, 1.0], [1.0, 3.0]];
7120 let b = array![1.0, 2.0];
7121 let x = super::dense_solve_shifted(&a, &b, 0.0).expect("dense solve should succeed");
7122 let ax = a.dot(&x);
7123 assert!((&ax - &b).iter().all(|v| v.abs() < 1e-10));
7124 }
7125
7126 #[test]
7127 fn cg_solve_adaptive_uses_direct_path_for_small_dense_systems() {
7128 let n = 8usize;
7129 let mut a = Array2::<f64>::eye(n) * 3.0;
7130 for i in 0..n {
7131 for j in 0..n {
7132 if i != j {
7133 a[[i, j]] = 0.05 * ((i + j + 1) as f64);
7134 }
7135 }
7136 }
7137 let b = Array1::from_iter((0..n).map(|i| (i + 1) as f64));
7138 let x = super::cg_solve_adaptive(&a, &b, 5, 1e-12, 1e-10)
7139 .expect("small dense system should use the direct solve path");
7140 let mut ax = a.dot(&x);
7141 for i in 0..n {
7142 ax[i] += 1e-10 * x[i];
7143 }
7144 let residual = (&ax - &b).dot(&(&ax - &b)).sqrt();
7145 assert!(residual < 1e-8, "expected small residual, got {residual:e}");
7146 }
7147
7148 #[test]
7149 fn cg_solve_from_refines_existing_iterate() {
7150 let n = 256usize;
7151 let mut a = Array2::<f64>::eye(n) * 4.0;
7152 for i in 0..(n - 1) {
7153 a[[i, i + 1]] = 0.5;
7154 a[[i + 1, i]] = 0.5;
7155 }
7156 let b = Array1::from_elem(n, 1.0);
7157 let first = super::cg_solve_from(&a, &b, Array1::zeros(n), 3, 1e-12, 0.0)
7158 .expect("initial CG stage should succeed");
7159 let second = super::cg_solve_from(&a, &b, first.x.clone(), 3, 1e-12, 0.0)
7160 .expect("refinement CG stage should succeed");
7161 assert!(
7162 second.rel_resid < first.rel_resid,
7163 "continued CG should improve residual"
7164 );
7165 }
7166
7167 #[test]
7168 fn steihaug_toint_uses_exact_small_dense_newton_step_when_feasible() {
7169 let core = super::NewtonTrustRegionCore::new(array![0.0, 0.0]);
7170 let h = array![[4.0, 1.0], [1.0, 3.0]];
7171 let g = array![1.0, 2.0];
7172 let rhs = -g.clone();
7173 let expected =
7174 super::dense_solve_shifted(&h, &rhs, 0.0).expect("direct dense solve should work");
7175 let (step, pred) = core
7176 .steihaug_toint_step(&h, &g, 10.0, None)
7177 .expect("small dense exact step should be accepted");
7178 assert!((&step - &expected).iter().all(|v| v.abs() < 1e-10));
7179 assert!(pred > 0.0);
7180 }
7181
7182 #[test]
7183 fn dense_trust_region_step_handles_small_dense_indefinite_boundary_case() {
7184 let h = array![[-1.0, 0.0], [0.0, 2.0]];
7185 let g = array![1.0, 0.5];
7186 let (step, pred) =
7187 super::dense_trust_region_step(&h, &g, 0.5, None).expect("direct trust-region step");
7188 let norm = step.dot(&step).sqrt();
7189 assert!(norm <= 0.5 + 1e-8, "step norm should respect trust radius");
7190 assert!(pred > 0.0, "predicted decrease should be positive");
7191 }
7192
7193 #[test]
7194 fn arc_small_dense_masked_subproblem_uses_direct_masked_solve() {
7195 let core = super::ArcCore::new(array![0.0, 0.0]);
7196 let h = array![[4.0, 1.0], [1.0, 3.0]];
7197 let g = array![2.0, -3.0];
7198 let active = [true, false];
7199 let step = core
7200 .solve_arc_subproblem(&h, &g, 1.0, Some(&active))
7201 .expect("masked direct ARC subproblem solve should succeed");
7202 assert!(
7203 step[0].abs() < 1e-12,
7204 "active coordinate should remain fixed"
7205 );
7206 assert!(step[1].is_finite(), "free coordinate step should be finite");
7207 let (m_delta, _, grad_m) = core.arc_model_value(&g, &h, 1.0, &step, Some(&active));
7208 assert!(m_delta <= 1e-8, "ARC model should not increase materially");
7209 assert!(grad_m.iter().all(|v| v.is_finite()));
7210 }
7211
    // Entering local mode must force the strict Strong-Wolfe policy and reset
    // every relaxed-acceptance knob to its conservative value.
    #[test]
    fn bfgs_local_mode_forces_strict_search_policy() {
        let mut core = super::BfgsCore::new(array![0.0, 0.0]);
        // Start from a deliberately "loose" configuration so every reset below
        // is observable.
        core.initial_grad_norm = 10.0;
        core.primary_strategy = super::LineSearchStrategy::Backtracking;
        core.c1_adapt = 1e-3;
        core.c2_adapt = 0.1;
        core.flat_accept_streak = 3;
        core.curv_slack_scale = 0.25;
        core.grad_drop_factor = 0.95;
        core.gll.set_cap(8);

        // Gradient norm 1e-3 against an initial norm of 10.0 should flip the
        // core into local mode.
        core.refresh_local_mode(1e-3);

        assert!(core.local_mode);
        assert!(matches!(
            core.primary_strategy,
            super::LineSearchStrategy::StrongWolfe
        ));
        // Adaptive Wolfe constants snap back to the nominal c1/c2.
        assert!((core.c1_adapt - core.c1).abs() < 1e-16);
        assert!((core.c2_adapt - core.c2).abs() < 1e-16);
        assert_eq!(core.flat_accept_streak, 0);
        assert!((core.curv_slack_scale - 1.0).abs() < 1e-16);
        assert!((core.grad_drop_factor - 0.9).abs() < 1e-16);
        // Non-monotone memory collapses to a single entry.
        assert_eq!(core.gll.cap, 1);
    }
7238
7239 #[test]
7240 fn probe_alphas_respects_armijo() {
7241 let x_k = array![1.0];
7242 let f_k = 1.0;
7243 let g_k = array![2.0];
7244 let d_k = array![2.0]; let mut core = super::BfgsCore::new(x_k.clone());
7246 let mut oracle = super::FirstOrderCache::new(x_k.len());
7247 let tau_g = core.tau_g;
7248 let drop_factor = core.grad_drop_factor;
7249 let mut fe = 0usize;
7250 let mut ge = 0usize;
7251 let res = super::probe_alphas(
7252 &mut core,
7253 &mut bfgs_oracle(|x: &Array1<f64>| (x.dot(x), 2.0 * x)),
7254 &mut oracle,
7255 &x_k,
7256 &d_k,
7257 f_k,
7258 &g_k,
7259 0.0,
7260 1.0,
7261 tau_g,
7262 drop_factor,
7263 &mut fe,
7264 &mut ge,
7265 );
7266 assert!(res.is_none());
7267 }
7268
    // Zoom on a bracket only ~5e-13 wide, over an objective (-‖x‖²) on which
    // Armijo holds but the curvature condition never can, must give up with
    // `MaxAttempts` instead of accepting a curvature-free step.
    #[test]
    fn zoom_tiny_bracket_rejects_armijo_without_curvature() {
        let x_k = array![1.0];
        let mut core = super::BfgsCore::new(x_k.clone());
        let mut oracle = super::FirstOrderCache::new(x_k.len());
        let (f_k, g_k) = non_convex_max(&x_k);
        let g_proj_k = core.projected_gradient(&x_k, &g_k);
        let d_k = array![1.0];
        // Degenerate bracket: hi - lo ≈ 5e-13.
        let alpha_lo = 1.0;
        let alpha_hi = 1.0 + 5e-13;
        // Evaluate both endpoints and their directional derivatives, exactly
        // as the caller of `zoom` would.
        let (x_lo, s_lo, _) = core.project_with_step(&x_k, &d_k, alpha_lo);
        let (f_lo, g_lo) = non_convex_max(&x_lo);
        let g_lo_dot_d = super::directional_derivative(
            &core.projected_gradient(&x_lo, &g_lo),
            &s_lo,
            alpha_lo,
            &d_k,
        );
        let (x_hi, s_hi, _) = core.project_with_step(&x_k, &d_k, alpha_hi);
        let (f_hi, g_hi) = non_convex_max(&x_hi);
        let g_hi_dot_d = super::directional_derivative(
            &core.projected_gradient(&x_hi, &g_hi),
            &s_hi,
            alpha_hi,
            &d_k,
        );
        let c1 = core.c1;
        let c2 = core.c2;

        let r = super::zoom(
            &mut core,
            &mut bfgs_oracle(non_convex_max),
            &mut oracle,
            &x_k,
            &d_k,
            f_k,
            &g_k,
            &g_proj_k,
            g_proj_k.dot(&d_k),
            c1,
            c2,
            alpha_lo,
            alpha_hi,
            f_lo,
            f_hi,
            g_lo_dot_d,
            g_hi_dot_d,
            0,
            0,
        );

        assert!(matches!(r, Err(super::LineSearchError::MaxAttempts(_))));
    }
7322
    #[test]
    fn zoom_flat_midpoint_rejects_uphill_descent_only_candidate() {
        // The objective rises with a tiny slope (2e-13) while every gradient
        // report is a tiny negative constant (-1e-14), so each candidate
        // "looks like" weak descent. Zoom must not accept such a
        // descent-only candidate and must run out of attempts instead.
        let x_k = array![0.0];
        let mut core = super::BfgsCore::new(x_k.clone());
        let mut oracle = super::FirstOrderCache::new(x_k.len());
        let slope = 2.0e-13;
        // Deliberately inconsistent with `slope`: fakes marginal descent.
        let fake_grad = -1.0e-14;
        let f_k = 0.0;
        let g_k = array![fake_grad];
        let g_proj_k = core.projected_gradient(&x_k, &g_k);
        let d_k = array![1.0];
        let alpha_lo = 1.0;
        let alpha_hi = 2.0;
        let fg = move |x: &Array1<f64>| (slope * x[0], array![fake_grad]);
        // Evaluate both bracket ends exactly as zoom's callers would.
        let (x_lo, s_lo, _) = core.project_with_step(&x_k, &d_k, alpha_lo);
        let (f_lo, g_lo) = fg(&x_lo);
        let g_lo_dot_d = super::directional_derivative(
            &core.projected_gradient(&x_lo, &g_lo),
            &s_lo,
            alpha_lo,
            &d_k,
        );
        let (x_hi, s_hi, _) = core.project_with_step(&x_k, &d_k, alpha_hi);
        let (f_hi, g_hi) = fg(&x_hi);
        let g_hi_dot_d = super::directional_derivative(
            &core.projected_gradient(&x_hi, &g_hi),
            &s_hi,
            alpha_hi,
            &d_k,
        );
        let c1 = core.c1;
        let c2 = core.c2;

        let r = super::zoom(
            &mut core,
            &mut bfgs_oracle(fg),
            &mut oracle,
            &x_k,
            &d_k,
            f_k,
            &g_k,
            &g_proj_k,
            g_proj_k.dot(&d_k),
            c1,
            c2,
            alpha_lo,
            alpha_hi,
            f_lo,
            f_hi,
            g_lo_dot_d,
            g_hi_dot_d,
            0,
            0,
        );

        assert!(matches!(r, Err(super::LineSearchError::MaxAttempts(_))));
    }
7380
7381 #[test]
7382 fn line_search_rejects_fully_clipped_projected_step() {
7383 let x_k = array![1.0];
7384 let lower = array![0.0];
7385 let upper = array![1.0];
7386 let mut core = super::BfgsCore::new(x_k.clone());
7387 core.bounds = Some(super::BoxSpec::new(lower, upper, 1e-8));
7388 let mut oracle = super::FirstOrderCache::new(x_k.len());
7389 let fg = |x: &Array1<f64>| {
7390 let dx = x[0] - 2.0;
7391 (dx * dx, array![2.0 * dx])
7392 };
7393 let (f_k, g_k) = fg(&x_k);
7394 let d_k = array![1.0];
7395 let c1 = core.c1;
7396 let c2 = core.c2;
7397
7398 let r = super::line_search(
7399 &mut core,
7400 &mut bfgs_oracle(fg),
7401 &mut oracle,
7402 &x_k,
7403 &d_k,
7404 f_k,
7405 &g_k,
7406 c1,
7407 c2,
7408 );
7409
7410 assert!(matches!(r, Err(super::LineSearchError::StepSizeTooSmall)));
7411 }
7412
7413 #[test]
7414 fn backtracking_accepts_strong_wolfe_in_local_mode() {
7415 let x_k = array![1.0];
7416 let mut core = super::BfgsCore::new(x_k.clone());
7417 core.local_mode = true;
7418
7419 let mut oracle = super::FirstOrderCache::new(x_k.len());
7420 let f_k = x_k.dot(&x_k);
7421 let g_k = 2.0 * x_k.clone();
7422 let d_k = -g_k.clone();
7423
7424 let (alpha, f_new, g_new, _, _, kind) = super::backtracking_line_search(
7425 &mut core,
7426 &mut bfgs_oracle(|x: &Array1<f64>| (x.dot(x), 2.0 * x)),
7427 &mut oracle,
7428 &x_k,
7429 &d_k,
7430 f_k,
7431 &g_k,
7432 )
7433 .expect("local mode should still accept strong-Wolfe decreases");
7434
7435 assert!((alpha - 0.5).abs() < 1e-12);
7436 assert!(f_new < f_k);
7437 assert!(g_new.iter().all(|v| v.is_finite()));
7438 assert!(matches!(kind, super::AcceptKind::StrongWolfe));
7439 }
7440
7441 #[test]
7442 fn backtracking_rejects_armijo_without_curvature() {
7443 let x_k = array![1.0];
7444 let mut core = super::BfgsCore::new(x_k.clone());
7445 let mut oracle = super::FirstOrderCache::new(x_k.len());
7446 let (f_k, g_k) = non_convex_max(&x_k);
7447 let d_k = array![1.0];
7448
7449 let r = super::backtracking_line_search(
7450 &mut core,
7451 &mut bfgs_oracle(non_convex_max),
7452 &mut oracle,
7453 &x_k,
7454 &d_k,
7455 f_k,
7456 &g_k,
7457 );
7458
7459 assert!(r.is_err());
7460 }
7461
7462 #[test]
7463 fn local_mode_disables_only_gll_extension() {
7464 let mut core = super::BfgsCore::new(array![0.0]);
7465 let fmax = 2.0;
7466 let gk_ts = -0.1;
7467 let f_trial = 1.5;
7468
7469 assert!(!core.accept_armijo(1.0, gk_ts, f_trial));
7470 assert!(core.accept_gll_nonmonotone(fmax, gk_ts, f_trial));
7471
7472 core.local_mode = true;
7473 assert!(!core.accept_gll_nonmonotone(fmax, gk_ts, f_trial));
7474 }
7475
    #[test]
    fn line_search_ignores_nonfinite_best() {
        // The objective returns -inf for any x > 0, and global_best is
        // pre-seeded with a finite record at x0. The line search must fail
        // without replacing that record with a non-finite "improvement".
        let x0 = array![0.0];
        let mut core = super::BfgsCore::new(x0.clone());
        let mut oracle = super::FirstOrderCache::new(x0.len());
        let c1 = core.c1;
        let c2 = core.c2;
        let fg = |x: &Array1<f64>| {
            if x[0] > 0.0 {
                // Any positive trial step looks "infinitely better".
                (f64::NEG_INFINITY, array![1.0])
            } else {
                (0.0, array![1.0])
            }
        };
        let (f_k, g_k) = fg(&x0);
        let mut obj = bfgs_oracle(fg);
        // Seed a finite global best before searching along +d.
        core.global_best = Some(super::ProbeBest::new(&x0, f_k, &g_k));
        let d_k = array![1.0];
        let r = super::line_search(
            &mut core,
            &mut obj,
            &mut oracle,
            &x0,
            &d_k,
            f_k,
            &g_k,
            c1,
            c2,
        );
        assert!(r.is_err());
        // The seeded best must survive with a finite objective value.
        assert!(
            core.global_best
                .as_ref()
                .map(|b| b.f.is_finite())
                .unwrap_or(false)
        );
    }
7513
7514 #[test]
7515 fn newton_trust_region_converges_on_rosenbrock() {
7516 let x0 = array![-1.2, 1.0];
7517 let mut solver = NewtonTrustRegion::new(x0, SecondOrderFn::new(rosenbrock_with_hessian))
7518 .with_profile(Profile::Robust)
7519 .with_tolerance(tol(1e-8))
7520 .with_max_iterations(iters(100));
7521 let solution = solver.run().expect("Newton trust-region should converge");
7522 assert!((solution.final_point[0] - 1.0).abs() < 1e-6);
7523 assert!((solution.final_point[1] - 1.0).abs() < 1e-6);
7524 assert!(gradient_norm(&solution) < 1e-6);
7525 }
7526
7527 #[test]
7528 fn newton_trust_region_uses_single_full_trial_requests() {
7529 let x0 = array![-1.2, 1.0];
7530 let first_order_calls = Arc::new(Mutex::new(0usize));
7531 let second_order_calls = Arc::new(Mutex::new(0usize));
7532 let objective = CountingSecondOrder::new(
7533 rosenbrock_with_hessian,
7534 first_order_calls.clone(),
7535 second_order_calls.clone(),
7536 );
7537 let mut solver = NewtonTrustRegion::new(x0, objective)
7538 .with_profile(Profile::Robust)
7539 .with_tolerance(tol(1e-8))
7540 .with_max_iterations(iters(100));
7541 let _ = solver.run().expect("Newton trust-region should converge");
7542 assert_eq!(
7543 *first_order_calls.lock().expect("lock first-order calls"),
7544 0,
7545 "Newton TR should not use first-order-only objective paths"
7546 );
7547 assert!(
7548 *second_order_calls.lock().expect("lock second-order calls") > 0,
7549 "expected Newton TR to use second-order evaluations"
7550 );
7551 }
7552
7553 #[test]
7554 fn newton_trust_region_handles_indefinite_hessian() {
7555 let x0 = array![1.0, 0.5]; let mut solver =
7557 NewtonTrustRegion::new(x0, SecondOrderFn::new(nonconvex_quartic_with_hessian))
7558 .with_profile(Profile::Robust)
7559 .with_tolerance(tol(1e-7))
7560 .with_max_iterations(iters(200));
7561
7562 let sol = solver
7563 .run()
7564 .expect("TR-Newton should handle indefinite Hessians");
7565 assert!(sol.final_value.is_finite());
7566 assert!(gradient_norm(&sol) < 1e-4);
7567 }
7568
7569 #[test]
7570 fn newton_trust_region_respects_single_variable_bound() {
7571 let x0 = array![0.2];
7573 let lower = array![0.0];
7574 let upper = array![1.0];
7575 let mut solver = NewtonTrustRegion::new(
7576 x0,
7577 SecondOrderFn::new(|x: &Array1<f64>| {
7578 let dx = x[0] - 2.0;
7579 let f = dx * dx;
7580 let g = array![2.0 * dx];
7581 let h = array![[2.0]];
7582 (f, g, h)
7583 }),
7584 )
7585 .with_bounds(bounds(lower, upper, 1e-8))
7586 .with_profile(Profile::Robust)
7587 .with_tolerance(tol(1e-10))
7588 .with_max_iterations(iters(100));
7589
7590 let sol = solver
7591 .run()
7592 .expect("Projected Newton should converge at upper bound");
7593 assert!((sol.final_point[0] - 1.0).abs() < 1e-8);
7594 assert!(gradient_norm(&sol) <= 1e-8);
7595 }
7596
7597 #[test]
7598 fn newton_trust_region_active_set_leaves_free_coordinate() {
7599 let x0 = array![0.4, -2.0];
7601 let lower = array![0.0, -10.0];
7602 let upper = array![1.0, 10.0];
7603 let mut solver = NewtonTrustRegion::new(
7604 x0,
7605 SecondOrderFn::new(|x: &Array1<f64>| {
7606 let d0 = x[0] - 2.0;
7607 let d1 = x[1] - 3.0;
7608 let f = d0 * d0 + d1 * d1;
7609 let g = array![2.0 * d0, 2.0 * d1];
7610 let h = array![[2.0, 0.0], [0.0, 2.0]];
7611 (f, g, h)
7612 }),
7613 )
7614 .with_bounds(bounds(lower, upper, 1e-8))
7615 .with_profile(Profile::Robust)
7616 .with_tolerance(tol(1e-9))
7617 .with_max_iterations(iters(100));
7618
7619 let sol = solver.run().expect("Projected Newton should converge");
7620 assert!((sol.final_point[0] - 1.0).abs() < 1e-8);
7621 assert!((sol.final_point[1] - 3.0).abs() < 1e-7);
7622 assert!(gradient_norm(&sol) <= 1e-7);
7623 }
7624
    #[test]
    fn newton_trust_region_retries_on_recoverable_trial_errors() {
        // The second second-order evaluation fails with a recoverable error;
        // the solver must treat it as a failed trial (shrink the trust
        // region, retry) and still converge to the minimum at x = 1.
        struct RecoverableTrialObjective {
            // Counts eval_hessian calls so the failure fires exactly once.
            calls: usize,
        }

        impl ZerothOrderObjective for RecoverableTrialObjective {
            fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
                Ok(0.5 * (x[0] - 1.0).powi(2))
            }
        }

        impl FirstOrderObjective for RecoverableTrialObjective {
            fn eval_grad(
                &mut self,
                x: &Array1<f64>,
            ) -> Result<FirstOrderSample, ObjectiveEvalError> {
                Ok(FirstOrderSample {
                    value: 0.5 * (x[0] - 1.0).powi(2),
                    gradient: array![x[0] - 1.0],
                })
            }
        }

        impl SecondOrderObjective for RecoverableTrialObjective {
            fn eval_hessian(
                &mut self,
                x: &Array1<f64>,
            ) -> Result<SecondOrderSample, ObjectiveEvalError> {
                self.calls += 1;
                // Exactly the second full evaluation fails recoverably.
                if self.calls == 2 {
                    return Err(ObjectiveEvalError::recoverable("simulated PIRLS breakdown"));
                }
                Ok(SecondOrderSample {
                    value: 0.5 * (x[0] - 1.0).powi(2),
                    gradient: array![x[0] - 1.0],
                    hessian: Some(array![[1.0]]),
                })
            }
        }

        let x0 = array![2.0];
        let mut solver = NewtonTrustRegion::new(x0, RecoverableTrialObjective { calls: 0 })
            .with_profile(Profile::Deterministic)
            .with_tolerance(tol(1e-8))
            .with_max_iterations(iters(200));

        let sol = solver
            .run()
            .expect("recoverable trial errors should shrink trust region and recover");
        assert!((sol.final_point[0] - 1.0).abs() < 1e-6);
        assert!(gradient_norm(&sol) < 1e-6);
    }
7678
7679 #[test]
7680 fn newton_trust_region_surfaces_fatal_objective_errors() {
7681 struct FatalObjective;
7682
7683 impl ZerothOrderObjective for FatalObjective {
7684 fn eval_cost(&mut self, _x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
7685 Err(ObjectiveEvalError::fatal(
7686 "fatal synthetic objective failure",
7687 ))
7688 }
7689 }
7690
7691 impl FirstOrderObjective for FatalObjective {
7692 fn eval_grad(
7693 &mut self,
7694 _x: &Array1<f64>,
7695 ) -> Result<FirstOrderSample, ObjectiveEvalError> {
7696 Err(ObjectiveEvalError::fatal(
7697 "fatal synthetic objective failure",
7698 ))
7699 }
7700 }
7701
7702 impl SecondOrderObjective for FatalObjective {
7703 fn eval_hessian(
7704 &mut self,
7705 _x: &Array1<f64>,
7706 ) -> Result<SecondOrderSample, ObjectiveEvalError> {
7707 Err(ObjectiveEvalError::fatal(
7708 "fatal synthetic objective failure",
7709 ))
7710 }
7711 }
7712
7713 let x0 = array![0.0];
7714 let mut solver = NewtonTrustRegion::new(x0, FatalObjective).with_max_iterations(iters(5));
7715
7716 let err = solver.run().expect_err("fatal errors must propagate");
7717 match err {
7718 super::NewtonTrustRegionError::ObjectiveFailed { message } => {
7719 assert!(message.contains("fatal synthetic objective failure"));
7720 }
7721 other => panic!("unexpected error variant: {other:?}"),
7722 }
7723 }
7724
7725 #[test]
7726 fn arc_converges_on_rosenbrock() {
7727 let x0 = array![-1.2, 1.0];
7728 let mut solver = super::Arc::new(x0, SecondOrderFn::new(rosenbrock_with_hessian))
7729 .with_profile(Profile::Robust)
7730 .with_tolerance(tol(1e-7))
7731 .with_max_iterations(iters(250));
7732
7733 let solution = solver.run().expect("ARC should converge");
7734 assert!((solution.final_point[0] - 1.0).abs() < 1e-4);
7735 assert!((solution.final_point[1] - 1.0).abs() < 1e-4);
7736 assert!(gradient_norm(&solution) < 1e-5);
7737 }
7738
7739 #[test]
7740 fn arc_uses_single_full_trial_requests() {
7741 let x0 = array![-1.2, 1.0];
7742 let first_order_calls = Arc::new(Mutex::new(0usize));
7743 let second_order_calls = Arc::new(Mutex::new(0usize));
7744 let objective = CountingSecondOrder::new(
7745 rosenbrock_with_hessian,
7746 first_order_calls.clone(),
7747 second_order_calls.clone(),
7748 );
7749 let mut solver = super::Arc::new(x0, objective)
7750 .with_profile(Profile::Robust)
7751 .with_tolerance(tol(1e-7))
7752 .with_max_iterations(iters(250));
7753
7754 let _ = solver.run().expect("ARC should converge");
7755 assert_eq!(
7756 *first_order_calls.lock().expect("lock first-order calls"),
7757 0,
7758 "ARC should not use first-order-only objective paths"
7759 );
7760 assert!(
7761 *second_order_calls.lock().expect("lock second-order calls") > 0,
7762 "expected ARC to use second-order evaluations"
7763 );
7764 }
7765
    #[test]
    fn arc_accepted_step_uses_single_evaluation() {
        // One ARC iteration on a trivial quadratic: the accepted step must
        // cost exactly one trial evaluation on top of the initial sample
        // (two second-order calls total, zero first-order-only calls).
        let first_order_calls = Arc::new(Mutex::new(0usize));
        let second_order_calls = Arc::new(Mutex::new(0usize));
        let objective = CountingSecondOrder::new(
            |x: &Array1<f64>| {
                let f = 0.5 * x[0] * x[0];
                let g = array![x[0]];
                let h = array![[1.0]];
                (f, g, h)
            },
            first_order_calls.clone(),
            second_order_calls.clone(),
        );
        // A one-iteration budget forces a MaxIterationsReached exit right
        // after the single accepted step.
        let mut solver = super::Arc::new(array![1.0], objective)
            .with_profile(Profile::Deterministic)
            .with_tolerance(tol(1e-9))
            .with_max_iterations(iters(1));

        let err = solver
            .run()
            .expect_err("one ARC iteration should exhaust the budget after a single accepted step");
        match err {
            ArcError::MaxIterationsReached { .. } => {}
            other => panic!("unexpected error variant: {other:?}"),
        }
        assert_eq!(
            *first_order_calls.lock().expect("lock first-order calls"),
            0,
            "ARC should not issue first-order-only evaluations"
        );
        assert_eq!(
            *second_order_calls.lock().expect("lock second-order calls"),
            2,
            "expected one initial and one trial second-order evaluation"
        );
    }
7803
    #[test]
    fn arc_rejects_materially_projected_steps() {
        // The unconstrained minimum (x = 2) lies outside the box [0, 1], so a
        // large trial step gets materially clipped at x = 1. The objective
        // records, per evaluation kind, how often it is queried exactly at
        // the clipped point: rho must never be computed from a cost-only call
        // there (counts.0 stays 0); instead a full second-order sample must
        // be refreshed (counts.1 > 0).
        let x0 = array![0.8];
        let lower = array![0.0];
        let upper = array![1.0];
        // (cost-only hits at the bound, second-order hits at the bound)
        let clipped_counts = Arc::new(Mutex::new((0usize, 0usize)));
        let clipped_counts_c = clipped_counts.clone();
        struct ProjectedArcObjective {
            clipped_counts: Arc<Mutex<(usize, usize)>>,
        }

        impl ZerothOrderObjective for ProjectedArcObjective {
            fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
                // Tally cost-only evaluations landing exactly on the bound.
                if (x[0] - 1.0).abs() < 1e-12 {
                    self.clipped_counts.lock().expect("lock clipped counts").0 += 1;
                }
                let dx = x[0] - 2.0;
                Ok(0.5 * dx * dx)
            }
        }

        impl FirstOrderObjective for ProjectedArcObjective {
            fn eval_grad(
                &mut self,
                x: &Array1<f64>,
            ) -> Result<FirstOrderSample, ObjectiveEvalError> {
                let dx = x[0] - 2.0;
                Ok(FirstOrderSample {
                    value: 0.5 * dx * dx,
                    gradient: array![dx],
                })
            }
        }

        impl SecondOrderObjective for ProjectedArcObjective {
            fn eval_hessian(
                &mut self,
                x: &Array1<f64>,
            ) -> Result<SecondOrderSample, ObjectiveEvalError> {
                // Tally full second-order evaluations at the bound.
                if (x[0] - 1.0).abs() < 1e-12 {
                    self.clipped_counts.lock().expect("lock clipped counts").1 += 1;
                }
                let dx = x[0] - 2.0;
                Ok(SecondOrderSample {
                    value: 0.5 * dx * dx,
                    gradient: array![dx],
                    hessian: Some(array![[1.0]]),
                })
            }
        }

        let mut solver = super::Arc::new(
            x0.clone(),
            ProjectedArcObjective {
                clipped_counts: clipped_counts_c,
            },
        )
        .with_profile(Profile::Deterministic)
        .with_bounds(bounds(lower, upper, 1e-12))
        .with_max_iterations(iters(1));
        // A near-zero regularization weight makes the first trial step huge,
        // guaranteeing material clipping at the bound.
        solver.core.sigma_min = 1e-12;
        solver.core.sigma = 1e-12;

        let err = solver
            .run()
            .expect_err("single projected iteration should exhaust the budget");
        match err {
            ArcError::MaxIterationsReached { last_solution } => {
                assert!(last_solution.final_point[0] <= 1.0 + 1e-12);
            }
            other => panic!("unexpected error variant: {other:?}"),
        }
        let counts = clipped_counts.lock().expect("lock clipped counts");
        assert_eq!(
            counts.0, 0,
            "materially projected ARC steps must not use CostOnly rho evaluation"
        );
        assert!(
            counts.1 > 0,
            "materially projected ARC steps should refresh a coherent CostGradientHessian sample"
        );
    }
7886
7887 #[test]
7888 fn arc_respects_single_variable_bound() {
7889 let x0 = array![0.2];
7890 let lower = array![0.0];
7891 let upper = array![1.0];
7892 let mut solver = super::Arc::new(
7893 x0,
7894 SecondOrderFn::new(|x: &Array1<f64>| {
7895 let dx = x[0] - 2.0;
7896 let f = dx * dx;
7897 let g = array![2.0 * dx];
7898 let h = array![[2.0]];
7899 (f, g, h)
7900 }),
7901 )
7902 .with_profile(Profile::Robust)
7903 .with_bounds(bounds(lower, upper, 1e-8))
7904 .with_tolerance(tol(1e-9))
7905 .with_max_iterations(iters(200));
7906
7907 let sol = solver
7908 .run()
7909 .expect("Projected ARC should converge at upper bound");
7910 assert!((sol.final_point[0] - 1.0).abs() < 1e-8);
7911 assert!(gradient_norm(&sol) <= 1e-6);
7912 }
7913
    #[test]
    fn arc_retries_on_recoverable_trial_errors() {
        // The second full evaluation fails with a recoverable error; ARC
        // must treat it as a rejected trial (escalate sigma, retry) and
        // still converge to the minimum at x = 1.
        struct RecoverableArcTrialObjective {
            // Counts eval_hessian calls so the failure fires exactly once.
            calls: usize,
        }

        impl ZerothOrderObjective for RecoverableArcTrialObjective {
            fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
                Ok(0.5 * (x[0] - 1.0).powi(2))
            }
        }

        impl FirstOrderObjective for RecoverableArcTrialObjective {
            fn eval_grad(
                &mut self,
                x: &Array1<f64>,
            ) -> Result<FirstOrderSample, ObjectiveEvalError> {
                Ok(FirstOrderSample {
                    value: 0.5 * (x[0] - 1.0).powi(2),
                    gradient: array![x[0] - 1.0],
                })
            }
        }

        impl SecondOrderObjective for RecoverableArcTrialObjective {
            fn eval_hessian(
                &mut self,
                x: &Array1<f64>,
            ) -> Result<SecondOrderSample, ObjectiveEvalError> {
                self.calls += 1;
                // Exactly the second full evaluation fails recoverably.
                if self.calls == 2 {
                    return Err(ObjectiveEvalError::recoverable(
                        "simulated recoverable trial failure",
                    ));
                }
                Ok(SecondOrderSample {
                    value: 0.5 * (x[0] - 1.0).powi(2),
                    gradient: array![x[0] - 1.0],
                    hessian: Some(array![[1.0]]),
                })
            }
        }

        let x0 = array![2.0];
        let mut solver = super::Arc::new(x0, RecoverableArcTrialObjective { calls: 0 })
            .with_profile(Profile::Deterministic)
            .with_tolerance(tol(1e-8))
            .with_max_iterations(iters(300));

        let sol = solver
            .run()
            .expect("recoverable ARC trial failures should trigger retries and recover");
        assert!((sol.final_point[0] - 1.0).abs() < 1e-6);
        assert!(gradient_norm(&sol) < 1e-6);
    }
7971
7972 #[test]
7973 fn arc_sigma_escalation_uses_gamma2_then_gamma3() {
7974 let mut core = super::ArcCore::new(array![0.0]);
7975 core.sigma = 1.0;
7976 core.gamma2 = 2.0;
7977 core.gamma3 = 3.0;
7978 let mut streak = 0usize;
7979
7980 core.escalate_sigma_on_failure(&mut streak);
7982 assert_eq!(streak, 1);
7983 assert!((core.sigma - 2.0).abs() < 1e-12);
7984
7985 core.escalate_sigma_on_failure(&mut streak);
7986 assert_eq!(streak, 2);
7987 assert!((core.sigma - 4.0).abs() < 1e-12);
7988
7989 core.escalate_sigma_on_failure(&mut streak);
7991 assert_eq!(streak, 3);
7992 assert!((core.sigma - 12.0).abs() < 1e-12);
7993 }
7994
7995 fn linear_function(x: &Array1<f64>) -> (f64, Array1<f64>) {
7997 (2.0 * x[0] + 3.0 * x[1], array![2.0, 3.0])
7998 }
7999
8000 fn huge_offset_linear_function(x: &Array1<f64>) -> (f64, Array1<f64>) {
8001 (1.0e16 + 2.0 * x[0] + 3.0 * x[1], array![2.0, 3.0])
8002 }
8003
8004 fn ill_conditioned_quadratic(x: &Array1<f64>) -> (f64, Array1<f64>) {
8007 let scale = 1000.0;
8008 let f = scale * x[0].powi(2) + x[1].powi(2);
8009 let g = array![2.0 * scale * x[0], 2.0 * x[1]];
8010 (f, g)
8011 }
8012
8013 fn singular_hessian_function(x: &Array1<f64>) -> (f64, Array1<f64>) {
8016 let val = (x[0] + x[1]).powi(2);
8017 (val, array![2.0 * (x[0] + x[1]), 2.0 * (x[0] + x[1])])
8018 }
8019
8020 fn wall_with_minimum(x: &Array1<f64>) -> (f64, Array1<f64>) {
8022 if x[0] > 70.0 {
8023 (f64::INFINITY, array![f64::INFINITY])
8025 } else {
8026 ((x[0] - 60.0).powi(2), array![2.0 * (x[0] - 60.0)])
8028 }
8029 }
8030
8031 #[test]
8034 fn test_quadratic_bowl_converges() {
8035 let x0 = array![10.0, -5.0];
8036 let Solution { final_point, .. } = Bfgs::new(x0, bfgs_oracle(quadratic)).run().unwrap();
8037 assert_that!(&final_point[0]).is_close_to(0.0, 1e-5);
8038 assert_that!(&final_point[1]).is_close_to(0.0, 1e-5);
8039 }
8040
8041 #[test]
8042 fn test_optimize_first_order_picks_bfgs() {
8043 let x0 = array![10.0, -5.0];
8044 let Solution { final_point, .. } = optimize(Problem::new(x0, bfgs_oracle(quadratic)))
8045 .run()
8046 .unwrap();
8047 assert_that!(&final_point[0]).is_close_to(0.0, 1e-5);
8048 assert_that!(&final_point[1]).is_close_to(0.0, 1e-5);
8049 }
8050
8051 #[test]
8052 fn test_optimize_second_order_picks_newton_by_default() {
8053 let x0 = array![-1.2, 1.0];
8054 let Solution { final_point, .. } = optimize(SecondOrderProblem::new(
8055 x0,
8056 SecondOrderFn::new(rosenbrock_with_hessian),
8057 ))
8058 .run()
8059 .unwrap();
8060 assert_that!(&final_point[0]).is_close_to(1.0, 1e-5);
8061 assert_that!(&final_point[1]).is_close_to(1.0, 1e-5);
8062 }
8063
8064 #[test]
8065 fn test_optimize_second_order_uses_arc_for_aggressive_profile() {
8066 let x0 = array![1.0];
8067 let objective = SecondOrderFn::new(|x: &Array1<f64>| {
8068 let f = x[0] * x[0];
8069 let g = array![2.0 * x[0]];
8070 let h = array![[2.0]];
8071 (f, g, h)
8072 });
8073 let solver =
8074 optimize(SecondOrderProblem::new(x0, objective).with_profile(Profile::Aggressive));
8075 assert!(matches!(solver, AutoSecondOrderSolver::Arc(_)));
8076 }
8077
8078 #[test]
8079 fn test_quadratic_still_converges_strongly() {
8080 let x0 = array![20.0, -30.0];
8081 let sol = Bfgs::new(x0, bfgs_oracle(quadratic))
8082 .with_tolerance(tol(1e-8))
8083 .with_max_iterations(iters(1000))
8084 .run()
8085 .unwrap();
8086 assert_that!(&sol.final_point[0]).is_close_to(0.0, 1e-6);
8087 assert_that!(&sol.final_point[1]).is_close_to(0.0, 1e-6);
8088 }
8089
8090 #[test]
8091 fn test_rosenbrock_converges() {
8092 let x0 = array![-1.2, 1.0];
8093 let Solution { final_point, .. } = Bfgs::new(x0, bfgs_oracle(rosenbrock)).run().unwrap();
8094 assert_that!(&final_point[0]).is_close_to(1.0, 1e-5);
8095 assert_that!(&final_point[1]).is_close_to(1.0, 1e-5);
8096 }
8097
8098 #[test]
8101 fn test_begin_at_minimum_terminates_immediately() {
8102 let x0 = array![0.0, 0.0];
8103 let Solution { iterations, .. } = Bfgs::new(x0, bfgs_oracle(quadratic))
8104 .with_tolerance(tol(1e-5))
8105 .run()
8106 .unwrap();
8107 assert_that(&iterations).is_less_than_or_equal_to(1);
8108 }
8109
8110 #[test]
8111 fn test_max_iterations_error_is_returned() {
8112 let x0 = array![-1.2, 1.0];
8113 let max_iterations = 5;
8114 let result = Bfgs::new(x0, bfgs_oracle(rosenbrock))
8115 .with_max_iterations(iters(max_iterations))
8116 .run();
8117
8118 match result {
8119 Err(BfgsError::MaxIterationsReached { last_solution }) => {
8120 assert_eq!(last_solution.iterations, max_iterations);
8121 assert_that!(&last_solution.final_point.dot(&last_solution.final_point))
8123 .is_greater_than(0.0);
8124 }
8125 _ => panic!("Expected MaxIterationsReached error, but got {:?}", result),
8126 }
8127 }
8128
8129 #[test]
8130 fn test_non_convex_function_is_handled() {
8131 let x0 = array![2.0];
8132 let result = Bfgs::new(x0.clone(), bfgs_oracle(non_convex_max)).run();
8133 eprintln!("non_convex result: {:?}", result);
8134 assert!(matches!(
8137 result,
8138 Err(BfgsError::MaxIterationsReached { .. })
8139 | Err(BfgsError::LineSearchFailed { .. })
8140 | Err(BfgsError::GradientIsNaN)
8141 ));
8142 }
8143
8144 #[test]
8145 fn test_zero_curvature_is_handled() {
8146 let x0 = array![10.0, 10.0];
8147 let result = Bfgs::new(x0, bfgs_oracle(linear_function))
8148 .with_profile(Profile::Deterministic)
8149 .run();
8150 match result {
8153 Ok(sol) => {
8154 assert!(sol.final_value.is_finite());
8155 assert!(gradient_norm(&sol).is_finite());
8156 }
8157 Err(BfgsError::MaxIterationsReached { .. })
8158 | Err(BfgsError::LineSearchFailed { .. })
8159 | Err(BfgsError::StepSizeTooSmall) => {}
8160 Err(other) => panic!("unexpected error: {other:?}"),
8161 }
8162 }
8163
    #[test]
    fn test_no_improve_streak_requires_stationarity_or_tiny_step() {
        // The 1e16 offset makes the objective numerically flat even though
        // the true gradient stays large. A flat streak alone must never be
        // reported as convergence: any Ok() here is a false positive.
        let x0 = array![10.0, 10.0];
        let result = Bfgs::new(x0, bfgs_oracle(huge_offset_linear_function))
            .with_profile(Profile::Deterministic)
            .with_max_iterations(iters(8))
            .run();

        match result {
            Ok(sol) => panic!(
                "solver falsely reported convergence with ||g||={:.3e}",
                gradient_norm(&sol)
            ),
            // Budget / line-search failures are acceptable, but the carried
            // solution must still show the gradient is far from zero.
            Err(BfgsError::MaxIterationsReached { last_solution })
            | Err(BfgsError::LineSearchFailed { last_solution, .. }) => {
                assert!(gradient_norm(&last_solution) > 1e-3);
            }
            Err(BfgsError::StepSizeTooSmall) => {}
            Err(other) => panic!("unexpected error: {other:?}"),
        }
    }
8185
8186 #[test]
8187 fn stagnation_guard_requires_gradient_or_tiny_feasible_step() {
8188 let core = super::BfgsCore::new(array![0.0, 0.0]);
8189 let x_prev = array![1.0, 1.0];
8190 let x_far = array![2.0, 2.0];
8191 let x_same = x_prev.clone();
8192 let g_large = array![1.0, -1.0];
8193 let g_small = array![1e-6, 0.0];
8194
8195 assert!(!core.stagnation_converged(&x_prev, &x_far, &g_large));
8196 assert!(core.stagnation_converged(&x_prev, &x_same, &g_large));
8197 assert!(core.stagnation_converged(&x_prev, &x_far, &g_small));
8198 }
8199
8200 #[test]
8201 fn test_nan_gradient_returns_error() {
8202 let nan_fn = |x: &Array1<f64>| {
8204 if x[0].abs() < 1e-12 {
8205 (f64::NAN, array![f64::NAN])
8206 } else {
8207 (x[0].powi(2), array![2.0 * x[0]])
8208 }
8209 };
8210 let x0 = array![0.1];
8212 let result = Bfgs::new(x0, bfgs_oracle(nan_fn))
8213 .with_profile(Profile::Deterministic)
8214 .with_tolerance(tol(1e-15)) .run();
8216
8217 match result {
8218 Ok(sol) => {
8219 assert!(sol.final_value.is_finite());
8220 assert!(sol.final_point[0].abs() < 1e-4);
8221 }
8222 Err(BfgsError::GradientIsNaN)
8223 | Err(BfgsError::LineSearchFailed { .. })
8224 | Err(BfgsError::MaxIterationsReached { .. })
8225 | Err(BfgsError::StepSizeTooSmall) => {}
8226 Err(other) => panic!("unexpected error: {other:?}"),
8227 }
8228 }
8229
    #[test]
    fn test_linesearch_failed_reports_nonzero_attempts() {
        // Every trial away from x0 fails recoverably, so the line search
        // cannot make progress. The resulting error must carry a non-zero
        // attempt count, and the rendered BfgsError must name the reason.
        struct AlwaysRecoverableTrials;

        impl ZerothOrderObjective for AlwaysRecoverableTrials {
            fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
                // Only the exact origin evaluates; every trial step fails.
                if x.iter().all(|v| *v == 0.0) {
                    Ok(833.403058988699)
                } else {
                    Err(ObjectiveEvalError::recoverable(
                        "synthetic recoverable trial failure",
                    ))
                }
            }
        }

        impl FirstOrderObjective for AlwaysRecoverableTrials {
            fn eval_grad(
                &mut self,
                x: &Array1<f64>,
            ) -> Result<FirstOrderSample, ObjectiveEvalError> {
                if x.iter().all(|v| *v == 0.0) {
                    Ok(FirstOrderSample {
                        value: 833.403058988699,
                        gradient: array![1.1751972450892738, 0.0, 0.0],
                    })
                } else {
                    Err(ObjectiveEvalError::recoverable(
                        "synthetic recoverable trial failure",
                    ))
                }
            }
        }

        // Seed the search with the same sample the objective reports at x0.
        let x0 = array![0.0, 0.0, 0.0];
        let f_k = 833.403058988699;
        let g_k = array![1.1751972450892738, 0.0, 0.0];
        let d_k = -g_k.clone();
        let mut core = super::BfgsCore::new(x0.clone());
        let mut oracle = super::FirstOrderCache::new(x0.len());
        let err = super::backtracking_line_search(
            &mut core,
            &mut AlwaysRecoverableTrials,
            &mut oracle,
            &x0,
            &d_k,
            f_k,
            &g_k,
        )
        .expect_err("line search should fail when every trial is recoverable");

        // Map either failure mode to a (count, reason) pair for rendering.
        let (max_attempts, failure_reason) = match err {
            super::LineSearchError::MaxAttempts(attempts) => {
                (attempts, LineSearchFailureReason::MaxAttempts)
            }
            super::LineSearchError::StepSizeTooSmall => (
                BACKTRACKING_MAX_ATTEMPTS,
                LineSearchFailureReason::StepSizeTooSmall,
            ),
            other => panic!("expected backtracking failure, got: {other:?}"),
        };

        assert!(max_attempts > 0, "max_attempts should never be 0");
        // Render the full BfgsError and check the reason is user-visible.
        let rendered = format!(
            "{}",
            BfgsError::LineSearchFailed {
                last_solution: Box::new(Solution::gradient_based(
                    x0,
                    f_k,
                    g_k.clone(),
                    g_k.dot(&g_k).sqrt(),
                    None,
                    0,
                    0,
                    0,
                    0,
                )),
                max_attempts,
                failure_reason,
            }
        );
        assert!(
            rendered.contains("MaxAttempts") || rendered.contains("StepSizeTooSmall"),
            "error should include failure reason, got: {rendered}"
        );
    }
8316
    #[test]
    fn test_rosenbrock_matches_scipy_behavior() {
        // Cross-check against scipy's BFGS (run via an external Python
        // helper): final points must essentially coincide and the iteration
        // counts must be within a small margin of each other.
        let x0 = array![-1.2, 1.0];
        let tolerance = 1e-6;

        let our_res = Bfgs::new(x0.clone(), bfgs_oracle(rosenbrock))
            .with_tolerance(tol(tolerance))
            .run()
            .unwrap();

        let scipy_res = optimize_with_python(&x0, "rosenbrock", tolerance, 100)
            .expect("Python optimization failed");

        assert!(
            scipy_res.success,
            "Scipy optimization failed: {:?}",
            scipy_res.error
        );
        let scipy_point = scipy_res.final_point.unwrap();

        // Euclidean distance between the two final points.
        let distance = ((our_res.final_point[0] - scipy_point[0]).powi(2)
            + (our_res.final_point[1] - scipy_point[1]).powi(2))
        .sqrt();
        assert_that!(&distance).is_less_than(1e-5);

        // Allow up to 10 iterations of divergence between implementations.
        let iter_diff = (our_res.iterations as i64 - scipy_res.iterations.unwrap() as i64).abs();
        assert_that(&iter_diff).is_less_than_or_equal_to(10);

        // Sanity-check whatever optional diagnostics scipy reported.
        let PythonOptResult {
            final_value,
            final_gradient_norm,
            func_evals,
            grad_evals,
            message,
            ..
        } = scipy_res;
        if let Some(value) = final_value {
            assert!(value.is_finite());
        }
        if let Some(norm) = final_gradient_norm {
            assert!(norm.is_finite());
        }
        if let Some(count) = func_evals {
            assert!(count > 0);
        }
        if let Some(count) = grad_evals {
            assert!(count > 0);
        }
        if let Some(text) = message {
            assert!(!text.is_empty());
        }
    }
8376
8377 #[test]
8378 fn test_quadratic_matches_scipy_behavior() {
8379 let x0 = array![150.0, -275.5];
8380 let tolerance = 1e-8;
8381
8382 match Bfgs::new(x0.clone(), bfgs_oracle(quadratic))
8384 .with_tolerance(tol(tolerance))
8385 .run()
8386 {
8387 Ok(sol) => sol,
8388 Err(BfgsError::MaxIterationsReached { last_solution }) => *last_solution,
8389 Err(e) => panic!("unexpected error: {:?}", e),
8390 };
8391
8392 let scipy_res = optimize_with_python(&x0, "quadratic", tolerance, 100)
8394 .expect("Python optimization failed");
8395
8396 assert!(
8397 scipy_res.success,
8398 "Scipy optimization failed: {:?}",
8399 scipy_res.error
8400 );
8401
8402 let PythonOptResult {
8403 final_point,
8404 final_value,
8405 final_gradient_norm,
8406 iterations,
8407 func_evals,
8408 grad_evals,
8409 message,
8410 ..
8411 } = scipy_res;
8412 if let Some(point) = final_point {
8413 assert_eq!(point.len(), 2);
8414 }
8415 if let Some(value) = final_value {
8416 assert!(value.is_finite());
8417 }
8418 if let Some(norm) = final_gradient_norm {
8419 assert!(norm.is_finite());
8420 }
8421 if let Some(iters) = iterations {
8422 assert!(iters <= 100);
8423 }
8424 if let Some(count) = func_evals {
8425 assert!(count > 0);
8426 }
8427 if let Some(count) = grad_evals {
8428 assert!(count > 0);
8429 }
8430 if let Some(text) = message {
8431 assert!(!text.is_empty());
8432 }
8433 }
8434
8435 #[test]
8438 fn test_ill_conditioned_problem_converges() {
8439 let x0 = array![1.0, 1000.0]; let res = Bfgs::new(x0, bfgs_oracle(ill_conditioned_quadratic)).run();
8441 assert!(res.is_ok() || matches!(res, Err(BfgsError::MaxIterationsReached { .. })));
8442 }
8443
8444 #[test]
8445 fn test_singular_hessian_is_handled_gracefully() {
8446 let x0 = array![10.0, 20.0];
8447 let result = Bfgs::new(x0, bfgs_oracle(singular_hessian_function))
8448 .with_tolerance(tol(1e-8))
8449 .run();
8450
8451 match result {
8455 Ok(soln) => {
8456 assert_that!(&soln.final_point[0]).is_close_to(-soln.final_point[1], 1e-5);
8458 assert_that!(&gradient_norm(&soln)).is_less_than(1e-8);
8459 }
8460 Err(BfgsError::MaxIterationsReached { .. }) => {
8461 }
8463 Err(e) => {
8464 panic!("Solver failed with an unexpected error: {:?}", e);
8466 }
8467 }
8468 }
8469
8470 #[test]
8471 fn test_line_search_handles_inf() {
8472 let x0 = array![10.0]; let result = Bfgs::new(x0, bfgs_oracle(wall_with_minimum)).run();
8474 assert!(result.is_ok() || matches!(result, Err(BfgsError::MaxIterationsReached { .. })));
8475 }
8476
8477 #[test]
8478 fn test_trust_region_projection_uses_actual_step() {
8479 let x0 = array![0.9];
8480 let lower = array![0.0];
8481 let upper = array![1.0];
8482 let mut core = super::BfgsCore::new(x0.clone());
8483 core.bounds = Some(super::BoxSpec::new(lower, upper, 1e-8));
8484 core.trust_radius = 10.0;
8485 let fg = |x: &Array1<f64>| {
8486 let f = (x[0] - 2.0).powi(2);
8487 let g = array![2.0 * (x[0] - 2.0)];
8488 (f, g)
8489 };
8490 let mut obj = bfgs_oracle(fg);
8491 let x_k = core.project_point(&x0);
8492 let (f_k, g_k) = fg(&x_k);
8493 let mut b_inv = Array2::eye(1);
8494 let mut oracle = super::FirstOrderCache::new(x0.len());
8495 let mut func_evals = 0;
8496 let mut grad_evals = 0;
8497 let res = core.try_trust_region_step(
8498 &mut obj,
8499 &mut oracle,
8500 &mut b_inv,
8501 &x_k,
8502 f_k,
8503 &g_k,
8504 &mut func_evals,
8505 &mut grad_evals,
8506 );
8507 assert!(res.is_some());
8508 let (x_new, f_new, g_new) = res.unwrap();
8509 assert!((x_new[0] - 1.0).abs() < 1e-12);
8510 assert!(f_new.is_finite());
8511 assert!(g_new[0].is_finite());
8512 }
8513
8514 #[test]
8515 fn test_bfgs_trust_region_predicted_decrease_respects_active_mask() {
8516 let core = super::BfgsCore::new(array![0.0, 0.0]);
8517 let b_inv = array![[2.0, 1.0], [1.0, 2.0]];
8518 let g_proj = array![0.0, -1.0];
8519 let s = array![0.0, 1.0];
8520 let active = vec![true, false];
8521
8522 let pred = core
8523 .trust_region_predicted_decrease(&b_inv, &g_proj, &s, Some(&active))
8524 .expect("masked predicted decrease should be well-defined");
8525
8526 assert!(
8527 (pred - 0.75).abs() < 1e-9,
8528 "unexpected predicted decrease: {pred}"
8529 );
8530 }
8531
8532 #[test]
8533 fn test_bfgs_trust_region_fallback_freezes_active_bound_coordinates() {
8534 let x0 = array![0.0, 0.0];
8535 let lower = array![0.0, -10.0];
8536 let upper = array![10.0, 10.0];
8537 let mut core = super::BfgsCore::new(x0.clone());
8538 core.bounds = Some(super::BoxSpec::new(lower, upper, 1e-8));
8539 core.trust_radius = 10.0;
8540
8541 let fg = |x: &Array1<f64>| {
8542 let f = (x[0] + 1.0).powi(2) + (x[1] - 2.0).powi(2);
8543 let g = array![2.0 * (x[0] + 1.0), 2.0 * (x[1] - 2.0)];
8544 (f, g)
8545 };
8546
8547 let mut obj = bfgs_oracle(fg);
8548 let x_k = core.project_point(&x0);
8549 let (f_k, g_k) = fg(&x_k);
8550 let active = core.active_mask(&x_k, &g_k);
8551 assert_eq!(active, vec![true, false]);
8552
8553 let mut b_inv = array![[5.0, 1.0], [1.0, 0.5]];
8554 let mut oracle = super::FirstOrderCache::new(x0.len());
8555 let mut func_evals = 0;
8556 let mut grad_evals = 0;
8557 let res = core.try_trust_region_step(
8558 &mut obj,
8559 &mut oracle,
8560 &mut b_inv,
8561 &x_k,
8562 f_k,
8563 &g_k,
8564 &mut func_evals,
8565 &mut grad_evals,
8566 );
8567
8568 assert!(
8569 res.is_some(),
8570 "masked trust-region fallback should produce a feasible step"
8571 );
8572 let (x_new, f_new, g_new) = res.unwrap();
8573 assert!(
8574 x_new[0].abs() < 1e-12,
8575 "active coordinate moved: {:?}",
8576 x_new
8577 );
8578 assert!(x_new[1] > x_k[1]);
8579 assert!(f_new < f_k);
8580 assert!(g_new.iter().all(|v| v.is_finite()));
8581 }
8582
8583 #[test]
8584 fn test_flat_with_noise_accepts() {
8585 let f = |x: &Array1<f64>| {
8586 let noise = (x.sum() * 1e6).sin() * 1e-12;
8587 let val = 1.0 + noise;
8588 let g = Array1::from_vec(vec![1e-12; x.len()]);
8589 (val, g)
8590 };
8591 let x0 = array![0.0, 0.0];
8592 let res = Bfgs::new(x0, bfgs_oracle(f))
8593 .with_tolerance(tol(1e-10))
8594 .run();
8595 assert!(res.is_ok() || matches!(res, Err(super::BfgsError::MaxIterationsReached { .. })));
8596 }
8597
8598 #[test]
8599 fn test_piecewise_alpha_jump() {
8600 let f = |x: &Array1<f64>| {
8601 let r = x.dot(x).sqrt();
8602 let val = if r < 1.0 { 1.0 } else { 0.9 };
8603 let g = if r < 1.0 {
8604 Array1::zeros(x.len())
8605 } else {
8606 x.mapv(|v| 1e-6 * v)
8607 };
8608 (val, g)
8609 };
8610 let x0 = array![0.5, 0.5];
8611 let res = Bfgs::new(x0, bfgs_oracle(f)).run();
8612 assert!(res.is_ok() || matches!(res, Err(super::BfgsError::MaxIterationsReached { .. })));
8613 }
8614
8615 #[test]
8616 fn test_rng_symmetry() {
8617 let x0 = array![0.0];
8619 let f = |x: &Array1<f64>| (x[0], array![1.0]);
8620 let mut solver = super::Bfgs::new(x0, bfgs_oracle(f));
8621 solver.core.rng_state = 12345;
8622 let mut sum = 0.0f64;
8623 let n = 20_000;
8624 for _ in 0..n {
8625 sum += solver.next_rand_sym();
8626 }
8627 let mean = sum / (n as f64);
8628 assert_that!(&mean.abs()).is_less_than(5e-3);
8629 }
8630}