gam_solve/rho_optimizer/capability.rs
1use super::*;
2
3/// Declares what a specific model path can provide to the outer optimizer.
4///
5/// Each call site that optimizes smoothing parameters constructs one of these
6/// to describe its analytic derivative coverage. The [`plan`] function then
7/// selects the optimizer and Hessian strategy.
8pub(crate) const SMALL_OUTER_BFGS_MAX_PARAMS: usize = 8;
9
10pub(crate) const SECOND_ORDER_GEOMETRY_PROBE_MAX_PARAMS: usize = 64;
11
12#[derive(Clone, Copy, Debug, PartialEq, Eq)]
13pub struct OuterThetaLayout {
14 pub n_params: usize,
15 pub psi_dim: usize,
16}
17
18impl OuterThetaLayout {
19 pub const fn new(n_params: usize, psi_dim: usize) -> Self {
20 Self { n_params, psi_dim }
21 }
22
23 pub const fn rho_dim(&self) -> usize {
24 self.n_params.saturating_sub(self.psi_dim)
25 }
26
27 fn validate_capability(&self, context: &str) -> Result<(), EstimationError> {
28 if self.psi_dim > self.n_params {
29 return Err(EstimationError::RemlOptimizationFailed(format!(
30 "{context}: invalid outer theta layout (psi_dim={} exceeds n_params={})",
31 self.psi_dim, self.n_params
32 )));
33 }
34 Ok::<(), _>(())
35 }
36
37 pub(crate) fn validate_point_len(
38 &self,
39 theta: &Array1<f64>,
40 context: &str,
41 ) -> Result<(), ObjectiveEvalError> {
42 if theta.len() != self.n_params {
43 return Err(ObjectiveEvalError::recoverable(format!(
44 "{context}: outer theta length mismatch: got {}, expected {} (rho_dim={}, psi_dim={})",
45 theta.len(),
46 self.n_params,
47 self.rho_dim(),
48 self.psi_dim
49 )));
50 }
51 Ok::<(), _>(())
52 }
53
54 pub(crate) fn validate_gradient_len(
55 &self,
56 gradient: &Array1<f64>,
57 context: &str,
58 ) -> Result<(), ObjectiveEvalError> {
59 if gradient.len() != self.n_params {
60 return Err(ObjectiveEvalError::recoverable(format!(
61 "{context}: outer gradient length mismatch: got {}, expected {} (rho_dim={}, psi_dim={})",
62 gradient.len(),
63 self.n_params,
64 self.rho_dim(),
65 self.psi_dim
66 )));
67 }
68 Ok::<(), _>(())
69 }
70
71 pub(crate) fn validate_hessian_shape(
72 &self,
73 hessian: &Array2<f64>,
74 context: &str,
75 ) -> Result<(), ObjectiveEvalError> {
76 if hessian.nrows() != self.n_params || hessian.ncols() != self.n_params {
77 return Err(ObjectiveEvalError::recoverable(format!(
78 "{context}: outer Hessian shape mismatch: got {}x{}, expected {}x{} (rho_dim={}, psi_dim={})",
79 hessian.nrows(),
80 hessian.ncols(),
81 self.n_params,
82 self.n_params,
83 self.rho_dim(),
84 self.psi_dim
85 )));
86 }
87 Ok::<(), _>(())
88 }
89
90 pub(crate) fn validate_efs_eval(
91 &self,
92 eval: &EfsEval,
93 context: &str,
94 ) -> Result<(), ObjectiveEvalError> {
95 if eval.steps.len() != self.n_params {
96 return Err(ObjectiveEvalError::recoverable(format!(
97 "{context}: outer EFS step length mismatch: got {}, expected {} (rho_dim={}, psi_dim={})",
98 eval.steps.len(),
99 self.n_params,
100 self.rho_dim(),
101 self.psi_dim
102 )));
103 }
104 if let Some(ref psi_gradient) = eval.psi_gradient
105 && psi_gradient.len() != self.psi_dim
106 {
107 return Err(ObjectiveEvalError::recoverable(format!(
108 "{context}: outer EFS psi-gradient length mismatch: got {}, expected {}",
109 psi_gradient.len(),
110 self.psi_dim
111 )));
112 }
113 if let Some(ref psi_indices) = eval.psi_indices {
114 if psi_indices.len() != self.psi_dim {
115 return Err(ObjectiveEvalError::recoverable(format!(
116 "{context}: outer EFS psi-index count mismatch: got {}, expected {}",
117 psi_indices.len(),
118 self.psi_dim
119 )));
120 }
121 if psi_indices.iter().any(|&idx| idx >= self.n_params) {
122 return Err(ObjectiveEvalError::recoverable(format!(
123 "{context}: outer EFS psi index out of range for n_params={}",
124 self.n_params
125 )));
126 }
127 }
128 Ok(())
129 }
130}
131
132#[derive(Clone, Debug)]
133pub struct OuterCapability {
134 pub gradient: Derivative,
135 /// Declared shape of the analytic Hessian (or its absence). Replaces
136 /// the binary `Derivative` so the planner can route between dense
137 /// ARC and matrix-free trust-region *before* seed evaluation. See
138 /// [`DeclaredHessianForm`].
139 pub hessian: DeclaredHessianForm,
140 /// Number of smoothing (+ any auxiliary hyper-) parameters being optimized.
141 pub n_params: usize,
142 /// Number of ψ (design-moving) coordinates among the extended
143 /// hyperparameter coordinates. When 0, all coords are penalty-like and
144 /// pure EFS is eligible (given `fixed_point_available`). When > 0,
145 /// hybrid EFS is eligible instead: EFS for ρ + preconditioned gradient
146 /// for ψ.
147 ///
148 /// # Hybrid EFS strategy (when `psi_dim > 0`)
149 ///
150 /// Enabled when `psi_dim > 0`,
151 /// `n_params > SMALL_OUTER_BFGS_MAX_PARAMS`, and
152 /// `fixed_point_available`.
153 /// Combines:
154 /// - Standard EFS multiplicative fixed-point updates for ρ coordinates
155 /// - Safeguarded preconditioned gradient steps for ψ coordinates:
156 /// `Δψ = -α G⁺ g_ψ` where G is the trace Gram matrix
157 ///
158 /// Mathematically necessary because no EFS-type fixed-point iteration
159 /// exists for indefinite B_ψ (see response.md Section 2). The structural
160 /// requirement for EFS is `H^{-1/2} B_d H^{-1/2} ≽ 0` (PSD) plus fixed
161 /// nullspace — exactly what penalty-like coords satisfy and design-moving
162 /// coords do not.
163 ///
164 /// The hybrid is O(1) H⁻¹ solves per iteration (same as pure EFS),
165 /// compared to O(dim(θ)) for BFGS.
166 pub psi_dim: usize,
167 /// Whether the objective actually implements `eval_efs()` for fixed-point
168 /// plans. Structural eligibility (`psi_dim == 0` / `psi_dim > 0`)
169 /// is not sufficient by itself: if this is false, the planner must stay on
170 /// Newton/BFGS-style plans even when EFS or Hybrid-EFS would otherwise be
171 /// mathematically admissible.
172 pub fixed_point_available: bool,
173 /// Optional log-barrier configuration for structural monotonicity constraints.
174 /// When present, EFS is still eligible at plan time, but the EFS iteration
175 /// loop performs a quantitative check each step: if
176 /// `barrier_curvature_is_significant(β, ref_diag, threshold)` fires, EFS
177 /// is abandoned and the fallback ladder routes to a first-order joint
178 /// optimizer.
179 ///
180 /// Previously this was a binary `barrier_active: bool` that unconditionally
181 /// blocked EFS. The quantitative check allows EFS when constraints exist but
182 /// the barrier curvature is negligible (coefficients far from their bounds).
183 pub barrier_config: Option<BarrierConfig>,
184 /// Policy hint for derivative-free auxiliary optimizers only. Primary REML
185 /// optimization ignores this flag when an analytic Hessian exists: exact
186 /// second-order geometry must not be hidden behind a quasi-Newton policy.
187 pub prefer_gradient_only: bool,
188 /// Policy hint: even when the objective implements `eval_efs()` and the
189 /// coordinate structure is penalty-like, the planner must NOT select
190 /// EFS/HybridEfs for this problem.
191 ///
192 /// Set by the caller for problem classes where the Wood-Fasiolo structural
193 /// property (`H^{-1/2} B_k H^{-1/2} ≽ 0` plus parameter-independent
194 /// nullspace) is known not to hold — e.g. GAMLSS/location-scale families
195 /// where the joint Hessian is β-dependent and cross-block smoothers
196 /// induce non-diagonal curvature that the EFS multiplicative fixed-point
197 /// cannot resolve. Also set by the automatic fallback cascade when an
198 /// EFS/HybridEfs attempt failed to converge, so the next attempt falls
199 /// back to analytic-gradient BFGS rather than retrying EFS.
200 pub disable_fixed_point: bool,
201}
202
203impl OuterCapability {
204 pub const fn theta_layout(&self) -> OuterThetaLayout {
205 OuterThetaLayout::new(self.n_params, self.psi_dim)
206 }
207
208 pub fn validate_layout(&self, context: &str) -> Result<(), EstimationError> {
209 self.theta_layout().validate_capability(context)
210 }
211
212 /// True when all coordinates are penalty-like (no ψ coords).
213 pub const fn all_penalty_like(&self) -> bool {
214 self.psi_dim == 0
215 }
216 /// True when ψ (design-moving) coordinates are present.
217 pub const fn has_psi_coords(&self) -> bool {
218 self.psi_dim > 0
219 }
220
221 fn efs_plan_eligible(&self) -> bool {
222 self.fixed_point_available
223 && !self.disable_fixed_point
224 && self.all_penalty_like()
225 && self.n_params > SMALL_OUTER_BFGS_MAX_PARAMS
226 }
227
228 fn hybrid_efs_plan_eligible(&self) -> bool {
229 self.fixed_point_available
230 && !self.disable_fixed_point
231 && self.has_psi_coords()
232 && self.n_params > SMALL_OUTER_BFGS_MAX_PARAMS
233 }
234
235 fn declared_hessian_for_planning(&self) -> Derivative {
236 if self.hessian.is_analytic() {
237 Derivative::Analytic
238 } else {
239 Derivative::Unavailable
240 }
241 }
242}
243
244/// Which solver algorithm to use for the outer optimization.
245#[derive(Clone, Copy, Debug, PartialEq, Eq)]
246pub enum Solver {
247 /// Adaptive Regularized Cubic; fastest convergence, requires Hessian.
248 Arc,
249 /// BFGS; gradient only, builds a dense curvature approximation.
250 Bfgs,
251 /// Extended Fellner-Schall; multiplicative fixed-point iteration.
252 /// Only valid when all hyperparameter coordinates are penalty-like.
253 /// Needs no gradient or Hessian — only traces tr(H^{-1} A_k) and
254 /// Frobenius norms from the inner solution.
255 Efs,
256 /// Hybrid EFS + preconditioned gradient.
257 ///
258 /// Used when ψ (design-moving) coordinates are present alongside ρ
259 /// (penalty-like) coordinates. Combines:
260 /// - Standard EFS multiplicative fixed-point steps for ρ coords
261 /// - Safeguarded preconditioned gradient steps for ψ coords:
262 /// `Δψ = -α G⁺ g_ψ` where `G_{de} = tr(H⁻¹ B_d H⁻¹ B_e)`
263 ///
264 /// This hybrid exists because no EFS-type fixed-point iteration can
265 /// guarantee convergence for indefinite B_ψ (proven by counterexample
266 /// in response.md Section 2). The key structural property that EFS
267 /// needs — `H^{-1/2} B_d H^{-1/2} ≽ 0` plus parameter-independent
268 /// nullspace — holds for penalty-like coords but fails for
269 /// design-moving coords where B_ψ has mixed inertia.
270 ///
271 /// The preconditioned gradient uses the same trace Gram matrix that
272 /// EFS already computes, so the cost is O(1) H⁻¹ solves per iteration
273 /// (same as pure EFS), compared to O(dim(θ)) for full BFGS.
274 HybridEfs,
275}
276
277/// How the Hessian will be obtained for the outer optimizer.
278#[derive(Clone, Copy, Debug, PartialEq, Eq)]
279pub enum HessianSource {
280 /// Exact analytic Hessian provided by the objective.
281 Analytic,
282 /// No explicit Hessian; BFGS builds a rank-2 approximation from
283 /// gradient history.
284 BfgsApprox,
285 /// No explicit Hessian or gradient needed. EFS uses traces and
286 /// Frobenius norms from the inner solution directly.
287 EfsFixedPoint,
288 /// Hybrid EFS + preconditioned gradient for ψ coordinates.
289 /// EFS traces for ρ coords, trace Gram matrix + gradient for ψ coords.
290 HybridEfsFixedPoint,
291}
292
293/// Requested derivative order for an outer objective evaluation.
294///
295/// This enum is for the shared `eval` bridge where the runner needs value-only,
296/// first-order, or second-order information depending on the active plan.
297///
298/// Single-sourced on the lower `gam-model-api` crate so the gam-models
299/// fit_orchestration drivers and the gam-solve runner share one type (#1521).
300pub use gam_model_api::OuterEvalOrder;
301
302/// The outer optimization plan. Produced by [`plan`], consumed by the runner.
303#[derive(Clone, Copy, Debug, PartialEq, Eq)]
304pub struct OuterPlan {
305 pub solver: Solver,
306 pub hessian_source: HessianSource,
307}
308
309pub(crate) const EFS_FIRST_ORDER_FALLBACK_MARKER: &str = "[outer-efs-first-order-fallback]";
310
311/// Whether outer_strategy should automatically derive a retry ladder from the
312/// primary capability, or disable retries entirely.
313#[derive(Clone, Copy, Debug, PartialEq, Eq)]
314pub enum FallbackPolicy {
315 /// Centralized retry path chosen from the declared capability.
316 Automatic,
317 /// No retries; use only the primary plan.
318 Disabled,
319}
320
321impl std::fmt::Display for OuterPlan {
322 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
323 write!(
324 f,
325 "solver={:?}, hessian_source={:?}",
326 self.solver, self.hessian_source
327 )
328 }
329}
330
331impl OuterPlan {
332 /// Stable, grep-friendly routing token for large-scale/log regression
333 /// assertions. Emits `solver=<Solver>;hessian=<Source>;matrix-free=<bool>`.
334 /// Planning alone does not prove the runtime Hessian representation;
335 /// matrix-free routing is decided after the seed evaluation returns an
336 /// operator Hessian, so the static plan token reports `false`.
337 pub fn routing_log_line(&self) -> String {
338 let matrix_free = false;
339 format!(
340 "solver={:?};hessian={:?};matrix-free={}",
341 self.solver, self.hessian_source, matrix_free
342 )
343 }
344}
345
346/// Select the outer optimization strategy from the declared capability.
347///
348/// This is a pure function with no side effects. All policy lives here.
349pub fn plan(cap: &OuterCapability) -> OuterPlan {
350 use Derivative::*;
351 use HessianSource as H;
352 use Solver as S;
353
354 match (cap.gradient, cap.declared_hessian_for_planning()) {
355 (Analytic, Analytic) => OuterPlan {
356 solver: S::Arc,
357 hessian_source: H::Analytic,
358 },
359 // EFS: all penalty-like coords, no analytic Hessian, many params.
360 // Multiplicative fixed-point needs only traces — no gradient evals.
361 // Much cheaper than BFGS for k=10-50 smoothing parameters.
362 //
363 // When a log-barrier is present (monotonicity constraints), EFS is
364 // still selected here. The EFS iteration loop in `run_outer` performs
365 // a quantitative check each step via `barrier_curvature_is_significant`
366 // and bails out early if the barrier curvature becomes non-negligible
367 // relative to the penalized Hessian diagonal.
368 (Analytic, Unavailable) if cap.efs_plan_eligible() => OuterPlan {
369 solver: S::Efs,
370 hessian_source: H::EfsFixedPoint,
371 },
372 (Unavailable, Unavailable) if cap.efs_plan_eligible() => OuterPlan {
373 solver: S::Efs,
374 hessian_source: H::EfsFixedPoint,
375 },
376
377 // Hybrid EFS: ψ (design-moving) coords present alongside ρ coords.
378 //
379 // When ψ coords are present, pure EFS is invalid because B_ψ can be
380 // indefinite (see response.md Section 2 for the counterexample). But
381 // falling back to full BFGS wastes the cheap EFS structure for ρ coords.
382 //
383 // The hybrid strategy uses EFS for ρ-coords and a safeguarded
384 // preconditioned gradient step for ψ-coords:
385 // Δψ = -α G⁺ g_ψ, G_{de} = tr(H⁻¹ B_d H⁻¹ B_e)
386 //
387 // This stays O(1) H⁻¹ solves per iteration (vs O(dim(θ)) for BFGS)
388 // and uses the same trace Gram matrix that EFS already computes.
389 (Analytic, Unavailable) if cap.hybrid_efs_plan_eligible() => OuterPlan {
390 solver: S::HybridEfs,
391 hessian_source: H::HybridEfsFixedPoint,
392 },
393 (Unavailable, Unavailable) if cap.hybrid_efs_plan_eligible() => OuterPlan {
394 solver: S::HybridEfs,
395 hessian_source: H::HybridEfsFixedPoint,
396 },
397
398 // Gradient-only problems should use a gradient-only optimizer.
399 (Analytic, Unavailable) => OuterPlan {
400 solver: S::Bfgs,
401 hessian_source: H::BfgsApprox,
402 },
403 // No analytic gradient (with or without a declared Hessian), and the
404 // EFS/HybridEFS fixed-point lane ruled out above. Every outer objective
405 // in the tree now supplies an analytic gradient, so a cost-only
406 // capability is a programming error. Emit a BFGS plan so it surfaces
407 // loudly with context: the runner rejects it because BFGS requires the
408 // analytic gradient this capability declares is absent. We deliberately
409 // do NOT invent a working primary here — a cost-only objective has no
410 // solver, by design.
411 (Unavailable, _) => OuterPlan {
412 solver: S::Bfgs,
413 hessian_source: H::BfgsApprox,
414 },
415 }
416}
417
418/// Log the outer optimization plan. Called once per fit at the start of
419/// outer optimization so the user can see what strategy was selected and why.
420pub fn log_plan(context: &str, cap: &OuterCapability, the_plan: &OuterPlan) {
421 let hess_warning = match the_plan.hessian_source {
422 HessianSource::BfgsApprox if cap.n_params > 0 => {
423 " [no Hessian: BFGS approximation]".to_string()
424 }
425 _ => String::new(),
426 };
427 let barrier_note = if cap.barrier_config.is_some() && cap.efs_plan_eligible() {
428 " [EFS with runtime barrier-curvature guard]"
429 } else {
430 ""
431 };
432 let hybrid_note = if the_plan.solver == Solver::HybridEfs {
433 " [hybrid EFS(ρ) + preconditioned-gradient(ψ)]"
434 } else {
435 ""
436 };
437 // Promoted to info: this fires once per outer optimization dispatch and
438 // tells the user immediately whether ARC, BFGS, EFS, etc. was selected
439 // and why. That information is otherwise inferred only from the per-iter
440 // log tag prefix once the loop has started.
441 log::info!(
442 "[OUTER] {context}: n_params={}, gradient={:?}, hessian={:?} -> {} [{}]{hess_warning}{barrier_note}{hybrid_note}",
443 cap.n_params,
444 cap.gradient,
445 cap.hessian,
446 the_plan,
447 the_plan.routing_log_line(),
448 );
449}
450
451pub(crate) fn requests_immediate_first_order_fallback(message: &str) -> bool {
452 message.contains(EFS_FIRST_ORDER_FALLBACK_MARKER)
453}
454
455/// Disable the EFS/HybridEfs planner path, forcing BFGS-class solvers on the
456/// next attempt. Returns `None` if fixed-point is already disabled.
457pub(crate) fn disable_fixed_point(cap: &OuterCapability) -> Option<OuterCapability> {
458 (!cap.disable_fixed_point && (cap.efs_plan_eligible() || cap.hybrid_efs_plan_eligible())).then(
459 || {
460 let mut degraded = cap.clone();
461 degraded.disable_fixed_point = true;
462 degraded
463 },
464 )
465}
466
467pub(crate) fn automatic_fallback_attempts(cap: &OuterCapability) -> Vec<OuterCapability> {
468 // Production fallback ladder is strictly analytic-gradient.
469 //
470 // The cascade is:
471 // 1. If the primary plan is EFS/HybridEFS AND an analytic gradient is
472 // available, retry with fixed-point disabled so the analytic
473 // derivative declaration is evaluated directly.
474 // 2. If the primary plan is Arc (declared (Analytic, Analytic)
475 // capability), do NOT add a degraded fallback. Demoting to
476 // BFGS+BfgsApprox in this case discards the analytic outer Hessian
477 // ARC was using — a strictly weaker geometry — and silently masks
478 // ARC's actual failure mode (e.g. budget exhaustion, indefinite
479 // curvature) under a BFGS Strong-Wolfe plateau on a flat surface.
480 // ARC retries are handled by the per-attempt budget-bump retry
481 // ladder in `run_outer_with_strategy`; once that is exhausted, the
482 // caller surfaces the underlying ARC failure verbatim.
483 // 3. Otherwise (e.g. (Analytic, Unavailable) without EFS eligibility,
484 // which is the BFGS primary), there is nothing to degrade further
485 // — the caller surfaces the RemlOptimizationFailed error so the
486 // non-convergence is visible.
487 let mut attempts = Vec::new();
488
489 if cap.gradient == Derivative::Analytic
490 && matches!(plan(cap).solver, Solver::Efs | Solver::HybridEfs)
491 && let Some(no_fp_cap) = disable_fixed_point(cap)
492 {
493 attempts.push(no_fp_cap.clone());
494 return attempts;
495 }
496
497 // Arc primary: no lateral demotion to BFGS. The runner's ARC-budget-bump
498 // retry covers cases where ARC needed more iterations; if even that is
499 // exhausted, the caller sees the genuine analytic-Hessian non-convergence
500 // rather than a misleading BFGS-on-flat-surface plateau.
501 if matches!(plan(cap).solver, Solver::Arc) {
502 return attempts;
503 }
504
505 attempts
506}
507
508pub(crate) fn disabled_fallback_hybrid_efs_has_standalone_bfgs_primary(
509 cap: &OuterCapability,
510 config: &OuterConfig,
511) -> bool {
512 config.fallback_policy == FallbackPolicy::Disabled
513 && cap.gradient == Derivative::Analytic
514 && matches!(plan(cap).solver, Solver::HybridEfs)
515}
516
517pub(crate) fn primary_capability_for_config(
518 mut cap: OuterCapability,
519 config: &OuterConfig,
520 context: &str,
521) -> OuterCapability {
522 if disabled_fallback_hybrid_efs_has_standalone_bfgs_primary(&cap, config) {
523 // HybridEFS is not a standalone first-order method for ψ coordinates:
524 // when ψ backtracking proves non-descent, the bridge intentionally
525 // surfaces `EFS_FIRST_ORDER_FALLBACK_MARKER` so the runner can switch
526 // to a joint gradient solver that enforces ∇ψ V = 0. With fallback
527 // disabled and an analytic gradient available, selecting HybridEFS as
528 // the only primary attempt is internally inconsistent; BFGS is the
529 // standalone first-order primary for that capability.
530 log::info!(
531 "[OUTER] {context}: HybridEFS requires the automatic first-order \
532 escape path for ψ coordinates; fallback is disabled, so routing the \
533 primary attempt to analytic-gradient BFGS"
534 );
535 cap.disable_fixed_point = true;
536 }
537 cap
538}