Skip to main content

gam_solve/rho_optimizer/
capability.rs

1use super::*;
2
3/// Declares what a specific model path can provide to the outer optimizer.
4///
5/// Each call site that optimizes smoothing parameters constructs one of these
6/// to describe its analytic derivative coverage. The [`plan`] function then
7/// selects the optimizer and Hessian strategy.
8pub(crate) const SMALL_OUTER_BFGS_MAX_PARAMS: usize = 8;
9
10pub(crate) const SECOND_ORDER_GEOMETRY_PROBE_MAX_PARAMS: usize = 64;
11
12#[derive(Clone, Copy, Debug, PartialEq, Eq)]
13pub struct OuterThetaLayout {
14    pub n_params: usize,
15    pub psi_dim: usize,
16}
17
18impl OuterThetaLayout {
19    pub const fn new(n_params: usize, psi_dim: usize) -> Self {
20        Self { n_params, psi_dim }
21    }
22
23    pub const fn rho_dim(&self) -> usize {
24        self.n_params.saturating_sub(self.psi_dim)
25    }
26
27    fn validate_capability(&self, context: &str) -> Result<(), EstimationError> {
28        if self.psi_dim > self.n_params {
29            return Err(EstimationError::RemlOptimizationFailed(format!(
30                "{context}: invalid outer theta layout (psi_dim={} exceeds n_params={})",
31                self.psi_dim, self.n_params
32            )));
33        }
34        Ok::<(), _>(())
35    }
36
37    pub(crate) fn validate_point_len(
38        &self,
39        theta: &Array1<f64>,
40        context: &str,
41    ) -> Result<(), ObjectiveEvalError> {
42        if theta.len() != self.n_params {
43            return Err(ObjectiveEvalError::recoverable(format!(
44                "{context}: outer theta length mismatch: got {}, expected {} (rho_dim={}, psi_dim={})",
45                theta.len(),
46                self.n_params,
47                self.rho_dim(),
48                self.psi_dim
49            )));
50        }
51        Ok::<(), _>(())
52    }
53
54    pub(crate) fn validate_gradient_len(
55        &self,
56        gradient: &Array1<f64>,
57        context: &str,
58    ) -> Result<(), ObjectiveEvalError> {
59        if gradient.len() != self.n_params {
60            return Err(ObjectiveEvalError::recoverable(format!(
61                "{context}: outer gradient length mismatch: got {}, expected {} (rho_dim={}, psi_dim={})",
62                gradient.len(),
63                self.n_params,
64                self.rho_dim(),
65                self.psi_dim
66            )));
67        }
68        Ok::<(), _>(())
69    }
70
71    pub(crate) fn validate_hessian_shape(
72        &self,
73        hessian: &Array2<f64>,
74        context: &str,
75    ) -> Result<(), ObjectiveEvalError> {
76        if hessian.nrows() != self.n_params || hessian.ncols() != self.n_params {
77            return Err(ObjectiveEvalError::recoverable(format!(
78                "{context}: outer Hessian shape mismatch: got {}x{}, expected {}x{} (rho_dim={}, psi_dim={})",
79                hessian.nrows(),
80                hessian.ncols(),
81                self.n_params,
82                self.n_params,
83                self.rho_dim(),
84                self.psi_dim
85            )));
86        }
87        Ok::<(), _>(())
88    }
89
90    pub(crate) fn validate_efs_eval(
91        &self,
92        eval: &EfsEval,
93        context: &str,
94    ) -> Result<(), ObjectiveEvalError> {
95        if eval.steps.len() != self.n_params {
96            return Err(ObjectiveEvalError::recoverable(format!(
97                "{context}: outer EFS step length mismatch: got {}, expected {} (rho_dim={}, psi_dim={})",
98                eval.steps.len(),
99                self.n_params,
100                self.rho_dim(),
101                self.psi_dim
102            )));
103        }
104        if let Some(ref psi_gradient) = eval.psi_gradient
105            && psi_gradient.len() != self.psi_dim
106        {
107            return Err(ObjectiveEvalError::recoverable(format!(
108                "{context}: outer EFS psi-gradient length mismatch: got {}, expected {}",
109                psi_gradient.len(),
110                self.psi_dim
111            )));
112        }
113        if let Some(ref psi_indices) = eval.psi_indices {
114            if psi_indices.len() != self.psi_dim {
115                return Err(ObjectiveEvalError::recoverable(format!(
116                    "{context}: outer EFS psi-index count mismatch: got {}, expected {}",
117                    psi_indices.len(),
118                    self.psi_dim
119                )));
120            }
121            if psi_indices.iter().any(|&idx| idx >= self.n_params) {
122                return Err(ObjectiveEvalError::recoverable(format!(
123                    "{context}: outer EFS psi index out of range for n_params={}",
124                    self.n_params
125                )));
126            }
127        }
128        Ok(())
129    }
130}
131
132#[derive(Clone, Debug)]
133pub struct OuterCapability {
134    pub gradient: Derivative,
135    /// Declared shape of the analytic Hessian (or its absence). Replaces
136    /// the binary `Derivative` so the planner can route between dense
137    /// ARC and matrix-free trust-region *before* seed evaluation. See
138    /// [`DeclaredHessianForm`].
139    pub hessian: DeclaredHessianForm,
140    /// Number of smoothing (+ any auxiliary hyper-) parameters being optimized.
141    pub n_params: usize,
142    /// Number of ψ (design-moving) coordinates among the extended
143    /// hyperparameter coordinates. When 0, all coords are penalty-like and
144    /// pure EFS is eligible (given `fixed_point_available`). When > 0,
145    /// hybrid EFS is eligible instead: EFS for ρ + preconditioned gradient
146    /// for ψ.
147    ///
148    /// # Hybrid EFS strategy (when `psi_dim > 0`)
149    ///
150    /// Enabled when `psi_dim > 0`,
151    /// `n_params > SMALL_OUTER_BFGS_MAX_PARAMS`, and
152    /// `fixed_point_available`.
153    /// Combines:
154    /// - Standard EFS multiplicative fixed-point updates for ρ coordinates
155    /// - Safeguarded preconditioned gradient steps for ψ coordinates:
156    ///   `Δψ = -α G⁺ g_ψ` where G is the trace Gram matrix
157    ///
158    /// Mathematically necessary because no EFS-type fixed-point iteration
159    /// exists for indefinite B_ψ (see response.md Section 2). The structural
160    /// requirement for EFS is `H^{-1/2} B_d H^{-1/2} ≽ 0` (PSD) plus fixed
161    /// nullspace — exactly what penalty-like coords satisfy and design-moving
162    /// coords do not.
163    ///
164    /// The hybrid is O(1) H⁻¹ solves per iteration (same as pure EFS),
165    /// compared to O(dim(θ)) for BFGS.
166    pub psi_dim: usize,
167    /// Whether the objective actually implements `eval_efs()` for fixed-point
168    /// plans. Structural eligibility (`psi_dim == 0` / `psi_dim > 0`)
169    /// is not sufficient by itself: if this is false, the planner must stay on
170    /// Newton/BFGS-style plans even when EFS or Hybrid-EFS would otherwise be
171    /// mathematically admissible.
172    pub fixed_point_available: bool,
173    /// Optional log-barrier configuration for structural monotonicity constraints.
174    /// When present, EFS is still eligible at plan time, but the EFS iteration
175    /// loop performs a quantitative check each step: if
176    /// `barrier_curvature_is_significant(β, ref_diag, threshold)` fires, EFS
177    /// is abandoned and the fallback ladder routes to a first-order joint
178    /// optimizer.
179    ///
180    /// Previously this was a binary `barrier_active: bool` that unconditionally
181    /// blocked EFS. The quantitative check allows EFS when constraints exist but
182    /// the barrier curvature is negligible (coefficients far from their bounds).
183    pub barrier_config: Option<BarrierConfig>,
184    /// Policy hint for derivative-free auxiliary optimizers only. Primary REML
185    /// optimization ignores this flag when an analytic Hessian exists: exact
186    /// second-order geometry must not be hidden behind a quasi-Newton policy.
187    pub prefer_gradient_only: bool,
188    /// Policy hint: even when the objective implements `eval_efs()` and the
189    /// coordinate structure is penalty-like, the planner must NOT select
190    /// EFS/HybridEfs for this problem.
191    ///
192    /// Set by the caller for problem classes where the Wood-Fasiolo structural
193    /// property (`H^{-1/2} B_k H^{-1/2} ≽ 0` plus parameter-independent
194    /// nullspace) is known not to hold — e.g. GAMLSS/location-scale families
195    /// where the joint Hessian is β-dependent and cross-block smoothers
196    /// induce non-diagonal curvature that the EFS multiplicative fixed-point
197    /// cannot resolve. Also set by the automatic fallback cascade when an
198    /// EFS/HybridEfs attempt failed to converge, so the next attempt falls
199    /// back to analytic-gradient BFGS rather than retrying EFS.
200    pub disable_fixed_point: bool,
201}
202
203impl OuterCapability {
204    pub const fn theta_layout(&self) -> OuterThetaLayout {
205        OuterThetaLayout::new(self.n_params, self.psi_dim)
206    }
207
208    pub fn validate_layout(&self, context: &str) -> Result<(), EstimationError> {
209        self.theta_layout().validate_capability(context)
210    }
211
212    /// True when all coordinates are penalty-like (no ψ coords).
213    pub const fn all_penalty_like(&self) -> bool {
214        self.psi_dim == 0
215    }
216    /// True when ψ (design-moving) coordinates are present.
217    pub const fn has_psi_coords(&self) -> bool {
218        self.psi_dim > 0
219    }
220
221    fn efs_plan_eligible(&self) -> bool {
222        self.fixed_point_available
223            && !self.disable_fixed_point
224            && self.all_penalty_like()
225            && self.n_params > SMALL_OUTER_BFGS_MAX_PARAMS
226    }
227
228    fn hybrid_efs_plan_eligible(&self) -> bool {
229        self.fixed_point_available
230            && !self.disable_fixed_point
231            && self.has_psi_coords()
232            && self.n_params > SMALL_OUTER_BFGS_MAX_PARAMS
233    }
234
235    fn declared_hessian_for_planning(&self) -> Derivative {
236        if self.hessian.is_analytic() {
237            Derivative::Analytic
238        } else {
239            Derivative::Unavailable
240        }
241    }
242}
243
244/// Which solver algorithm to use for the outer optimization.
245#[derive(Clone, Copy, Debug, PartialEq, Eq)]
246pub enum Solver {
247    /// Adaptive Regularized Cubic; fastest convergence, requires Hessian.
248    Arc,
249    /// BFGS; gradient only, builds a dense curvature approximation.
250    Bfgs,
251    /// Extended Fellner-Schall; multiplicative fixed-point iteration.
252    /// Only valid when all hyperparameter coordinates are penalty-like.
253    /// Needs no gradient or Hessian — only traces tr(H^{-1} A_k) and
254    /// Frobenius norms from the inner solution.
255    Efs,
256    /// Hybrid EFS + preconditioned gradient.
257    ///
258    /// Used when ψ (design-moving) coordinates are present alongside ρ
259    /// (penalty-like) coordinates. Combines:
260    /// - Standard EFS multiplicative fixed-point steps for ρ coords
261    /// - Safeguarded preconditioned gradient steps for ψ coords:
262    ///   `Δψ = -α G⁺ g_ψ` where `G_{de} = tr(H⁻¹ B_d H⁻¹ B_e)`
263    ///
264    /// This hybrid exists because no EFS-type fixed-point iteration can
265    /// guarantee convergence for indefinite B_ψ (proven by counterexample
266    /// in response.md Section 2). The key structural property that EFS
267    /// needs — `H^{-1/2} B_d H^{-1/2} ≽ 0` plus parameter-independent
268    /// nullspace — holds for penalty-like coords but fails for
269    /// design-moving coords where B_ψ has mixed inertia.
270    ///
271    /// The preconditioned gradient uses the same trace Gram matrix that
272    /// EFS already computes, so the cost is O(1) H⁻¹ solves per iteration
273    /// (same as pure EFS), compared to O(dim(θ)) for full BFGS.
274    HybridEfs,
275}
276
277/// How the Hessian will be obtained for the outer optimizer.
278#[derive(Clone, Copy, Debug, PartialEq, Eq)]
279pub enum HessianSource {
280    /// Exact analytic Hessian provided by the objective.
281    Analytic,
282    /// No explicit Hessian; BFGS builds a rank-2 approximation from
283    /// gradient history.
284    BfgsApprox,
285    /// No explicit Hessian or gradient needed. EFS uses traces and
286    /// Frobenius norms from the inner solution directly.
287    EfsFixedPoint,
288    /// Hybrid EFS + preconditioned gradient for ψ coordinates.
289    /// EFS traces for ρ coords, trace Gram matrix + gradient for ψ coords.
290    HybridEfsFixedPoint,
291}
292
293/// Requested derivative order for an outer objective evaluation.
294///
295/// This enum is for the shared `eval` bridge where the runner needs value-only,
296/// first-order, or second-order information depending on the active plan.
297///
298/// Single-sourced on the lower `gam-model-api` crate so the gam-models
299/// fit_orchestration drivers and the gam-solve runner share one type (#1521).
300pub use gam_model_api::OuterEvalOrder;
301
302/// The outer optimization plan. Produced by [`plan`], consumed by the runner.
303#[derive(Clone, Copy, Debug, PartialEq, Eq)]
304pub struct OuterPlan {
305    pub solver: Solver,
306    pub hessian_source: HessianSource,
307}
308
309pub(crate) const EFS_FIRST_ORDER_FALLBACK_MARKER: &str = "[outer-efs-first-order-fallback]";
310
311/// Whether outer_strategy should automatically derive a retry ladder from the
312/// primary capability, or disable retries entirely.
313#[derive(Clone, Copy, Debug, PartialEq, Eq)]
314pub enum FallbackPolicy {
315    /// Centralized retry path chosen from the declared capability.
316    Automatic,
317    /// No retries; use only the primary plan.
318    Disabled,
319}
320
321impl std::fmt::Display for OuterPlan {
322    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
323        write!(
324            f,
325            "solver={:?}, hessian_source={:?}",
326            self.solver, self.hessian_source
327        )
328    }
329}
330
331impl OuterPlan {
332    /// Stable, grep-friendly routing token for large-scale/log regression
333    /// assertions. Emits `solver=<Solver>;hessian=<Source>;matrix-free=<bool>`.
334    /// Planning alone does not prove the runtime Hessian representation;
335    /// matrix-free routing is decided after the seed evaluation returns an
336    /// operator Hessian, so the static plan token reports `false`.
337    pub fn routing_log_line(&self) -> String {
338        let matrix_free = false;
339        format!(
340            "solver={:?};hessian={:?};matrix-free={}",
341            self.solver, self.hessian_source, matrix_free
342        )
343    }
344}
345
346/// Select the outer optimization strategy from the declared capability.
347///
348/// This is a pure function with no side effects. All policy lives here.
349pub fn plan(cap: &OuterCapability) -> OuterPlan {
350    use Derivative::*;
351    use HessianSource as H;
352    use Solver as S;
353
354    match (cap.gradient, cap.declared_hessian_for_planning()) {
355        (Analytic, Analytic) => OuterPlan {
356            solver: S::Arc,
357            hessian_source: H::Analytic,
358        },
359        // EFS: all penalty-like coords, no analytic Hessian, many params.
360        // Multiplicative fixed-point needs only traces — no gradient evals.
361        // Much cheaper than BFGS for k=10-50 smoothing parameters.
362        //
363        // When a log-barrier is present (monotonicity constraints), EFS is
364        // still selected here. The EFS iteration loop in `run_outer` performs
365        // a quantitative check each step via `barrier_curvature_is_significant`
366        // and bails out early if the barrier curvature becomes non-negligible
367        // relative to the penalized Hessian diagonal.
368        (Analytic, Unavailable) if cap.efs_plan_eligible() => OuterPlan {
369            solver: S::Efs,
370            hessian_source: H::EfsFixedPoint,
371        },
372        (Unavailable, Unavailable) if cap.efs_plan_eligible() => OuterPlan {
373            solver: S::Efs,
374            hessian_source: H::EfsFixedPoint,
375        },
376
377        // Hybrid EFS: ψ (design-moving) coords present alongside ρ coords.
378        //
379        // When ψ coords are present, pure EFS is invalid because B_ψ can be
380        // indefinite (see response.md Section 2 for the counterexample). But
381        // falling back to full BFGS wastes the cheap EFS structure for ρ coords.
382        //
383        // The hybrid strategy uses EFS for ρ-coords and a safeguarded
384        // preconditioned gradient step for ψ-coords:
385        //   Δψ = -α G⁺ g_ψ,  G_{de} = tr(H⁻¹ B_d H⁻¹ B_e)
386        //
387        // This stays O(1) H⁻¹ solves per iteration (vs O(dim(θ)) for BFGS)
388        // and uses the same trace Gram matrix that EFS already computes.
389        (Analytic, Unavailable) if cap.hybrid_efs_plan_eligible() => OuterPlan {
390            solver: S::HybridEfs,
391            hessian_source: H::HybridEfsFixedPoint,
392        },
393        (Unavailable, Unavailable) if cap.hybrid_efs_plan_eligible() => OuterPlan {
394            solver: S::HybridEfs,
395            hessian_source: H::HybridEfsFixedPoint,
396        },
397
398        // Gradient-only problems should use a gradient-only optimizer.
399        (Analytic, Unavailable) => OuterPlan {
400            solver: S::Bfgs,
401            hessian_source: H::BfgsApprox,
402        },
403        // No analytic gradient (with or without a declared Hessian), and the
404        // EFS/HybridEFS fixed-point lane ruled out above. Every outer objective
405        // in the tree now supplies an analytic gradient, so a cost-only
406        // capability is a programming error. Emit a BFGS plan so it surfaces
407        // loudly with context: the runner rejects it because BFGS requires the
408        // analytic gradient this capability declares is absent. We deliberately
409        // do NOT invent a working primary here — a cost-only objective has no
410        // solver, by design.
411        (Unavailable, _) => OuterPlan {
412            solver: S::Bfgs,
413            hessian_source: H::BfgsApprox,
414        },
415    }
416}
417
418/// Log the outer optimization plan. Called once per fit at the start of
419/// outer optimization so the user can see what strategy was selected and why.
420pub fn log_plan(context: &str, cap: &OuterCapability, the_plan: &OuterPlan) {
421    let hess_warning = match the_plan.hessian_source {
422        HessianSource::BfgsApprox if cap.n_params > 0 => {
423            " [no Hessian: BFGS approximation]".to_string()
424        }
425        _ => String::new(),
426    };
427    let barrier_note = if cap.barrier_config.is_some() && cap.efs_plan_eligible() {
428        " [EFS with runtime barrier-curvature guard]"
429    } else {
430        ""
431    };
432    let hybrid_note = if the_plan.solver == Solver::HybridEfs {
433        " [hybrid EFS(ρ) + preconditioned-gradient(ψ)]"
434    } else {
435        ""
436    };
437    // Promoted to info: this fires once per outer optimization dispatch and
438    // tells the user immediately whether ARC, BFGS, EFS, etc. was selected
439    // and why. That information is otherwise inferred only from the per-iter
440    // log tag prefix once the loop has started.
441    log::info!(
442        "[OUTER] {context}: n_params={}, gradient={:?}, hessian={:?} -> {} [{}]{hess_warning}{barrier_note}{hybrid_note}",
443        cap.n_params,
444        cap.gradient,
445        cap.hessian,
446        the_plan,
447        the_plan.routing_log_line(),
448    );
449}
450
451pub(crate) fn requests_immediate_first_order_fallback(message: &str) -> bool {
452    message.contains(EFS_FIRST_ORDER_FALLBACK_MARKER)
453}
454
455/// Disable the EFS/HybridEfs planner path, forcing BFGS-class solvers on the
456/// next attempt. Returns `None` if fixed-point is already disabled.
457pub(crate) fn disable_fixed_point(cap: &OuterCapability) -> Option<OuterCapability> {
458    (!cap.disable_fixed_point && (cap.efs_plan_eligible() || cap.hybrid_efs_plan_eligible())).then(
459        || {
460            let mut degraded = cap.clone();
461            degraded.disable_fixed_point = true;
462            degraded
463        },
464    )
465}
466
467pub(crate) fn automatic_fallback_attempts(cap: &OuterCapability) -> Vec<OuterCapability> {
468    // Production fallback ladder is strictly analytic-gradient.
469    //
470    // The cascade is:
471    //   1. If the primary plan is EFS/HybridEFS AND an analytic gradient is
472    //      available, retry with fixed-point disabled so the analytic
473    //      derivative declaration is evaluated directly.
474    //   2. If the primary plan is Arc (declared (Analytic, Analytic)
475    //      capability), do NOT add a degraded fallback. Demoting to
476    //      BFGS+BfgsApprox in this case discards the analytic outer Hessian
477    //      ARC was using — a strictly weaker geometry — and silently masks
478    //      ARC's actual failure mode (e.g. budget exhaustion, indefinite
479    //      curvature) under a BFGS Strong-Wolfe plateau on a flat surface.
480    //      ARC retries are handled by the per-attempt budget-bump retry
481    //      ladder in `run_outer_with_strategy`; once that is exhausted, the
482    //      caller surfaces the underlying ARC failure verbatim.
483    //   3. Otherwise (e.g. (Analytic, Unavailable) without EFS eligibility,
484    //      which is the BFGS primary), there is nothing to degrade further
485    //      — the caller surfaces the RemlOptimizationFailed error so the
486    //      non-convergence is visible.
487    let mut attempts = Vec::new();
488
489    if cap.gradient == Derivative::Analytic
490        && matches!(plan(cap).solver, Solver::Efs | Solver::HybridEfs)
491        && let Some(no_fp_cap) = disable_fixed_point(cap)
492    {
493        attempts.push(no_fp_cap.clone());
494        return attempts;
495    }
496
497    // Arc primary: no lateral demotion to BFGS. The runner's ARC-budget-bump
498    // retry covers cases where ARC needed more iterations; if even that is
499    // exhausted, the caller sees the genuine analytic-Hessian non-convergence
500    // rather than a misleading BFGS-on-flat-surface plateau.
501    if matches!(plan(cap).solver, Solver::Arc) {
502        return attempts;
503    }
504
505    attempts
506}
507
508pub(crate) fn disabled_fallback_hybrid_efs_has_standalone_bfgs_primary(
509    cap: &OuterCapability,
510    config: &OuterConfig,
511) -> bool {
512    config.fallback_policy == FallbackPolicy::Disabled
513        && cap.gradient == Derivative::Analytic
514        && matches!(plan(cap).solver, Solver::HybridEfs)
515}
516
517pub(crate) fn primary_capability_for_config(
518    mut cap: OuterCapability,
519    config: &OuterConfig,
520    context: &str,
521) -> OuterCapability {
522    if disabled_fallback_hybrid_efs_has_standalone_bfgs_primary(&cap, config) {
523        // HybridEFS is not a standalone first-order method for ψ coordinates:
524        // when ψ backtracking proves non-descent, the bridge intentionally
525        // surfaces `EFS_FIRST_ORDER_FALLBACK_MARKER` so the runner can switch
526        // to a joint gradient solver that enforces ∇ψ V = 0. With fallback
527        // disabled and an analytic gradient available, selecting HybridEFS as
528        // the only primary attempt is internally inconsistent; BFGS is the
529        // standalone first-order primary for that capability.
530        log::info!(
531            "[OUTER] {context}: HybridEFS requires the automatic first-order \
532             escape path for ψ coordinates; fallback is disabled, so routing the \
533             primary attempt to analytic-gradient BFGS"
534        );
535        cap.disable_fixed_point = true;
536    }
537    cap
538}