gam_solve/rho_optimizer/run.rs
1use super::*;
2
3pub(crate) const OPERATOR_TRUST_RESTART_RADIUS_FLOOR: f64 = 1.0e-6;
4
5/// Configuration for the outer optimization runner.
6#[derive(Clone, Debug)]
7pub(crate) struct OuterConfig {
8 pub(crate) tolerance: f64,
9 /// Optional override for the *relative-cost-decrease* convergence stop,
10 /// decoupled from `tolerance`. `outer_gradient_tolerance` normally derives
11 /// BOTH the absolute projected-gradient floor (`max(tolerance, scale·1e-9)`)
12 /// AND the relative-cost stop (`rel_cost = tolerance`) from the single
13 /// `tolerance`. That conflation forces a caller who needs a *tight absolute
14 /// floor* (to resolve λ to the genuine REML optimum at large `n`, where the
15 /// floor is `scale·1e-9`) to also accept a *tight rel-cost stop*, which on a
16 /// flat REML ridge never trips and grinds the optimizer to `max_iter` —
17 /// dozens of surplus O(D·p³) Laplace-derivative outer iterations (the #1082
18 /// multinomial smooth-by-factor wall-clock blow-up). When `Some(r)`, the
19 /// rel-cost stop uses `r` while the absolute floor keeps using `tolerance`
20 /// via `objective_scale`, so accuracy (absolute floor) and perf (loose
21 /// rel-cost) are selected independently. `None` preserves the legacy coupling
22 /// (`rel_cost = tolerance`) for every existing path byte-for-byte.
23 pub(crate) rel_cost_tolerance: Option<f64>,
24 pub(crate) max_iter: usize,
25 pub(crate) bounds: Option<(Array1<f64>, Array1<f64>)>,
26 pub(crate) seed_config: gam_problem::SeedConfig,
27 pub(crate) rho_bound: f64,
28 pub(crate) heuristic_lambdas: Option<Vec<f64>>,
29 pub(crate) initial_rho: Option<Array1<f64>>,
30 pub(crate) fallback_policy: FallbackPolicy,
31 pub(crate) screening_cap: Option<Arc<AtomicUsize>>,
32 pub(crate) screen_initial_rho: bool,
33 /// Outer-aware inner-PIRLS iteration cap (sibling of `screening_cap`).
34 /// When set, the BFGS bridge drives this atomic on every accepted
35 /// gradient eval to coarsen the inner Newton solve at early outer iters
36 /// (when ρ is far from converged) and lift it back to full as
37 /// convergence approaches. Distinct from `screening_cap` in that it
38 /// does NOT suppress cache writes / warm-start updates / KKT
39 /// enforcement; it is purely a budget. See
40 /// `RemlObjectiveState::outer_inner_cap` for dual-cap semantics.
41 pub(crate) outer_inner_cap: Option<InnerProgressFeedback>,
42 pub(crate) operator_initial_trust_radius: Option<f64>,
43 pub(crate) arc_initial_regularization: Option<f64>,
44 /// Optional scale factor for the objective's natural magnitude.
45 /// Used to widen the absolute gradient-norm floor on objectives whose
46 /// gradient lives on a non-unit scale (e.g. Gaussian-identity REML at
47 /// large `n`, whose ∂/∂logλ inherits the O(n) likelihood constant).
48 /// `None` falls back to the bare `tolerance` floor.
49 pub(crate) objective_scale: Option<f64>,
50 /// BFGS line-search infinity-norm cap applied to the leading `rho_dim`
51 /// outer parameters (log-λ axes). Documented natural step for
52 /// `log(lambda)` is ≈ 5 (`e^5 ≈ 148`-fold smoothing-parameter change
53 /// per accepted outer iter — matches typical quasi-Newton direction
54 /// magnitude on flat REML surfaces). Setting this `None` disables the
55 /// rho-axis cap entirely.
56 pub(crate) bfgs_step_cap: Option<f64>,
57 /// BFGS line-search infinity-norm cap applied to the trailing `psi_dim`
58 /// outer parameters (kappa / aniso-log-scale axes). Required because
59 /// the kernel scale axes need much tighter control (`e^1 ≈ 2.7`-fold
60 /// per iter is plenty) — using the rho-axis cap here lets the optimizer
61 /// jump kappa by orders of magnitude per step and oscillate. Setting
62 /// this `None` disables the psi-axis cap.
63 pub(crate) bfgs_step_cap_psi: Option<f64>,
64 /// Optional persistent-cache session. When `Some`, every finite objective
65 /// evaluation is written through to disk (rate-limited, atomic-rename)
66 /// and the best on-disk rho is prepended as a seed at the start of each
67 /// plan attempt. Defaulted off so test-only paths skip filesystem I/O.
68 pub(crate) cache_session: Option<Arc<CacheSession>>,
69 /// Optional mirror cache sessions. Checkpoints and successful finalize
70 /// writes are also written to each of these sessions (different keys,
71 /// shared store). Used for hierarchical broadcast: the current best ρ is
72 /// written to the exact-key (primary) AND the data-independent
73 /// seed-prefix key so the next fit with related structure can warm-start
74 /// from this one, even after an interrupted run.
75 pub(crate) cache_mirror_sessions: Vec<Arc<CacheSession>>,
76 pub(crate) rho_uncertainty_problem_size: crate::rho_uncertainty::RhoUncertaintyProblemSize,
77 /// Set by the persistent-cache resume path (`run`) when the outer seed
78 /// originates from a warm-start cache *hit* — i.e. `config.initial_rho`
79 /// (and, since 0.1.204, the inner β) was populated from a prior fit's
80 /// persisted near-optimal iterate (`CacheSeedDecision::Seed`). On a hit
81 /// the continuation pre-warm — which exists to anneal a COLD seed toward
82 /// the optimum — is redundant work (the seed is already near-optimal), so
83 /// the pre-warm step budget is dropped to zero and the run proceeds
84 /// straight to the BFGS/Newton certificate from the cached iterate. The
85 /// converged optimum is unchanged: warm start only sets the STARTING
86 /// point. Defaulted `false`, so every cold-start / no-cache path keeps its
87 /// existing continuation pre-warm budget byte-for-byte.
88 pub(crate) warm_start_cache_hit: bool,
89 /// Converged exact outer Hessian `H(θ̂)` transferred from a prior
90 /// structurally-matching fit via the persistent cache (a warm-start *hit*),
91 /// in the full θ layout. When present and SPD, the BFGS host path seeds its
92 /// iter-0 metric with `InitialMetric::DenseInverseHessian(H⁻¹)` so the first
93 /// outer step is quasi-Newton instead of unscaled steepest descent — the
94 /// dominant LOSO line-search-bracketing cost (each bracketing probe is a
95 /// full inner joint-Newton re-solve). Strictly stronger than the scalar
96 /// `1/‖g₀‖` metric: it carries the full anisotropic curvature, which across
97 /// folds (one held-out point) is nearly identical to this fold's. Never
98 /// changes the converged optimum — BFGS reaches `∇V=0` under any SPD initial
99 /// metric. `None` on every cold-start / no-cache / pre-Hessian-schema path,
100 /// which falls back to the scalar warm metric byte-for-byte.
101 pub(crate) warm_start_outer_hessian: Option<Array2<f64>>,
102 /// Per-ρ-coordinate structural keys, in the objective's NATIVE (formula)
103 /// coordinate order, used to make the outer smoothing-parameter search
104 /// invariant to the order the user wrote the smooth terms / tensor margins
105 /// (#1538/#1539).
106 ///
107 /// When `Some` and the keys induce a non-identity canonical permutation,
108 /// [`run_outer`] reorders the coordinate layout the optimizer sees into a
109 /// stable canonical order (derived purely from the keys, never from the
110 /// native position) before seeding/optimizing, and inverts the permutation
111 /// on the returned ρ / gradient / Hessian so the caller still receives the
112 /// native layout. Seeding, multistart and tie-breaking then all operate on
113 /// the identical canonical layout for every term order, so both orders
114 /// reach the same λ̂ and the same fitted surface. `None` (or an identity
115 /// permutation) leaves the legacy native-order path byte-for-byte unchanged.
116 pub(crate) rho_canonical_keys: Option<Vec<u64>>,
117}
118
119impl Default for OuterConfig {
120 fn default() -> Self {
121 Self {
122 tolerance: 1e-5,
123 rel_cost_tolerance: None,
124 max_iter: 200,
125 bounds: None,
126 seed_config: gam_problem::SeedConfig::default(),
127 rho_bound: 30.0,
128 heuristic_lambdas: None,
129 initial_rho: None,
130 fallback_policy: FallbackPolicy::Automatic,
131 screening_cap: None,
132 screen_initial_rho: false,
133 outer_inner_cap: None,
134 operator_initial_trust_radius: None,
135 arc_initial_regularization: None,
136 objective_scale: None,
137 bfgs_step_cap: None,
138 bfgs_step_cap_psi: None,
139 cache_session: None,
140 cache_mirror_sessions: Vec::new(),
141 rho_uncertainty_problem_size:
142 crate::rho_uncertainty::RhoUncertaintyProblemSize::default(),
143 warm_start_cache_hit: false,
144 warm_start_outer_hessian: None,
145 rho_canonical_keys: None,
146 }
147 }
148}
149
150// ─── OuterProblem builder ─────────────────────────────────────────────
151//
152// Declarative builder for outer optimization problems. Derives
153// OuterCapability flags from high-level inputs (gradient/hessian
154// availability, psi dimension, EFS eligibility) so call sites never
155// hand-copy capability flags.
156
157/// Declarative outer-problem builder. Produces both the
158/// [`OuterCapability`] (what the objective can provide) and the
159/// [`OuterConfig`] (how the runner should behave) from a small set
160/// of high-level declarations.
161pub struct OuterProblem {
162 n_params: usize,
163 gradient: Derivative,
164 hessian: DeclaredHessianForm,
165 prefer_gradient_only: bool,
166 disable_fixed_point: bool,
167 psi_dim: usize,
168 barrier_config: Option<BarrierConfig>,
169 tolerance: f64,
170 rel_cost_tolerance: Option<f64>,
171 max_iter: usize,
172 bounds: Option<(Array1<f64>, Array1<f64>)>,
173 rho_bound: f64,
174 seed_config: gam_problem::SeedConfig,
175 heuristic_lambdas: Option<Vec<f64>>,
176 initial_rho: Option<Array1<f64>>,
177 fallback_policy: FallbackPolicy,
178 screening_cap: Option<Arc<AtomicUsize>>,
179 screen_initial_rho: bool,
180 outer_inner_cap: Option<InnerProgressFeedback>,
181 operator_initial_trust_radius: Option<f64>,
182 arc_initial_regularization: Option<f64>,
183 objective_scale: Option<f64>,
184 bfgs_step_cap: Option<f64>,
185 bfgs_step_cap_psi: Option<f64>,
186 cache_session: Option<Arc<CacheSession>>,
187 cache_mirror_sessions: Vec<Arc<CacheSession>>,
188 rho_uncertainty_problem_size: crate::rho_uncertainty::RhoUncertaintyProblemSize,
189 continuation_prewarm: bool,
190 rho_canonical_keys: Option<Vec<u64>>,
191}
192
193impl OuterProblem {
194 pub fn new(n_params: usize) -> Self {
195 Self {
196 n_params,
197 gradient: Derivative::Unavailable,
198 hessian: DeclaredHessianForm::Unavailable,
199 prefer_gradient_only: false,
200 disable_fixed_point: false,
201 psi_dim: 0,
202 barrier_config: None,
203 tolerance: 1e-5,
204 rel_cost_tolerance: None,
205 max_iter: 200,
206 bounds: None,
207 rho_bound: 30.0,
208 seed_config: gam_problem::SeedConfig::default(),
209 heuristic_lambdas: None,
210 initial_rho: None,
211 fallback_policy: FallbackPolicy::Automatic,
212 screening_cap: None,
213 screen_initial_rho: false,
214 outer_inner_cap: None,
215 operator_initial_trust_radius: None,
216 arc_initial_regularization: None,
217 objective_scale: None,
218 bfgs_step_cap: None,
219 bfgs_step_cap_psi: None,
220 cache_session: None,
221 cache_mirror_sessions: Vec::new(),
222 rho_uncertainty_problem_size:
223 crate::rho_uncertainty::RhoUncertaintyProblemSize::default(),
224 continuation_prewarm: true,
225 rho_canonical_keys: None,
226 }
227 }
228
229 /// Supply per-ρ-coordinate structural keys (native/formula order) so the
230 /// outer search is canonicalized to be invariant to the order the smooth
231 /// terms / tensor margins were written (#1538/#1539). See
232 /// [`OuterConfig::rho_canonical_keys`].
233 pub fn with_rho_canonical_keys(mut self, keys: Option<Vec<u64>>) -> Self {
234 self.rho_canonical_keys = keys;
235 self
236 }
237
238 pub fn with_gradient(mut self, d: Derivative) -> Self {
239 self.gradient = d;
240 self
241 }
242 pub fn with_hessian(mut self, form: DeclaredHessianForm) -> Self {
243 self.hessian = form;
244 self
245 }
246 pub fn with_prefer_gradient_only(mut self, prefer_gradient_only: bool) -> Self {
247 self.prefer_gradient_only = prefer_gradient_only;
248 self
249 }
250 /// Forbid the planner from selecting EFS/HybridEfs, even when the
251 /// objective implements `eval_efs()` and the coordinate structure would
252 /// otherwise make pure/hybrid EFS eligible.
253 ///
254 /// Callers use this for families where the Wood-Fasiolo structural
255 /// property is known not to hold (e.g. GAMLSS/location-scale with
256 /// β-dependent joint Hessian), so EFS would stagnate and burn budget
257 /// before the automatic cascade falls back to gradient-based BFGS.
258 pub fn with_disable_fixed_point(mut self, disable: bool) -> Self {
259 self.disable_fixed_point = disable;
260 self
261 }
262 // MEASURE-JET ψ REGISTRATION: the engine below is already complete for a
263 // 3-coordinate measure-jet ψ group (s, α, ln τ) — `psi_dim` is generic,
264 // `with_bounds` carries the s ∈ (0, 2) box (the same convention matern κ
265 // uses for its log-κ window; no logistic reparameterization exists or is
266 // needed in-house), `with_bfgs_step_cap_psi` caps per-iteration ψ moves,
267 // and `DirectionalHyperParam::new_compact` (solver/reml/mod.rs) carries
268 // penalty-only first/second/cross jets with `is_penalty_like`
269 // auto-derived from the identically-zero design drift (∂X/∂ψ ≡ 0).
270 // Every remaining registration arm is formula-layer dispatch in
271 // src/terms/smooth.rs (eligibility in
272 // `spatial_term_supports_hyper_optimization`, dims in
273 // `spatial_dims_per_term`, seed/bounds/write-back on
274 // `SpatialLogKappaCoords`, the per-trial rebuild in
275 // `apply_log_kappa_to_term`, and the derivative bundle in
276 // `try_build_spatial_term_log_kappa_derivative`, which currently returns
277 // `Ok(None)` for `SmoothBasisSpec::MeasureJet`) plus the
278 // `build_measure_jet_basis_psi_derivatives` producer in
279 // src/terms/basis/measure_jet_smooth.rs; both are owned by the
280 // measure-jet terms actor. Registration stays gated on those arms — do
281 // NOT add measure-jet-specific branches to this engine.
282 pub fn with_psi_dim(mut self, dim: usize) -> Self {
283 self.psi_dim = dim;
284 self
285 }
286 pub fn with_barrier(mut self, cfg: Option<BarrierConfig>) -> Self {
287 self.barrier_config = cfg;
288 self
289 }
290 pub fn with_tolerance(mut self, tol: f64) -> Self {
291 self.tolerance = tol;
292 self
293 }
294 pub fn with_max_iter(mut self, n: usize) -> Self {
295 self.max_iter = n;
296 self
297 }
298 pub fn with_bounds(mut self, lo: Array1<f64>, hi: Array1<f64>) -> Self {
299 self.bounds = Some((lo, hi));
300 self
301 }
302 pub fn with_rho_bound(mut self, b: f64) -> Self {
303 self.rho_bound = b;
304 self
305 }
306 pub fn with_seed_config(mut self, sc: gam_problem::SeedConfig) -> Self {
307 self.seed_config = sc;
308 self
309 }
310 pub fn with_heuristic_lambdas(mut self, h: Vec<f64>) -> Self {
311 self.heuristic_lambdas = Some(h);
312 self
313 }
314 pub fn with_initial_rho(mut self, rho: Array1<f64>) -> Self {
315 self.initial_rho = Some(rho);
316 self
317 }
318 /// Toggle the generic rho-continuation seed pre-warm. This does not affect
319 /// objectives that require an explicit continuation path; it only controls
320 /// the cheap-by-default pre-pass gated by `allow_continuation_prewarm()`.
321 pub fn with_continuation_prewarm(mut self, enabled: bool) -> Self {
322 self.continuation_prewarm = enabled;
323 self
324 }
325 pub fn with_screening_cap(mut self, screening_cap: Arc<AtomicUsize>) -> Self {
326 self.screening_cap = Some(screening_cap);
327 self
328 }
329 /// Allow seed screening to rank the explicit initial rho against generated
330 /// candidates even when the effective seed budget is one. The default keeps
331 /// a user-provided initial point authoritative and avoids a separate
332 /// screening pass.
333 pub fn with_screen_initial_rho(mut self, screen_initial_rho: bool) -> Self {
334 self.screen_initial_rho = screen_initial_rho;
335 self
336 }
337 /// Wire the bidirectional inner-PIRLS feedback channel.
338 ///
339 /// The outer bridge writes a coarsened iteration cap into
340 /// `feedback.cap` on every accepted gradient/Hessian eval; the inner
341 /// solver writes back into `feedback.last_iters` /
342 /// `feedback.last_converged` after each non-screening solve so the
343 /// next outer iter's schedule can adapt to the inner solver's
344 /// actual convergence behavior. Typical caller passes
345 /// `InnerProgressFeedback {
346 /// cap: Arc::clone(&reml_state.outer_inner_cap),
347 /// last_iters: Arc::clone(&reml_state.last_inner_iters),
348 /// last_converged: Arc::clone(&reml_state.last_inner_converged),
349 /// }` so the inner and outer observe the same atomics.
350 pub fn with_outer_inner_cap(mut self, feedback: InnerProgressFeedback) -> Self {
351 self.outer_inner_cap = Some(feedback);
352 self
353 }
354 pub fn with_operator_initial_trust_radius(mut self, radius: Option<f64>) -> Self {
355 self.operator_initial_trust_radius = sanitized_operator_trust_restart_radius(radius);
356 self
357 }
358
359 /// Override the ARC initial cubic-regularization parameter sigma
360 /// (default in `opt`: 1.0). Smaller sigma → less cubic penalty on the
361 /// first step → larger first move on benign objectives. The matrix-
362 /// free Newton-TR analog is `with_operator_initial_trust_radius`.
363 ///
364 /// Used by Gaussian-identity REML at large-scale n: the objective is
365 /// quadratic-like in log-λ near the optimum (sigma is the right
366 /// scale), and log-λ moves of 2–4 units in the early iters
367 /// otherwise burn 4–8 iters of trust-region expansion before the
368 /// model trusts the analytic Hessian.
369 pub fn with_arc_initial_regularization(mut self, sigma: Option<f64>) -> Self {
370 self.arc_initial_regularization = sigma.filter(|v| v.is_finite() && *v > 0.0);
371 self
372 }
373
374 /// Set the objective's natural magnitude scale, used to derive an
375 /// `n`-aware absolute gradient-norm floor. When set to `Some(s)`,
376 /// the runner uses `abs_floor = max(tol, s * 1e-9)` for the
377 /// projected-gradient convergence check.
378 ///
379 /// Rationale: a fixed `abs = tol` (e.g. 1e-6) is appropriate when the
380 /// objective and its gradient live on a unit scale, but Gaussian-
381 /// identity REML carries an O(n) likelihood constant that flows into
382 /// ∂/∂logλ. At large-scale n the floor becomes binding even when the
383 /// relative-from-seed component (`rel_initial_grad * ‖g0‖`) declared
384 /// convergence iters earlier — chasing sub-ULP changes in log-λ at
385 /// the cost of repeated k²·n·p² analytic-Hessian assemblies.
386 pub fn with_objective_scale(mut self, scale: Option<f64>) -> Self {
387 self.objective_scale = scale.filter(|v| v.is_finite() && *v > 0.0);
388 self
389 }
390
391 /// Decouple the *relative-cost-decrease* convergence stop from the
392 /// absolute projected-gradient floor. By default both are derived from the
393 /// single `with_tolerance` value (`abs = max(tol, scale·1e-9)`,
394 /// `rel_cost = tol`). Supplying `Some(r)` here makes the rel-cost stop use
395 /// `r` while the absolute floor keeps using `tolerance` (so a caller can
396 /// keep a tight absolute floor for accuracy at large `n` AND a loose
397 /// rel-cost stop for perf on a flat REML ridge — see #1082). `None` keeps
398 /// the legacy coupling.
399 pub fn with_rel_cost_tolerance(mut self, rel_cost: Option<f64>) -> Self {
400 self.rel_cost_tolerance = rel_cost.filter(|v| v.is_finite() && *v > 0.0);
401 self
402 }
403
404 /// Cap the infinity-norm displacement of BFGS cost-only line-search probes
405 /// on the **rho axes** (the first `n_params - psi_dim` outer parameters,
406 /// = log-λ). Also scales the initial inverse metric so the first trial
407 /// direction respects the same local budget coordinate-wise. Documented
408 /// natural step on log-λ is ≈ 5; tighter values throttle BFGS and starve
409 /// convergence on flat REML valleys.
410 pub fn with_bfgs_step_cap(mut self, cap: Option<f64>) -> Self {
411 self.bfgs_step_cap = cap.filter(|v| v.is_finite() && *v > 0.0);
412 self
413 }
414
415 /// Cap the infinity-norm displacement of BFGS cost-only line-search probes
416 /// on the **psi axes** (the trailing `psi_dim` outer parameters, = kappa
417 /// or anisotropic log-scales). Mirrors [`Self::with_bfgs_step_cap`] but
418 /// scoped to kernel-scale parameters whose natural step is much smaller
419 /// than log-λ (≈ ln 2 per iter keeps kappa from oscillating). Without
420 /// this split, a uniform rho-scale cap lets psi explode while a uniform
421 /// psi-scale cap throttles rho — both fail the survival-marginal-slope
422 /// path at large scale, where rho needs |d|≈5 while psi wants |d|≤1.
423 pub fn with_bfgs_step_cap_psi(mut self, cap: Option<f64>) -> Self {
424 self.bfgs_step_cap_psi = cap.filter(|v| v.is_finite() && *v > 0.0);
425 self
426 }
427
428 pub fn with_cache_session(mut self, session: Arc<CacheSession>) -> Self {
429 self.cache_session = Some(session);
430 self
431 }
432
433 /// Attach mirror cache sessions that receive a broadcast copy of
434 /// the final-result finalize write. See
435 /// [`OuterConfig::cache_mirror_sessions`].
436 pub fn with_cache_mirror_sessions(mut self, sessions: Vec<Arc<CacheSession>>) -> Self {
437 self.cache_mirror_sessions = sessions;
438 self
439 }
440
441 pub fn with_problem_size(mut self, n_obs: usize, p_coefficients: usize) -> Self {
442 self.rho_uncertainty_problem_size = crate::rho_uncertainty::RhoUncertaintyProblemSize {
443 n_obs: Some(n_obs),
444 p_coefficients: Some(p_coefficients),
445 };
446 self
447 }
448
449 /// Override the fallback policy. Default is [`FallbackPolicy::Automatic`].
450 ///
451 /// Set [`FallbackPolicy::Disabled`] when the caller requires the primary
452 /// plan to stand on its own. Exact-Hessian objectives use this to ensure
453 /// failures surface on the analytic geometry instead of being reinterpreted
454 /// by a different optimizer class.
455 pub fn with_fallback_policy(mut self, policy: FallbackPolicy) -> Self {
456 self.fallback_policy = policy;
457 self
458 }
459
460 /// Derive the capability flags from the builder state.
461 /// `fixed_point_available` is set to `false` here; `build_objective`
462 /// overrides it based on whether an EFS closure is actually provided.
463 fn capability(&self) -> OuterCapability {
464 OuterCapability {
465 gradient: self.gradient,
466 hessian: self.hessian,
467 prefer_gradient_only: self.prefer_gradient_only,
468 disable_fixed_point: self.disable_fixed_point,
469 n_params: self.n_params,
470 psi_dim: self.psi_dim,
471 fixed_point_available: false,
472 barrier_config: self.barrier_config.clone(),
473 }
474 }
475
476 /// Derive the runner configuration from the builder state.
477 pub(crate) fn config(&self) -> OuterConfig {
478 OuterConfig {
479 tolerance: self.tolerance,
480 rel_cost_tolerance: self.rel_cost_tolerance,
481 max_iter: self.max_iter,
482 bounds: self.bounds.clone(),
483 seed_config: self.seed_config,
484 rho_bound: self.rho_bound,
485 heuristic_lambdas: self.heuristic_lambdas.clone(),
486 initial_rho: self.initial_rho.clone(),
487 fallback_policy: self.fallback_policy,
488 screening_cap: self.screening_cap.clone(),
489 screen_initial_rho: self.screen_initial_rho,
490 outer_inner_cap: self.outer_inner_cap.clone(),
491 operator_initial_trust_radius: self.operator_initial_trust_radius,
492 arc_initial_regularization: self.arc_initial_regularization,
493 objective_scale: self.objective_scale,
494 bfgs_step_cap: self.bfgs_step_cap,
495 bfgs_step_cap_psi: self.bfgs_step_cap_psi,
496 cache_session: self.cache_session.clone(),
497 cache_mirror_sessions: self.cache_mirror_sessions.clone(),
498 rho_uncertainty_problem_size: self.rho_uncertainty_problem_size,
499 // Cold by construction. The persistent-cache resume path in `run`
500 // flips this to `true` only after a warm-start cache *hit* installs
501 // a near-optimal seed; every other entry point keeps the cold-start
502 // continuation pre-warm budget byte-for-byte.
503 warm_start_cache_hit: false,
504 // Populated only by the persistent-cache resume path in `run` after
505 // a warm-start hit decodes a converged outer Hessian; cold by
506 // construction here, like `warm_start_cache_hit`.
507 warm_start_outer_hessian: None,
508 rho_canonical_keys: self.rho_canonical_keys.clone(),
509 }
510 }
511
512 /// Construct a [`ClosureObjective`] with capability flags derived from the
513 /// builder state **and** the closures actually provided.
514 ///
515 /// `fixed_point_available` is set to `true` when `efs_fn` is `Some`,
516 /// regardless of whether `.with_efs()` was called. This is the canonical
517 /// way to create production objectives — it eliminates the drift risk of
518 /// manually entering capability flags.
519 pub fn build_objective<S, Fc, Fe, Fr, Fefs>(
520 &self,
521 state: S,
522 cost_fn: Fc,
523 eval_fn: Fe,
524 reset_fn: Option<Fr>,
525 efs_fn: Option<Fefs>,
526 ) -> ClosureObjective<S, Fc, Fe, Fr, Fefs>
527 where
528 Fc: FnMut(&mut S, &Array1<f64>) -> Result<f64, EstimationError>,
529 Fe: FnMut(&mut S, &Array1<f64>) -> Result<OuterEval, EstimationError>,
530 Fr: FnMut(&mut S),
531 Fefs: FnMut(&mut S, &Array1<f64>) -> Result<EfsEval, EstimationError>,
532 {
533 let mut cap = self.capability();
534 // Derive fixed_point_available from whether the caller actually
535 // provided an EFS hook, rather than relying on manual flags.
536 cap.fixed_point_available = efs_fn.is_some();
537 ClosureObjective {
538 state,
539 cap,
540 cost_fn,
541 eval_fn,
542 eval_order_fn: None,
543 reset_fn,
544 efs_fn,
545 screening_proxy_fn: None::<fn(&mut S, &Array1<f64>) -> Result<f64, EstimationError>>,
546 seed_fn: None::<fn(&mut S, &Array1<f64>) -> Result<SeedOutcome, EstimationError>>,
547 continuation_prewarm: self.continuation_prewarm,
548 }
549 }
550
551 /// Construct a [`ClosureObjective`] with an order-aware evaluation hook.
552 ///
553 /// This lets the runner request first-order vs second-order work based on
554 /// the active outer plan while preserving the legacy eager `eval_fn`.
555 pub fn build_objective_with_eval_order<S, Fc, Fe, Feo, Fr, Fefs>(
556 &self,
557 state: S,
558 cost_fn: Fc,
559 eval_fn: Fe,
560 eval_order_fn: Feo,
561 reset_fn: Option<Fr>,
562 efs_fn: Option<Fefs>,
563 ) -> ClosureObjective<S, Fc, Fe, Fr, Fefs, Feo>
564 where
565 Fc: FnMut(&mut S, &Array1<f64>) -> Result<f64, EstimationError>,
566 Fe: FnMut(&mut S, &Array1<f64>) -> Result<OuterEval, EstimationError>,
567 Feo: FnMut(&mut S, &Array1<f64>, OuterEvalOrder) -> Result<OuterEval, EstimationError>,
568 Fr: FnMut(&mut S),
569 Fefs: FnMut(&mut S, &Array1<f64>) -> Result<EfsEval, EstimationError>,
570 {
571 let mut cap = self.capability();
572 cap.fixed_point_available = efs_fn.is_some();
573 ClosureObjective {
574 state,
575 cap,
576 cost_fn,
577 eval_fn,
578 eval_order_fn: Some(eval_order_fn),
579 reset_fn,
580 efs_fn,
581 screening_proxy_fn: None::<fn(&mut S, &Array1<f64>) -> Result<f64, EstimationError>>,
582 seed_fn: None::<fn(&mut S, &Array1<f64>) -> Result<SeedOutcome, EstimationError>>,
583 continuation_prewarm: self.continuation_prewarm,
584 }
585 }
586
587 /// Construct a [`ClosureObjective`] with both an order-aware evaluation
588 /// hook and a custom seed-screening ranking proxy. The proxy fires only
589 /// when the cascade in `rank_seeds_with_screening` calls it; outside
590 /// screening the regular cost path is unaffected.
591 pub fn build_objective_with_screening_proxy<S, Fc, Fe, Feo, Fr, Fefs, Fsp>(
592 &self,
593 state: S,
594 cost_fn: Fc,
595 eval_fn: Fe,
596 eval_order_fn: Feo,
597 reset_fn: Option<Fr>,
598 efs_fn: Option<Fefs>,
599 screening_proxy_fn: Fsp,
600 ) -> ClosureObjective<S, Fc, Fe, Fr, Fefs, Feo, Fsp>
601 where
602 Fc: FnMut(&mut S, &Array1<f64>) -> Result<f64, EstimationError>,
603 Fe: FnMut(&mut S, &Array1<f64>) -> Result<OuterEval, EstimationError>,
604 Feo: FnMut(&mut S, &Array1<f64>, OuterEvalOrder) -> Result<OuterEval, EstimationError>,
605 Fr: FnMut(&mut S),
606 Fefs: FnMut(&mut S, &Array1<f64>) -> Result<EfsEval, EstimationError>,
607 Fsp: FnMut(&mut S, &Array1<f64>) -> Result<f64, EstimationError>,
608 {
609 let mut cap = self.capability();
610 cap.fixed_point_available = efs_fn.is_some();
611 ClosureObjective {
612 state,
613 cap,
614 cost_fn,
615 eval_fn,
616 eval_order_fn: Some(eval_order_fn),
617 reset_fn,
618 efs_fn,
619 screening_proxy_fn: Some(screening_proxy_fn),
620 seed_fn: None::<fn(&mut S, &Array1<f64>) -> Result<SeedOutcome, EstimationError>>,
621 continuation_prewarm: self.continuation_prewarm,
622 }
623 }
624
625 /// Run the outer optimization with a given objective.
626 pub fn run(
627 &self,
628 obj: &mut dyn OuterObjective,
629 context: &str,
630 ) -> Result<OuterResult, EstimationError> {
631 let mut config = self.config();
632 let Some(session) = config.cache_session.clone() else {
633 return run_outer(obj, &config, context);
634 };
635 let key_hex = session.key().to_hex();
636 let short_key = &key_hex[..8.min(key_hex.len())];
637 let mut had_hit = false;
638 let mut cached_inner_beta: Option<Array1<f64>> = None;
639 if let Some(loaded) = session.try_load_with_source() {
640 match classify_cache_entry_for_outer(&loaded, self.n_params) {
641 CacheSeedDecision::ExactFinal {
642 rho,
643 beta: _beta_final,
644 final_value,
645 iterations,
646 prior_obj_display,
647 } => {
648 let cap = primary_capability_for_config(obj.capability(), &config, context);
649 let plan_used = plan(&cap);
650 log::info!(
651 "[CACHE] final-hit key={}.. context={} rho_dim={} prior_obj={:.6e} iter={} action=skip-outer-validation",
652 short_key,
653 context,
654 rho.len(),
655 prior_obj_display,
656 iterations,
657 );
658 let mut result =
659 OuterResult::new(rho, final_value, iterations, true, plan_used);
660 result.rho_uncertainty_diagnostic = Some(compute_rho_uncertainty_diagnostic(
661 obj,
662 &config,
663 context,
664 &mut result,
665 ));
666 return Ok(result);
667 }
668 CacheSeedDecision::Seed {
669 rho,
670 beta,
671 hessian,
672 prior_obj_display,
673 iteration,
674 } => {
675 let beta_len = beta.len();
676 let beta_arr = if beta.is_empty() {
677 None
678 } else {
679 Some(Array1::from_vec(beta))
680 };
681 // Adopt the transferred converged outer Hessian only when it
682 // matches this fit's full-θ dimension; a dimension drift
683 // (structural change the cache key did not capture) falls
684 // back to the scalar warm metric in run_plan.
685 config.warm_start_outer_hessian = hessian.and_then(|(dim, flat)| {
686 if dim == self.n_params && flat.len() == dim * dim {
687 Array2::from_shape_vec((dim, dim), flat).ok()
688 } else {
689 None
690 }
691 });
692 if config
693 .initial_rho
694 .as_ref()
695 .is_none_or(|initial| initial != rho)
696 {
697 log::info!(
698 "[CACHE] hit key={}.. context={} rho_dim={} beta_dim={} prior_obj={:.6e} iter={}",
699 short_key,
700 context,
701 rho.len(),
702 beta_len,
703 prior_obj_display,
704 iteration,
705 );
706 config.initial_rho = Some(rho);
707 config.screen_initial_rho = false;
708 had_hit = true;
709 } else {
710 log::info!(
711 "[CACHE] hit key={}.. context={} rho_dim={} beta_dim={} already-aligned prior_obj={:.6e}",
712 short_key,
713 context,
714 rho.len(),
715 beta_len,
716 prior_obj_display,
717 );
718 had_hit = true;
719 }
720 cached_inner_beta = beta_arr;
721 }
722 CacheSeedDecision::Discard {
723 reason: "payload-shape-mismatch",
724 ..
725 } => {
726 log::info!(
727 "[CACHE] skip key={}.. context={} reason=payload-shape-mismatch n_params={}",
728 short_key,
729 context,
730 self.n_params,
731 );
732 }
733 CacheSeedDecision::Discard {
734 reason,
735 prior_obj_display,
736 all_rho_finite,
737 } => {
738 log::info!(
739 "[CACHE] skip key={}.. context={} reason={} prior_obj={:.6e} all_rho_finite={}",
740 short_key,
741 context,
742 reason,
743 prior_obj_display,
744 all_rho_finite.unwrap_or(false),
745 );
746 }
747 }
748 } else {
749 log::info!(
750 "[CACHE] miss key={}.. context={} reason=fresh-fingerprint n_params={}",
751 short_key,
752 context,
753 self.n_params,
754 );
755 }
756 // Propagate the warm-start cache-hit signal into the config the runner
757 // sees. On a hit the installed seed (ρ, and since 0.1.204 the inner β)
758 // is already near-optimal, so the continuation pre-warm — which exists
759 // purely to anneal a COLD seed — is redundant and is skipped downstream
760 // (`run_plan::continuation_prewarm_step_budget`). The outer BFGS/Newton
761 // still runs to its REML/KKT certificate, so the optimum is identical.
762 config.warm_start_cache_hit = had_hit;
763 let mut checkpointing = CheckpointingObjective::new(
764 obj,
765 Arc::clone(&session),
766 config.cache_mirror_sessions.clone(),
767 );
768 // Inject the cached inner β (when present) so the family's PIRLS
769 // opens at the prior converged iterate. Families that don't expose
770 // a β slot inherit the trait's no-op default and silently ignore
771 // the hint — that's a ρ-only resume, identical to the pre-β-cache
772 // behavior, but never a regression. Families that DO expose β
773 // (PIRLS-based GAMs, custom-family marginal slope, …) override
774 // `seed_inner_state` to install β before the first eval.
775 if let Some(beta) = cached_inner_beta.as_ref() {
776 match checkpointing.seed_inner_state(beta) {
777 Ok(SeedOutcome::Installed) => log::info!(
778 "[CACHE] beta-warm key={}.. context={} beta_dim={} action=installed",
779 short_key,
780 context,
781 beta.len(),
782 ),
783 Ok(SeedOutcome::NoSlot) => log::warn!(
784 "[CACHE] beta-warm key={}.. context={} beta_dim={} action=skip \
785 reason=objective_has_no_inner_beta_slot",
786 short_key,
787 context,
788 beta.len(),
789 ),
790 Ok(SeedOutcome::Incompatible) => log::info!(
791 // Not a warning: a row-relaxed cross-fit prefix seed
792 // (`cache_seed_key`) legitimately carries a β whose length
793 // reflects the PARENT fold's realized basis rank, which
794 // differs from this fold's per-block widths. The ρ seed is
795 // kept (already installed above); cross-length β transfer is
796 // the gauge-projected FitArtifact channel's job. This is a
797 // clean ρ-only resume, NOT a regression to full cold start.
798 "[CACHE] beta-warm key={}.. context={} beta_dim={} action=rho-only \
799 reason=seed_beta_length_incompatible_with_inner_blocks",
800 short_key,
801 context,
802 beta.len(),
803 ),
804 Err(err) => log::warn!(
805 "[CACHE] beta-warm key={}.. context={} beta_dim={} action=skip err={}",
806 short_key,
807 context,
808 beta.len(),
809 err,
810 ),
811 }
812 }
813 let result = run_outer(&mut checkpointing, &config, context);
814 // Pull the most-recent inner β surfaced by the inner solver so the
815 // finalize write encodes the (ρ, β) pair the BFGS optimum was
816 // actually fitted at, not a ρ-only seed that resumes at cold β.
817 let final_beta = checkpointing.last_inner_beta();
818 if let Ok(result) = result.as_ref()
819 && result.final_value.is_finite()
820 && result.converged
821 && let Some(bytes) = encode_iterate(
822 &result.rho,
823 final_beta.as_ref(),
824 result.final_hessian.as_ref(),
825 result.final_value,
826 result.iterations as u64,
827 )
828 {
829 let saved = session.finalize(
830 &bytes,
831 Some(result.final_value),
832 Some(result.iterations as u64),
833 );
834 if saved {
835 log::info!(
836 "[CACHE] save key={}.. context={} final_obj={:.6e} iter={} resumed={}",
837 short_key,
838 context,
839 result.final_value,
840 result.iterations,
841 had_hit,
842 );
843 }
844 // Broadcast finalize to mirror keys. The seed-prefix mirror
845 // exists so future fits with related-but-not-identical
846 // structure can warm-start from this run via the dispatcher's
847 // prefix lookup.
848 for mirror in &config.cache_mirror_sessions {
849 let mirror_saved = mirror.finalize(
850 &bytes,
851 Some(result.final_value),
852 Some(result.iterations as u64),
853 );
854 if mirror_saved {
855 let mirror_hex = mirror.key().to_hex();
856 log::info!(
857 "[CACHE] save key={}.. context={} mirror final_obj={:.6e} iter={}",
858 &mirror_hex[..8.min(mirror_hex.len())],
859 context,
860 result.final_value,
861 result.iterations,
862 );
863 }
864 }
865 }
866 result
867 }
868}
869
870/// Result of a completed outer optimization.
871#[derive(Clone, Debug)]
872pub struct OuterResult {
873 /// Optimized log-smoothing parameters.
874 pub rho: Array1<f64>,
875 /// Final objective value.
876 pub final_value: f64,
877 /// Total outer iterations across all solver restarts.
878 pub iterations: usize,
879 /// Final gradient norm, when the solver computed an actual gradient.
880 pub final_grad_norm: Option<f64>,
881 /// Final gradient when the solver is gradient-based.
882 pub final_gradient: Option<Array1<f64>>,
883 /// Final Hessian when the solver tracks one.
884 pub final_hessian: Option<Array2<f64>>,
885 /// Whether the optimizer converged to a stationary point.
886 pub converged: bool,
887 /// Which plan was actually used (may differ from initial if fallback fired).
888 pub plan_used: OuterPlan,
889 /// Final trust radius for the internal operator trust-region solver.
890 ///
891 /// A non-converged operator-ARC attempt may be restarted by the budget
892 /// ladder. Restarting only from the last θ but resetting the trust radius
893 /// is not a warm start: it replays the same rejected large trial steps.
894 /// Carry this globalization state so retries resume from the scale the
895 /// previous attempt already learned.
896 pub operator_trust_radius: Option<f64>,
897 /// Why the internal operator trust-region solver stopped.
898 pub operator_stop_reason: Option<OperatorTrustRegionStopReason>,
899 /// First-order optimality self-audit at the returned point (#934).
900 ///
901 /// `None` when no analytic gradient was measured at termination
902 /// (gradient-free solvers, cache-hit short-circuits, per-atom EFS) or
903 /// when an audit probe failed to evaluate. Populated once by
904 /// [`run_outer`] after the solver ladder returns, outside all hot loops.
905 pub criterion_certificate: Option<CriterionCertificate>,
906 /// Post-fit PSIS diagnostic for whether sampled smoothing-parameter weights
907 /// show evidence that plug-in REML/LAML intervals are unreliable. Populated
908 /// once by [`run_outer`] when the exact rho Hessian is cheap enough to use.
909 pub rho_uncertainty_diagnostic: Option<crate::rho_uncertainty::RhoUncertaintyDiagnostic>,
910}
911
912impl OuterResult {
913 pub fn new(
914 rho: Array1<f64>,
915 final_value: f64,
916 iterations: usize,
917 converged: bool,
918 plan_used: OuterPlan,
919 ) -> Self {
920 Self {
921 rho,
922 final_value,
923 iterations,
924 final_grad_norm: None,
925 final_gradient: None,
926 final_hessian: None,
927 converged,
928 plan_used,
929 operator_trust_radius: None,
930 operator_stop_reason: None,
931 criterion_certificate: None,
932 rho_uncertainty_diagnostic: None,
933 }
934 }
935
936 /// Human-readable rendering of `final_grad_norm` for diagnostics. Returns
937 /// `"n/a"` when no gradient was measured (gradient-free / cache-hit paths).
938 pub fn final_grad_norm_report(&self) -> String {
939 match self.final_grad_norm {
940 Some(g) => format!("{g:.3e}"),
941 None => "n/a".to_string(),
942 }
943 }
944}
945
946// ─── First-order optimality certificate (#934) ────────────────────────
947//
948// The objective↔gradient desync bug genus (#748, #752, #808, #901, …) has a
949// universal signature: at the returned "optimum" the analytic gradient says
950// converged while a finite difference of the ACTUAL criterion value says
951// otherwise (or the optimizer stalls and rails λ). Every such bug was
952// diagnosed by a human running exactly that FD comparison by hand. The
953// certificate makes the engine run it on itself, once, at θ̂, on every fit:
954// two central-difference pairs of the VALUE path along one deterministic
955// random direction, compared against ∇F(θ̂)·v from the analytic path, plus
956// the two ancillary facts every desync postmortem asks for (is the outer
957// curvature PD here; did any λ rail to a bound). It is the runtime
958// enforcement layer for the criterion-atom architecture (#931): atoms make
959// desync structurally hard, the certificate makes any residue observable.
960//
961// Cost discipline: at most four value-path evaluations at the single final
962// point, outside every hot loop. The value path is evaluated through
963// `eval_cost` at θ̂±hv — points the gradient path never visited, so the
964// existing ρ-keyed caches naturally miss and the true value code runs.
965// Disagreement does not fail the fit: it names the broken criterion loudly
966// in the result, the log, and the report.
967
968/// Deterministic unit direction on the θ sphere for the certificate audit.
969///
970/// Seeded from the problem fingerprint (context string + θ̂ bits) via FNV-1a
971/// and expanded with SplitMix64 + Box–Muller — no clock, no global RNG, so
972/// the audit direction is reproducible across runs of the same fit.
973pub(crate) fn certificate_audit_direction(theta: &Array1<f64>, context: &str) -> Array1<f64> {
974 let mut seed: u64 = 0xcbf2_9ce4_8422_2325;
975 let mut fnv = |byte: u8| {
976 seed ^= u64::from(byte);
977 seed = seed.wrapping_mul(0x0000_0100_0000_01b3);
978 };
979 for byte in context.bytes() {
980 fnv(byte);
981 }
982 for &x in theta.iter() {
983 for byte in x.to_bits().to_le_bytes() {
984 fnv(byte);
985 }
986 }
987 let mut state = seed;
988 let mut next_unit = move || {
989 state = state.wrapping_add(0x9e37_79b9_7f4a_7c15);
990 let mut z = state;
991 z = (z ^ (z >> 30)).wrapping_mul(0xbf58_476d_1ce4_e5b9);
992 z = (z ^ (z >> 27)).wrapping_mul(0x94d0_49bb_1331_11eb);
993 z ^= z >> 31;
994 // Uniform in (0, 1): 53 mantissa bits, nudged off zero for the log.
995 ((z >> 11) as f64 + 0.5) / (1u64 << 53) as f64
996 };
997 let mut direction = Array1::<f64>::zeros(theta.len());
998 let mut i = 0;
999 while i < direction.len() {
1000 let (u1, u2) = (next_unit(), next_unit());
1001 let radius = (-2.0 * u1.ln()).sqrt();
1002 let angle = 2.0 * std::f64::consts::PI * u2;
1003 direction[i] = radius * angle.cos();
1004 if i + 1 < direction.len() {
1005 direction[i + 1] = radius * angle.sin();
1006 }
1007 i += 2;
1008 }
1009 let norm = direction.dot(&direction).sqrt();
1010 if norm.is_finite() && norm > f64::EPSILON {
1011 direction.mapv_inplace(|v| v / norm);
1012 direction
1013 } else {
1014 // Degenerate draw (probability ~0): fall back to the first axis.
1015 let mut fallback = Array1::<f64>::zeros(theta.len());
1016 fallback[0] = 1.0;
1017 fallback
1018 }
1019}
1020
1021/// Plain Cholesky positive-definiteness probe for the (small, outer-dim)
1022/// final Hessian. Returns `None` when the matrix is empty, non-square, or
1023/// non-finite; `Some(false)` on any non-positive pivot.
1024pub(crate) fn certificate_hessian_is_pd(hessian: &Array2<f64>) -> Option<bool> {
1025 let n = hessian.nrows();
1026 if n == 0 || hessian.ncols() != n || hessian.iter().any(|v| !v.is_finite()) {
1027 return None;
1028 }
1029 let mut chol = hessian.clone();
1030 for j in 0..n {
1031 for k in 0..j {
1032 let l_jk = chol[[j, k]];
1033 for i in j..n {
1034 chol[[i, j]] -= chol[[i, k]] * l_jk;
1035 }
1036 }
1037 let pivot = chol[[j, j]];
1038 if !(pivot > 0.0) || !pivot.is_finite() {
1039 return Some(false);
1040 }
1041 let inv_sqrt = 1.0 / pivot.sqrt();
1042 for i in j..n {
1043 chol[[i, j]] *= inv_sqrt;
1044 }
1045 }
1046 Some(true)
1047}
1048
1049/// Smoothing coordinates (leading ρ block) railed against the outer box.
1050pub(crate) fn certificate_railed_lambdas(
1051 rho: &Array1<f64>,
1052 rho_dim: usize,
1053 config: &OuterConfig,
1054) -> Vec<usize> {
1055 (0..rho_dim.min(rho.len()))
1056 .filter(|&k| {
1057 let (lo, hi) = match config.bounds.as_ref() {
1058 Some((lo, hi)) if k < lo.len() && k < hi.len() => (lo[k], hi[k]),
1059 Some(_) => return false,
1060 None => (-config.rho_bound, config.rho_bound),
1061 };
1062 (rho[k] - lo).abs() <= CERTIFICATE_RAIL_MARGIN
1063 || (hi - rho[k]).abs() <= CERTIFICATE_RAIL_MARGIN
1064 })
1065 .collect()
1066}
1067
1068/// Perform the randomized first-order self-audit at the returned optimum.
1069///
1070/// Requires an analytic final gradient (the thing being audited); returns
1071/// `None` — never an error — when the gradient is absent/non-finite or when
1072/// any of the four value probes fails to evaluate, so the audit can never
1073/// fail a fit that the optimizer accepted.
1074pub(crate) fn audit_first_order_optimality(
1075 obj: &mut dyn OuterObjective,
1076 config: &OuterConfig,
1077 context: &str,
1078 result: &OuterResult,
1079) -> Option<CriterionCertificate> {
1080 let gradient = result.final_gradient.as_ref()?;
1081 if gradient.is_empty()
1082 || gradient.len() != result.rho.len()
1083 || gradient.iter().any(|g| !g.is_finite())
1084 || result.rho.iter().any(|r| !r.is_finite())
1085 {
1086 return None;
1087 }
1088
1089 let theta = &result.rho;
1090 let rho_dim = obj.capability().theta_layout().rho_dim();
1091 let railed = certificate_railed_lambdas(theta, rho_dim, config);
1092
1093 // The full-space audit direction is unit-norm over all θ coordinates.
1094 let full_direction = certificate_audit_direction(theta, context);
1095 // At an active box bound the constrained first-order optimality condition
1096 // is KKT: ∇F·e_k need NOT vanish along a railed coordinate k (the bound
1097 // multiplier balances it), AND the central FD steps ρ_k across the bound
1098 // into the infeasible/clamped region, corrupting the value path. An
1099 // unconstrained FD-vs-analytic directional check that spans a railed
1100 // coordinate is therefore ill-posed and produces a spurious disagreement
1101 // (the railed-coordinate audit artifact). Restrict the comparison to the
1102 // free (non-railed, box-interior) subspace: zero the railed components of
1103 // the audit direction and re-normalize. When nothing is railed this is the
1104 // exact original unit direction (byte-identical), so the interior desync
1105 // check the certificate exists for (#748/#752/#808) is unchanged.
1106 let direction = if railed.is_empty() {
1107 full_direction
1108 } else {
1109 let mut projected = full_direction;
1110 for &k in &railed {
1111 if k < projected.len() {
1112 projected[k] = 0.0;
1113 }
1114 }
1115 let norm = projected.dot(&projected).sqrt();
1116 if norm.is_finite() && norm > f64::EPSILON {
1117 projected.mapv_inplace(|v| v / norm);
1118 projected
1119 } else {
1120 // Every audited coordinate is railed (free subspace empty): there
1121 // is no interior direction to audit, so the directional check is
1122 // vacuous. Skip the certificate rather than divide by ~0.
1123 log::debug!(
1124 "[CERTIFICATE] {context}: every audited coordinate railed \
1125 (railed={railed:?}); no free direction to audit, certificate skipped"
1126 );
1127 return None;
1128 }
1129 };
1130 // Central-difference step on the optimal ε^(1/3) scale, sized to the
1131 // iterate so saturated ρ (|ρ| up to rho_bound) keeps θ̂±2hv resolvable.
1132 let theta_scale = theta.iter().fold(0.0_f64, |acc, &v| acc.max(v.abs()));
1133 let step = f64::EPSILON.cbrt() * (1.0 + theta_scale);
1134
1135 let mut probe = |scale: f64| -> Option<f64> {
1136 let point = theta + &(scale * &direction);
1137 match obj.eval_cost(&point) {
1138 Ok(value) if value.is_finite() => Some(value),
1139 Ok(value) => {
1140 log::debug!(
1141 "[CERTIFICATE] {context}: audit probe at θ̂{scale:+.3e}·v returned \
1142 non-finite criterion value {value}; certificate skipped"
1143 );
1144 None
1145 }
1146 Err(err) => {
1147 log::debug!(
1148 "[CERTIFICATE] {context}: audit probe at θ̂{scale:+.3e}·v failed ({err}); \
1149 certificate skipped"
1150 );
1151 None
1152 }
1153 }
1154 };
1155 let f_plus_h = probe(step)?;
1156 let f_minus_h = probe(-step)?;
1157 let f_plus_2h = probe(2.0 * step)?;
1158 let f_minus_2h = probe(-2.0 * step)?;
1159
1160 let d_h = (f_plus_h - f_minus_h) / (2.0 * step);
1161 let d_2h = (f_plus_2h - f_minus_2h) / (4.0 * step);
1162 // FD-OK: FD-audit certificate construction (Richardson FD oracle auditing the analytic gradient, never feeds the optimizer)
1163 let fd_directional = (4.0 * d_h - d_2h) / 3.0; // fd-ok: FD-audit certificate, not in math path
1164 // Error bar: the Richardson residual measures truncation + value-path
1165 // noise (inner-solve tolerance) empirically; the roundoff bound floors
1166 // it when the residual is accidentally tiny.
1167 let value_scale = f_plus_h
1168 .abs()
1169 .max(f_minus_h.abs())
1170 .max(f_plus_2h.abs())
1171 .max(f_minus_2h.abs());
1172 let roundoff = f64::EPSILON * (1.0 + value_scale) / step;
1173 let fd_error = (d_h - d_2h).abs().max(roundoff); // fd-ok: FD-audit certificate, not in math path
1174
1175 let analytic_directional = gradient.dot(&direction);
1176 let grad_norm = gradient.dot(gradient).sqrt();
1177 let agreement_z = (analytic_directional - fd_directional).abs() / fd_error; // fd-ok: FD-audit certificate, not in math path
1178
1179 let certificate = CriterionCertificate {
1180 grad_norm,
1181 analytic_directional,
1182 fd_directional, // fd-ok: FD-audit certificate, not in math path
1183 fd_error, // fd-ok: FD-audit certificate, not in math path
1184 agreement_z,
1185 fd_step: step, // fd-ok: FD-audit certificate, not in math path
1186 // END-FD-OK
1187 hessian_pd: result
1188 .final_hessian
1189 .as_ref()
1190 .and_then(certificate_hessian_is_pd),
1191 lambdas_railed: railed,
1192 };
1193 if certificate.is_clean() {
1194 log::info!("[CERTIFICATE] {context}: {}", certificate.summary());
1195 } else {
1196 log::warn!(
1197 "[CERTIFICATE warning] {context}: optimality self-audit flagged the returned \
1198 optimum — {}",
1199 certificate.summary(),
1200 );
1201 }
1202 Some(certificate)
1203}
1204
1205pub(crate) fn compute_rho_uncertainty_diagnostic(
1206 obj: &mut dyn OuterObjective,
1207 config: &OuterConfig,
1208 context: &str,
1209 result: &mut OuterResult,
1210) -> crate::rho_uncertainty::RhoUncertaintyDiagnostic {
1211 let cap = obj.capability();
1212 let layout = cap.theta_layout();
1213 let rho_dim = layout.rho_dim();
1214 let gate = crate::rho_uncertainty::RhoUncertaintyCostGate {
1215 sample_count: 32,
1216 problem_size: config.rho_uncertainty_problem_size,
1217 };
1218 if let Err(reason) = crate::rho_uncertainty::cost_gate_allows(rho_dim, gate) {
1219 return crate::rho_uncertainty::RhoUncertaintyDiagnostic::skipped(reason, 0);
1220 }
1221 if result.rho.len() != layout.n_params {
1222 return crate::rho_uncertainty::RhoUncertaintyDiagnostic::skipped(
1223 format!(
1224 "final outer point length {} does not match objective dimension {}",
1225 result.rho.len(),
1226 layout.n_params
1227 ),
1228 0,
1229 );
1230 }
1231
1232 let final_eval = match obj.eval_with_order(&result.rho, OuterEvalOrder::ValueGradientHessian) {
1233 Ok(eval) => eval,
1234 Err(err) => {
1235 return crate::rho_uncertainty::RhoUncertaintyDiagnostic::skipped(
1236 format!("final exact Hessian evaluation failed: {err}"),
1237 1,
1238 );
1239 }
1240 };
1241 let hessian = match final_eval.hessian.materialize_dense() {
1242 Ok(Some(hessian)) => hessian,
1243 Ok(None) => {
1244 return crate::rho_uncertainty::RhoUncertaintyDiagnostic::skipped(
1245 "exact outer Hessian unavailable at fitted rho",
1246 1,
1247 );
1248 }
1249 Err(message) => {
1250 return crate::rho_uncertainty::RhoUncertaintyDiagnostic::skipped(
1251 format!("exact outer Hessian materialization failed: {message}"),
1252 1,
1253 );
1254 }
1255 };
1256 if hessian.nrows() != layout.n_params || hessian.ncols() != layout.n_params {
1257 return crate::rho_uncertainty::RhoUncertaintyDiagnostic::skipped(
1258 format!(
1259 "exact outer Hessian shape {}x{} does not match objective dimension {}",
1260 hessian.nrows(),
1261 hessian.ncols(),
1262 layout.n_params
1263 ),
1264 1,
1265 );
1266 }
1267 // Persist the exact outer curvature at θ̂ when the solver did not already
1268 // track one. A gradient-based BFGS solve keeps its inverse-Hessian
1269 // internally and `opt` does not surface it, so `result.final_hessian` is
1270 // `None` on the BFGS path — yet the exact analytic `H(θ̂)` was just
1271 // materialized here for the rho-uncertainty diagnostic and is otherwise
1272 // discarded. Stashing it lets the persistent-cache finalize write carry the
1273 // converged curvature, so the NEXT structurally-matching fit (e.g. the next
1274 // LOSO fold, whose θ̂ and curvature are nearly identical) can seed BFGS with
1275 // `InitialMetric::DenseInverseHessian` and take a quasi-Newton first step
1276 // instead of rediscovering curvature through line-search bracketing. This
1277 // never changes a converged optimum (BFGS converges to ∇V=0 under any SPD
1278 // initial metric); it only reshapes the starting line-search path. Guarded
1279 // on finiteness and on the solver not already owning a Hessian, so the
1280 // exact-Newton / ARC paths (which DO populate `final_hessian`) are untouched.
1281 if result.final_hessian.is_none() && hessian.iter().all(|v| v.is_finite()) {
1282 result.final_hessian = Some(hessian.clone());
1283 }
1284 let mut hessian_rho = Array2::<f64>::zeros((rho_dim, rho_dim));
1285 for row in 0..rho_dim {
1286 for col in 0..rho_dim {
1287 hessian_rho[[row, col]] = hessian[[row, col]];
1288 }
1289 }
1290 let rho_hat = result.rho.slice(ndarray::s![..rho_dim]).to_owned();
1291 let theta_hat = result.rho.clone();
1292 let cost_hat = final_eval.cost;
1293 let final_beta_hint = final_eval.inner_beta_hint.clone();
1294 let diagnostic = {
1295 let mut served_hat_cost = false;
1296 let mut criterion = |rho: &Array1<f64>| -> Option<f64> {
1297 let is_hat = rho.len() == rho_hat.len()
1298 && rho
1299 .iter()
1300 .zip(rho_hat.iter())
1301 .all(|(&left, &right)| left.to_bits() == right.to_bits());
1302 if is_hat && !served_hat_cost {
1303 served_hat_cost = true;
1304 return Some(cost_hat);
1305 }
1306 let mut theta = theta_hat.clone();
1307 for idx in 0..rho_dim {
1308 theta[idx] = rho[idx];
1309 }
1310 if let Some(beta) = final_beta_hint.as_ref()
1311 && obj.seed_inner_state(beta).is_err()
1312 {
1313 return None;
1314 }
1315 obj.eval_cost(&theta).ok()
1316 };
1317 crate::rho_uncertainty::rho_uncertainty_diagnostic(
1318 &rho_hat,
1319 &hessian_rho,
1320 gate,
1321 &mut criterion,
1322 )
1323 };
1324 if let Some(beta) = final_beta_hint.as_ref()
1325 && let Err(err) = obj.seed_inner_state(beta)
1326 {
1327 log::debug!(
1328 "[RHO uncertainty] {context}: final inner-state restore skipped after diagnostic ({err})"
1329 );
1330 }
1331 match &diagnostic.status {
1332 crate::rho_uncertainty::RhoUncertaintyStatus::NoEvidenceOfHeavyTails => {
1333 log::info!(
1334 "[RHO uncertainty] {context}: no heavy-tail evidence at sampled rho proposals k_hat={:.3} evals={}",
1335 diagnostic.k_hat.unwrap_or(f64::NAN),
1336 diagnostic.n_evaluations,
1337 );
1338 }
1339 crate::rho_uncertainty::RhoUncertaintyStatus::HeavyTailsDetected { k_hat } => {
1340 log::warn!(
1341 "[RHO uncertainty] {context}: heavy rho-importance tail detected k_hat={:.3} evals={}",
1342 k_hat,
1343 diagnostic.n_evaluations,
1344 );
1345 }
1346 crate::rho_uncertainty::RhoUncertaintyStatus::Skipped { reason } => {
1347 log::info!("[RHO uncertainty] {context}: skipped ({reason})");
1348 }
1349 }
1350 diagnostic
1351}
1352
1353#[derive(Clone, Copy, Debug, PartialEq, Eq)]
1354pub enum OperatorTrustRegionStopReason {
1355 Converged,
1356 RejectFloor,
1357 IterationBudget,
1358 CostStallFlatValley,
1359 /// Family returned a non-operator Hessian mid-flight after routing into
1360 /// the operator path. Best-effort `x_k` returned with this reason; the
1361 /// caller should consider re-fitting under a different solver class
1362 /// (e.g. BFGS gradient-only) instead of trusting the partial result.
1363 RoutingMismatch,
1364}
1365
1366/// Run the outer smoothing-parameter optimization.
1367///
1368/// This is the single entry point that replaces the scattered optimizer wiring
1369/// across estimate.rs, joint.rs, and custom_family.rs. It:
1370///
1371/// 1. Queries and canonicalizes the objective's capability declaration.
1372/// 2. Calls `plan()` to select solver + hessian source.
1373/// 3. Logs the plan and the analytic derivative capabilities it will consume.
1374/// 4. Generates seed candidates.
1375/// 5. Runs the chosen solver on candidates in heuristic order up to budget.
1376/// 6. If the configured fallback policy allows it, re-plans with degraded
1377/// capabilities chosen centrally inside outer_strategy and retries.
1378/// 7. Returns the best result (including which plan was actually used).
1379///
1380/// Do not wrap `run_outer` calls in try/catch with ad-hoc solver recovery.
1381/// Callers should declare only the primary capability and, at most, whether
1382/// automatic fallback is enabled at all.
1383pub(crate) fn run_outer(
1384 obj: &mut dyn OuterObjective,
1385 config: &OuterConfig,
1386 context: &str,
1387) -> Result<OuterResult, EstimationError> {
1388 // Permutation-invariant outer search (#1538/#1539). When the caller has
1389 // supplied per-coordinate structural keys that induce a non-identity
1390 // canonical order, run the ENTIRE outer pipeline (seeding, multistart,
1391 // optimization, and the #934 certificate / uncertainty audits) in that
1392 // canonical layout against a permuting wrapper, then map the result back to
1393 // the native layout. Seeding/tie-breaking then see byte-identical
1394 // coordinates for every term order, so both orders select the same λ̂.
1395 if let Some(keys) = config.rho_canonical_keys.as_ref()
1396 && let Some(perm) = canonical_permutation(keys)
1397 {
1398 let canonical_config = canonicalize_outer_config(config, &perm);
1399 let mut canonical_obj = CanonicalizedObjective::new(obj, perm.clone());
1400 let result = run_outer(&mut canonical_obj, &canonical_config, context)?;
1401 return Ok(outer_result_to_native(result, &perm));
1402 }
1403 let mut result = run_outer_uncertified(obj, config, context)?;
1404 if config.max_iter <= 1 {
1405 return Ok(result);
1406 }
1407 // First-order optimality self-audit (#934): once, at the returned θ̂,
1408 // outside all hot loops, for every entry point of the solver ladder
1409 // (dense, device, per-atom EFS, fallback plans). Probes evaluate the
1410 // value path at θ̂±hv AFTER the solve, so the only state they perturb
1411 // is warm-start residue O(h) from the optimum — every caller recovers
1412 // its fitted state from `result.rho`, not from last-eval residue.
1413 result.criterion_certificate = audit_first_order_optimality(obj, config, context, &result);
1414 result.rho_uncertainty_diagnostic = Some(compute_rho_uncertainty_diagnostic(
1415 obj,
1416 config,
1417 context,
1418 &mut result,
1419 ));
1420 Ok(result)
1421}
1422
1423/// Build a CANONICAL-order copy of an [`OuterConfig`] for the
1424/// permutation-invariant outer search (#1538/#1539).
1425///
1426/// `perm[c]` is the native coordinate at canonical slot `c`. Every
1427/// per-coordinate config field (initial ρ seed, heuristic-λ seed, per-axis
1428/// bounds, transferred warm Hessian) is reordered native→canonical so the
1429/// optimizer's seeding and multistart operate entirely in canonical space;
1430/// scalar fields are copied verbatim. `rho_canonical_keys` is cleared so the
1431/// recursive [`run_outer`] frame runs the normal (identity-order) pipeline on
1432/// the already-canonical objective.
1433fn canonicalize_outer_config(config: &OuterConfig, perm: &[usize]) -> OuterConfig {
1434 // Permute a per-coordinate slice native→canonical; pass through any length
1435 // that does not match the permutation (defensive — should not occur).
1436 let permute_vec = |v: &[f64]| -> Vec<f64> {
1437 if v.len() == perm.len() {
1438 perm.iter().map(|&i| v[i]).collect()
1439 } else {
1440 v.to_vec()
1441 }
1442 };
1443 let permute_arr = |a: &Array1<f64>| -> Array1<f64> {
1444 if a.len() == perm.len() {
1445 Array1::from_iter(perm.iter().map(|&i| a[i]))
1446 } else {
1447 a.clone()
1448 }
1449 };
1450 let mut canonical = config.clone();
1451 canonical.rho_canonical_keys = None;
1452 if let Some(initial) = config.initial_rho.as_ref() {
1453 canonical.initial_rho = Some(permute_arr(initial));
1454 }
1455 if let Some(h) = config.heuristic_lambdas.as_ref() {
1456 canonical.heuristic_lambdas = Some(permute_vec(h));
1457 }
1458 if let Some((lower, upper)) = config.bounds.as_ref() {
1459 canonical.bounds = Some((permute_arr(lower), permute_arr(upper)));
1460 }
1461 // A transferred dense outer Hessian is in native coordinate order; permute
1462 // it into canonical order so the BFGS warm metric stays aligned. (None on
1463 // the cold-start canonicalized path, so this is usually a no-op.)
1464 if let Some(h) = config.warm_start_outer_hessian.as_ref()
1465 && h.nrows() == perm.len()
1466 && h.ncols() == perm.len()
1467 {
1468 let mut hc = Array2::<f64>::zeros((perm.len(), perm.len()));
1469 for (a, &ia) in perm.iter().enumerate() {
1470 for (b, &ib) in perm.iter().enumerate() {
1471 hc[[a, b]] = h[[ia, ib]];
1472 }
1473 }
1474 canonical.warm_start_outer_hessian = Some(hc);
1475 }
1476 canonical
1477}
1478
1479/// The solver ladder behind [`run_outer`], without the #934 self-audit.
1480pub(crate) fn run_outer_uncertified(
1481 obj: &mut dyn OuterObjective,
1482 config: &OuterConfig,
1483 context: &str,
1484) -> Result<OuterResult, EstimationError> {
1485 let cap = primary_capability_for_config(obj.capability(), config, context);
1486 cap.validate_layout(context)?;
1487 if let Some(initial_rho) = config.initial_rho.as_ref() {
1488 cap.theta_layout()
1489 .validate_point_len(initial_rho, "initial outer seed")
1490 .map_err(|err| match err {
1491 ObjectiveEvalError::Recoverable { message }
1492 | ObjectiveEvalError::Fatal { message } => {
1493 EstimationError::RemlOptimizationFailed(format!("{context}: {message}"))
1494 }
1495 })?;
1496 }
1497 crate::estimate::reml::outer_eval::clear_outer_ift_residual_energy_for_fit();
1498
1499 // Frontier ρ-scaling auto-switch (#986): at per-atom-EFS-eligible frontier
1500 // rho dimension the decoupled per-atom fixed point is the primary outer
1501 // iteration; everything else falls through to the dense / standard path
1502 // below. Routed here so every entry point inherits it (magic by default).
1503 if let Some(result) = run_per_atom_efs_if_frontier(obj, config, context)? {
1504 return Ok(result);
1505 }
1506
1507 if cap.n_params == 0 {
1508 let cost = obj.eval_cost(&Array1::zeros(0))?;
1509 let the_plan = plan(&cap);
1510 return Ok(outer_result_with_gradient_norm(
1511 Array1::zeros(0),
1512 cost,
1513 0,
1514 Some(0.0),
1515 true,
1516 the_plan,
1517 ));
1518 }
1519
1520 // Build the ordered list of capabilities to attempt: primary first, then
1521 // any centrally-derived degraded capabilities. Aux direct-search has no
1522 // degraded ladder — a single attempt either succeeds or the failure is
1523 // surfaced to the caller.
1524 let fallback_attempts = match config.fallback_policy {
1525 FallbackPolicy::Automatic => automatic_fallback_attempts(&cap),
1526 FallbackPolicy::Disabled => Vec::new(),
1527 };
1528 let mut attempts: Vec<OuterCapability> = Vec::with_capacity(1 + fallback_attempts.len());
1529 attempts.push(cap.clone());
1530 for degraded in fallback_attempts {
1531 attempts.push(degraded);
1532 }
1533
1534 let mut last_error: Option<EstimationError> = None;
1535
1536 for (attempt_idx, attempt_cap) in attempts.iter().enumerate() {
1537 let the_plan = plan(attempt_cap);
1538 if attempt_idx > 0 {
1539 log::debug!("[OUTER] {context}: primary plan failed; falling back to {the_plan}");
1540 }
1541 log_plan(context, attempt_cap, &the_plan);
1542
1543 obj.reset();
1544
1545 // ARC budget-exhaustion retry: when an Arc attempt runs out of
1546 // outer iterations, reseed a fresh Arc run from the previous
1547 // attempt's last ρ and trust radius. Inner caches (PIRLS LRU,
1548 // eval bundle, warm-start predictor, adaptive signals) are wiped
1549 // by `obj.reset()`; the operator-TR's Cauchy/Newton/CG state has
1550 // no resume API and is not preserved. The lever that changes for
1551 // the resumed run is the inner-PIRLS cap (uncapped via the
1552 // feedback handle), not `max_iter` — empirically the prior stall
1553 // was an inner-tolerance / model-fidelity issue, not an outer
1554 // budget shortfall, and doubling `max_iter` only replays the
1555 // same trajectory byte-for-byte. The retry is gated on observed
1556 // `‖g‖` progress so trajectories that made no headway fall
1557 // through to the degraded plan instead of replaying.
1558 let mut arc_retries_left: u32 = if matches!(the_plan.solver, Solver::Arc) {
1559 2
1560 } else {
1561 0
1562 };
1563 let mut retry_config: Option<OuterConfig> = None;
1564 // Tracks the previous ARC attempt's terminal `‖g‖`. The retry
1565 // gate compares attempt-over-attempt: if a retry didn't move
1566 // the gradient norm, the trajectory replayed (same seed, same
1567 // trust radius, cold caches, deterministic optimizer) and
1568 // further retries cannot help. First retry is unconditional
1569 // (no prior attempt to compare against).
1570 let mut prev_attempt_grad_norm: Option<f64> = None;
1571
1572 let outcome = loop {
1573 // Bind the active config by cloning into a local owned value so
1574 // subsequent retry-config assignment does not collide with the
1575 // borrow used inside this iteration body.
1576 let active_config_owned: OuterConfig =
1577 retry_config.clone().unwrap_or_else(|| config.clone());
1578 let active_config: &OuterConfig = &active_config_owned;
1579 match run_outer_with_plan(obj, active_config, context, attempt_cap, &the_plan) {
1580 Ok(result) => {
1581 if result.converged
1582 || arc_retries_left == 0
1583 || matches!(
1584 result.operator_stop_reason,
1585 Some(OperatorTrustRegionStopReason::RejectFloor)
1586 )
1587 {
1588 break Ok(result);
1589 }
1590 // Gate the retry on attempt-over-attempt `‖g‖`
1591 // progress. The first retry is unconditional (no
1592 // prior attempt). Subsequent retries fall through
1593 // to the degraded plan when the gradient norm did
1594 // not materially shrink — the deterministic
1595 // optimizer with the same seed and trust radius
1596 // would replay the same trajectory.
1597 let Some(cur_grad_norm) = result.final_grad_norm else {
1598 log::info!(
1599 "[OUTER] {context}: ARC attempt exhausted budget at \
1600 iter={} cost={:.6e} without a final gradient norm; \
1601 falling through to degraded plan",
1602 result.iterations,
1603 result.final_value,
1604 );
1605 break Ok(result);
1606 };
1607 if let Some(prev_g) = prev_attempt_grad_norm {
1608 let progressed = cur_grad_norm.is_finite()
1609 && prev_g.is_finite()
1610 && cur_grad_norm < 0.5 * prev_g;
1611 if !progressed {
1612 log::info!(
1613 "[OUTER] {context}: ARC retry stalled at \
1614 iter={} cost={:.6e} |g|={:.6e} (prev |g|={:.6e}); \
1615 deterministic replay suspected, falling through \
1616 to degraded plan",
1617 result.iterations,
1618 result.final_value,
1619 cur_grad_norm,
1620 prev_g,
1621 );
1622 break Ok(result);
1623 }
1624 }
1625 let next_trust_radius =
1626 sanitized_operator_trust_restart_radius(result.operator_trust_radius);
1627 log::info!(
1628 "[OUTER] {context}: ARC attempt exhausted budget at \
1629 iter={} cost={:.6e} |g|={:.6e}; resuming from last \
1630 rho + trust_radius={:?}, inner-PIRLS uncapped \
1631 (objective caches wiped; operator-TR Cauchy/Newton \
1632 state is not resumable)",
1633 result.iterations,
1634 result.final_value,
1635 cur_grad_norm,
1636 next_trust_radius,
1637 );
1638 // Snapshot the cap-feedback handle before we
1639 // reassign `retry_config` (which currently backs
1640 // `active_config`'s borrow). `InnerProgressFeedback`
1641 // is an Arc-wrapper bundle, so the clone is cheap.
1642 let cap_feedback = active_config.outer_inner_cap.clone();
1643 let mut next = active_config.clone();
1644 prev_attempt_grad_norm = Some(cur_grad_norm);
1645 next.initial_rho = Some(result.rho.clone());
1646 next.operator_initial_trust_radius = next_trust_radius;
1647 retry_config = Some(next);
1648 arc_retries_left -= 1;
1649 obj.reset();
1650 // Lift any inner-PIRLS cap for the resumed run. The
1651 // schedule's cold-start ladder (3/5/10) would
1652 // re-coarsen exactly the inner solves whose tolerance
1653 // is suspected to have starved the prior trajectory.
1654 // The next outer iter consumes ρ near a near-stationary
1655 // point where exact β / gradient / Hessian is the
1656 // load-bearing input to the operator-TR geometry.
1657 if let Some(feedback) = cap_feedback.as_ref() {
1658 feedback.cap.store(0, Ordering::Relaxed);
1659 }
1660 }
1661 Err(e) => break Err(e),
1662 }
1663 };
1664
1665 match outcome {
1666 Ok(result) => {
1667 if result.converged || attempt_idx + 1 == attempts.len() {
1668 if !result.converged {
1669 log::warn!(
1670 "[OUTER warning] {context}: final outer attempt returned without convergence \
1671 (plan={the_plan}, iterations={}, final_value={:.6e}, |g|={})",
1672 result.iterations,
1673 result.final_value,
1674 result.final_grad_norm_report(),
1675 );
1676 }
1677 return Ok(result);
1678 }
1679
1680 let message = format!(
1681 "{context}: attempt {} (plan={the_plan}) exhausted without convergence",
1682 attempt_idx + 1
1683 );
1684 log::debug!("[OUTER] {message}; trying degraded fallback plan");
1685 last_error = Some(EstimationError::RemlOptimizationFailed(message));
1686 }
1687 Err(e) => {
1688 log::debug!(
1689 "[OUTER] {context}: attempt {} (plan={the_plan}) failed: {e}",
1690 attempt_idx + 1
1691 );
1692 last_error = Some(e);
1693 }
1694 }
1695 }
1696
1697 Err(last_error.unwrap_or_else(|| {
1698 EstimationError::RemlOptimizationFailed(format!("all plan attempts exhausted ({context})"))
1699 }))
1700}
1701
1702// ─── Frontier ρ-scaling auto-switch (issue #986) ─────────────────────────
1703//
1704// ARD-per-atom assigns one smoothing coordinate per dictionary atom, so the
1705// ρ-vector reaches 10^4–10^5 coordinates. A dense outer quasi-Newton over that
1706// materializes an O(K²) Hessian and is impossible at scale. When the ρ-dimension
1707// is frontier-scale AND every coordinate is penalty-like with a working
1708// fixed-point hook, route the PRIMARY outer iteration to the per-atom decoupled
1709// EFS path (`crate::estimate::reml::per_atom_efs`) instead of the dense
1710// ARC/BFGS lane. The decision is auto-derived from the coordinate count alone —
1711// there is no flag — and it is additive: the dense path is unchanged for small K
1712// and for any objective that is not per-atom-EFS-eligible.
1713
1714/// Whether this capability is in the frontier ρ-scaling regime where the
1715/// per-atom decoupled EFS primary should take over from the dense outer.
1716///
1717/// Delegates the eligibility decision to
1718/// [`crate::estimate::reml::per_atom_efs::per_atom_efs_eligible`], which
1719/// requires all-penalty-like coordinates, a working `eval_efs` hook,
1720/// fixed-point not disabled, and a frontier-scale ρ-dimension. This is the
1721/// single auto-switch predicate; `plan` keeps selecting the
1722/// dense or standard-EFS solver for everything below the frontier threshold.
1723pub fn is_per_atom_efs_frontier(cap: &OuterCapability) -> bool {
1724 crate::estimate::reml::per_atom_efs::per_atom_efs_eligible(cap)
1725}
1726
1727/// Auto-switch entry point: when `cap` is frontier-scale per-atom-EFS-eligible,
1728/// run the per-atom decoupled EFS primary and return its [`OuterResult`];
1729/// otherwise return `Ok(None)` so the caller falls through to the existing dense
1730/// / standard-EFS path via [`OuterProblem::run`] / [`run_outer`].
1731///
1732/// Builds the same bounded seed and tolerance/budget the standard plan path
1733/// uses, picks the seed (initial-ρ if supplied, else the first generated
1734/// candidate — the per-atom fixed point is a contraction near the optimum and
1735/// does not need the multi-seed cascade the dense path runs for its non-convex
1736/// quasi-Newton surface), then drives the per-atom EFS loop. The shared-border
1737/// topology defaults to disjoint (every atom owns a private penalty block — the
1738/// common ARD-per-atom case); callers with a known arrow-border overlap can run
1739/// the module's `run_per_atom_efs` directly with a populated
1740/// `SharedBorderTopology`.
1741///
1742/// Additive: this function neither mutates nor bypasses the dense path; it is
1743/// the pre-dispatch shortcut [`run_outer`] calls before the dense ladder.
1744pub(crate) fn run_per_atom_efs_if_frontier(
1745 obj: &mut dyn OuterObjective,
1746 config: &OuterConfig,
1747 context: &str,
1748) -> Result<Option<OuterResult>, EstimationError> {
1749 let cap = primary_capability_for_config(obj.capability(), config, context);
1750 cap.validate_layout(context)?;
1751 if !is_per_atom_efs_frontier(&cap) {
1752 return Ok(None);
1753 }
1754
1755 let the_plan = plan(&cap);
1756 let rho_dim = cap.theta_layout().rho_dim();
1757
1758 let (lower, upper) = outer_bounds_template(config, cap.n_params);
1759
1760 // Seed: cache/explicit initial ρ if present, otherwise the first generated
1761 // candidate. The per-atom multiplicative fixed point is locally
1762 // contractive, so a single seed suffices; the heavy multi-seed cascade
1763 // exists for the dense quasi-Newton's non-convex surface, not for EFS.
1764 let seed = match config.initial_rho.as_ref() {
1765 Some(initial) if initial.len() == cap.n_params => initial.clone(),
1766 _ => {
1767 let generated = crate::seeding::generate_rho_candidates(
1768 cap.n_params,
1769 config.heuristic_lambdas.as_deref(),
1770 &config.seed_config,
1771 );
1772 match generated.into_iter().next() {
1773 Some(first) => first,
1774 None => Array1::<f64>::zeros(cap.n_params),
1775 }
1776 }
1777 };
1778
1779 log::info!(
1780 "[OUTER] {context}: frontier ρ-scaling (rho_dim={rho_dim}) → per-atom decoupled EFS primary"
1781 );
1782
1783 let pa_cfg = crate::estimate::reml::per_atom_efs::PerAtomEfsConfig::new(
1784 config.tolerance,
1785 config.max_iter,
1786 lower,
1787 upper,
1788 );
1789 let topology =
1790 crate::estimate::reml::per_atom_efs::SharedBorderTopology::disjoint(rho_dim);
1791
1792 obj.reset();
1793 let result = crate::estimate::reml::per_atom_efs::run_per_atom_efs(
1794 obj, &seed, &pa_cfg, &topology,
1795 )?;
1796 Ok(Some(result.into_outer_result(the_plan)))
1797}
1798
1799pub(crate) fn outer_bounds(lo: &Array1<f64>, hi: &Array1<f64>) -> Result<Bounds, EstimationError> {
1800 Bounds::new(lo.clone(), hi.clone(), 1e-6).map_err(|err| {
1801 EstimationError::InvalidInput(format!("outer rho bounds are invalid: {err}"))
1802 })
1803}
1804
1805pub(crate) fn outer_bounds_template(config: &OuterConfig, n: usize) -> (Array1<f64>, Array1<f64>) {
1806 config.bounds.clone().unwrap_or_else(|| {
1807 (
1808 Array1::<f64>::from_elem(n, -config.rho_bound),
1809 Array1::<f64>::from_elem(n, config.rho_bound),
1810 )
1811 })
1812}
1813
1814pub(crate) fn outer_tolerance(value: f64) -> Result<Tolerance, EstimationError> {
1815 Tolerance::new(value)
1816 .map_err(|err| EstimationError::InvalidInput(format!("outer tolerance is invalid: {err}")))
1817}
1818
1819pub(crate) fn outer_gradient_tolerance(config: &OuterConfig) -> GradientTolerance {
1820 let abs = config
1821 .objective_scale
1822 .map(|scale| config.tolerance.max(scale * 1.0e-9))
1823 .unwrap_or(config.tolerance);
1824 GradientTolerance {
1825 abs,
1826 rel_initial_grad: None,
1827 rel_cost: Some(config.rel_cost_tolerance.unwrap_or(config.tolerance)),
1828 projected: true,
1829 }
1830}
1831
1832pub(crate) fn outer_max_iterations(value: usize) -> Result<MaxIterations, EstimationError> {
1833 MaxIterations::new(value)
1834 .map_err(|err| EstimationError::InvalidInput(format!("outer max_iter is invalid: {err}")))
1835}
1836
1837pub(crate) fn sanitized_operator_trust_restart_radius(radius: Option<f64>) -> Option<f64> {
1838 radius
1839 .filter(|value| value.is_finite() && *value > 0.0)
1840 .map(|value| value.max(OPERATOR_TRUST_RESTART_RADIUS_FLOOR))
1841}
1842
1843pub(crate) fn bfgs_axis_step_caps(
1844 config: &OuterConfig,
1845 layout: OuterThetaLayout,
1846) -> Option<Array1<f64>> {
1847 if config.bfgs_step_cap.is_none() && config.bfgs_step_cap_psi.is_none() {
1848 return None;
1849 }
1850 let mut caps = Array1::from_elem(layout.n_params, f64::INFINITY);
1851 if let Some(cap) = config.bfgs_step_cap {
1852 for i in 0..layout.rho_dim() {
1853 caps[i] = cap;
1854 }
1855 }
1856 if let Some(cap) = config.bfgs_step_cap_psi {
1857 for i in layout.rho_dim()..layout.n_params {
1858 caps[i] = cap;
1859 }
1860 }
1861 Some(caps)
1862}
1863
1864pub(crate) enum FixedPointOuterRunError {
1865 SeedRejected(EstimationError),
1866 ImmediateFallback(EstimationError),
1867 Failed(EstimationError),
1868}
1869
1870pub(crate) fn run_fixed_point_outer_solver(
1871 obj: &mut dyn OuterObjective,
1872 layout: OuterThetaLayout,
1873 barrier_config: Option<BarrierConfig>,
1874 config: &OuterConfig,
1875 context: &str,
1876 seed: &Array1<f64>,
1877 the_plan: OuterPlan,
1878 label: &str,
1879 failure_prefix: &str,
1880) -> Result<OuterResult, FixedPointOuterRunError> {
1881 let mut objective = OuterFixedPointBridge {
1882 obj,
1883 layout,
1884 barrier_config,
1885 fixed_point_tolerance: config.tolerance,
1886 consecutive_psi_zero_iters: 0,
1887 };
1888 match objective.eval_step(seed) {
1889 Ok(_) => {}
1890 Err(err) => {
1891 let err = match err {
1892 ObjectiveEvalError::Recoverable { message }
1893 | ObjectiveEvalError::Fatal { message } => {
1894 EstimationError::RemlOptimizationFailed(message)
1895 }
1896 };
1897 if requests_immediate_first_order_fallback(&err.to_string()) {
1898 return Err(FixedPointOuterRunError::ImmediateFallback(err));
1899 }
1900 return Err(FixedPointOuterRunError::SeedRejected(err));
1901 }
1902 };
1903 let (lo, hi) = outer_bounds_template(config, layout.n_params);
1904 let bounds = outer_bounds(&lo, &hi).map_err(FixedPointOuterRunError::Failed)?;
1905 let tol = outer_tolerance(config.tolerance).map_err(FixedPointOuterRunError::Failed)?;
1906 let max_iter =
1907 outer_max_iterations(config.max_iter).map_err(FixedPointOuterRunError::Failed)?;
1908 let mut optimizer = FixedPoint::new(seed.clone(), objective)
1909 .with_bounds(bounds)
1910 .with_tolerance(tol)
1911 .with_max_iterations(max_iter);
1912 match optimizer.run() {
1913 Ok(sol) => Ok(solution_into_outer_result(sol, true, the_plan)),
1914 Err(FixedPointError::MaxIterationsReached { last_solution }) => {
1915 log::warn!(
1916 "[OUTER warning] {context}: {label} hit max_iter={} at final_value={:.6e} step_norm={:.3e}",
1917 config.max_iter,
1918 last_solution.final_value,
1919 last_solution.final_gradient_norm.unwrap_or(f64::NAN),
1920 );
1921 Ok(solution_into_outer_result(*last_solution, false, the_plan))
1922 }
1923 Err(e) => Err(FixedPointOuterRunError::Failed(
1924 EstimationError::RemlOptimizationFailed(format!("{failure_prefix}: {e:?}")),
1925 )),
1926 }
1927}