gam_solve/rho_optimizer/objective.rs
1use super::*;
2
3// Re-exported here while the shared EFS contract lives in `gam-problem`.
4pub use gam_problem::EfsEval;
5
6/// Outcome of [`OuterObjective::seed_inner_state`].
7///
8/// Distinguishes two non-error outcomes that callers handle differently:
9///
10/// - [`SeedOutcome::Installed`] — the objective owns an inner-β slot and the
11/// provided β has been stored there. The next `eval*` will warm-start from
12/// this β.
13/// - [`SeedOutcome::NoSlot`] — the objective has no inner-β slot at all. The
14/// provided β is silently discarded. This is the contract reply for
15/// objectives whose inner iterate is conceptually empty (e.g. line-search
16/// bridges, screening proxies, fixed-spec objectives).
17///
18/// Genuine seeding failures (wrong dimension when a slot exists, internal
19/// allocation faults, …) are reported via `Err(EstimationError)`.
20///
21/// The two non-error variants exist because the two real callers want
22/// opposite behavior on the no-slot path:
23///
24/// - The outer cache warm-start path (`OuterProblem::run`) reads a `(ρ, β)`
25/// pair from disk; if the objective has no β slot it must log loudly
26/// ("β-bearing checkpoint silently degraded to ρ-only resume") so cache
27/// provenance is auditable.
28/// - The continuation walk (`prime_outer_seed`) forwards `inner_beta_hint`
29/// from the previous step; if the objective has no β slot the walk
30/// simply proceeds cold — no log, no error.
31///
32/// Encoding the distinction in the return type lets each caller branch on
33/// the variant without inspecting error message strings (the previous
34/// brittle approach, see git history for `is_no_hook` in continuation.rs).
35#[derive(Debug, Clone, Copy, PartialEq, Eq)]
36pub enum SeedOutcome {
37 /// The objective installed the provided β into its inner-β slot.
38 Installed,
39 /// The objective has no inner-β slot; the β was discarded.
40 NoSlot,
41 /// The objective owns an inner-β slot, but the provided β is
42 /// structurally incompatible with this fit's inner block layout
43 /// (its length does not match the per-block coefficient widths). The
44 /// β was discarded and the fit resumes ρ-only.
45 ///
46 /// This is the load-time reply for a *row-relaxed* cross-fit seed
47 /// (the `cache_seed_key` prefix channel): two folds of the same model
48 /// share an ρ-dim, so the cached ρ transfers, but the realized basis
49 /// rank — hence the inner β length — is row-population dependent and
50 /// legitimately differs across folds (the LOSO p=37-vs-p=85 case).
51 /// A length-mismatched seed β is therefore NOT an error: cross-length
52 /// β transfer is delegated to the gauge-projected `FitArtifact`
53 /// channel, which least-squares re-expresses the parent's raw β into
54 /// this fold's reduced subspace. Reporting `Incompatible` here keeps
55 /// the (correct) ρ seed and avoids a spurious full cold-start.
56 Incompatible,
57}
58
59/// Common interface for outer smoothing-parameter objectives.
60///
61/// Every model path that optimizes smoothing parameters implements this trait.
62/// The runner function consumes it and handles solver selection,
63/// multi-start, and logging while delegating derivative fallback policy to
64/// `opt`.
65///
66/// # Contract
67///
68/// - `capability()` must be stable (same result across calls).
69/// - `eval()` may return `HessianResult::Unavailable` at individual trial
70/// points even when `capability().hessian == Analytic`; `opt` degrades that
71/// step to first-order behavior instead of requiring the objective to fake a
72/// stale or non-finite Hessian.
73/// - Use `eval_cost()` / `OuterEval::infeasible()` for infeasible trial points.
74/// Return `Err(...)` for genuine evaluation breakdowns so the runner can mark
75/// the step as a recoverable solver failure and escalate to the next declared
76/// fallback plan if the full attempt still fails.
77/// - `eval_cost()` is used only for cost-based optimization paths.
78/// - `eval()` is the main evaluation path (cost + gradient + optional Hessian).
79/// - `eval_efs()` is used only by the EFS solver. It runs the inner solve,
80/// builds the `InnerSolution`, and computes the EFS step vector. The default
81/// implementation returns an error; only objectives that support EFS need
82/// to override it.
83/// - `reset()` restores state to a clean baseline (for multi-start).
84pub trait OuterObjective {
85 /// Declare what this objective can compute analytically.
86 fn capability(&self) -> OuterCapability;
87
88 /// Evaluate cost only for cost-based optimization paths.
89 fn eval_cost(&mut self, rho: &Array1<f64>) -> Result<f64, EstimationError>;
90
91 /// Evaluate the seed-screening ranking proxy at this `rho`.
92 ///
93 /// Used exclusively by the `rank_seeds_with_screening` cascade. The
94 /// default delegates to [`OuterObjective::eval_cost`], which preserves
95 /// behavior for non-REML objectives.
96 ///
97 /// Concrete REML-state objectives override this to return the per-seed
98 /// minimum penalized deviance observed during the inner P-IRLS solve
99 /// (a monotonically descending quantity that remains a meaningful
100 /// quality signal even at a 3-iteration screening cap), instead of the
101 /// V_LAML criterion (which is dominated by a poorly-conditioned
102 /// `0.5·log|H|` term at partial-fit β̂ and ranks seeds little better
103 /// than random). The proxy fires *only* in screening mode; outside
104 /// screening it must return the regular V_LAML cost so the optimization
105 /// objective is unchanged.
106 ///
107 /// # Why the `eval_cost` default is correct for everyone else (#969)
108 ///
109 /// The partial-fit pathology is CAUSED by the screening cap: it is the
110 /// `0.5·log|H|` term evaluated at a β̂ whose inner solve was truncated
111 /// by `screening_max_inner_iterations`. An objective only suffers it if
112 /// it (a) consumes that cap atomic AND (b) ranks on a curvature-bearing
113 /// criterion at the truncated iterate — which is exactly the REML/LAML
114 /// state-objective family, all of which override this method (or are
115 /// built via `build_objective_with_screening_proxy`). Objectives that
116 /// never wire the cap pay the full inner solve during screening, so
117 /// their screened cost IS the true criterion — slower, but a correct
118 /// ranking by definition, and a proxy could only degrade it. Any future
119 /// objective that starts honoring the screening cap on a
120 /// curvature-bearing criterion must override this with its own
121 /// monotonically-descending inner quantity (the penalized-deviance
122 /// pattern above generalizes: rank on the best inner merit seen, never
123 /// on a curvature term at a truncated iterate).
124 fn eval_screening_proxy(&mut self, rho: &Array1<f64>) -> Result<f64, EstimationError> {
125 self.eval_cost(rho)
126 }
127
128 /// Evaluate cost + gradient + (if capable) Hessian.
129 fn eval(&mut self, rho: &Array1<f64>) -> Result<OuterEval, EstimationError>;
130
131 /// Evaluate the outer objective at the order requested by the active plan.
132 ///
133 /// The default preserves legacy behavior by delegating value-only requests
134 /// to [`OuterObjective::eval_cost`] and derivative requests to
135 /// [`OuterObjective::eval`].
136 fn eval_with_order(
137 &mut self,
138 rho: &Array1<f64>,
139 order: OuterEvalOrder,
140 ) -> Result<OuterEval, EstimationError> {
141 match order {
142 OuterEvalOrder::Value => {
143 let cost = self.eval_cost(rho)?;
144 Ok(OuterEval::value_only(cost, rho.len(), None))
145 }
146 OuterEvalOrder::ValueAndGradient | OuterEvalOrder::ValueGradientHessian => {
147 self.eval(rho)
148 }
149 }
150 }
151
152 /// Evaluate cost + EFS step vector. Only needed when the plan selects
153 /// `Solver::Efs`. The default returns an error indicating EFS is not
154 /// supported by this objective.
155 fn eval_efs(&mut self, rho: &Array1<f64>) -> Result<EfsEval, EstimationError> {
156 Err(EstimationError::RemlOptimizationFailed(format!(
157 "EFS evaluation not implemented for this objective at rho_dim={}",
158 rho.len()
159 )))
160 }
161
162 /// Restore to a clean baseline for the next multi-start candidate.
163 fn reset(&mut self);
164
165 /// Seed the inner-solver iterate before the first eval, e.g. when the
166 /// outer-iterate cache restored a `(ρ, β)` pair from a prior run, or
167 /// when the continuation walk forwards `OuterEval::inner_beta_hint`
168 /// from the previous step.
169 ///
170 /// Objectives make an explicit choice via the [`SeedOutcome`] return:
171 /// implementations with an inner β slot return [`SeedOutcome::Installed`]
172 /// after storing β; implementations without one return
173 /// [`SeedOutcome::NoSlot`]. Genuine seeding failures (wrong dimension
174 /// when a slot exists, etc.) are reported via `Err(EstimationError)`.
175 ///
176 /// Callers that need to distinguish "no slot" from "installed" (the
177 /// outer cache warm-start path, which logs cache provenance) branch on
178 /// the variant. Callers that don't care (the continuation walk, which
179 /// only proceeds-cold when the hint is unusable) ignore it and only
180 /// propagate `Err`.
181 fn seed_inner_state(&mut self, beta: &Array1<f64>) -> Result<SeedOutcome, EstimationError>;
182
183 /// Whether the objective can benefit from continuation pre-warm before
184 /// the first solver eval at a candidate seed.
185 ///
186 /// Pre-warm is only correct for objectives with a real writable inner
187 /// state slot: it evaluates an oversmoothed rho path before the seed and
188 /// forwards non-empty `inner_beta_hint`s between steps. Generic synthetic
189 /// objectives and rho-only cache probes must start at the chosen seed
190 /// directly, otherwise the pre-warm becomes an observable extra eval and
191 /// can clobber seed-dispatch bookkeeping with empty beta seeds.
192 fn allow_continuation_prewarm(&self) -> bool {
193 false
194 }
195
196 /// Optional opt-in to the device-resident outer REML BFGS-over-ρ driver
197 /// (`crate::gpu::reml_outer::run_reml_outer_on_device`). Returns
198 /// `Some(adm)` when the objective is a REML evaluator whose
199 /// `(spec, n, p, num_rho)` admission predicate accepts the device path,
200 /// and `None` otherwise.
201 ///
202 /// The default returns `None` so non-REML objectives (line-search-only
203 /// inner bridges, screening proxies, the EFS / hybrid-EFS sub-objectives)
204 /// keep the host BFGS branch unconditionally — only the concrete
205 /// REML-state objectives override this to consult
206 /// [`crate::estimate::reml::outer_eval::outer_reml_device_admission`].
207 fn outer_device_admission(&self) -> Option<gam_gpu::policy::RemlOuterAdmission> {
208 None
209 }
210
211 /// Whether every joint fit of this objective must ENTER through the
212 /// [`crate::continuation_path::ContinuationPath`] (heavy-smoothing
213 /// entry) rather than being solved cold at the seed ρ*.
214 ///
215 /// The SAE-manifold joint objective overrides this to `true`: its joint
216 /// `(logits, t, β)` block has a combinatorial active-set component that a
217 /// cold solve can collapse, so it is entered at a heavy-smoothing regime
218 /// and annealed down. Crucially, this flips the seed cascade's structural
219 /// failure handling from REJECT to **DEMOTE-WITH-REASON**: a "cold"
220 /// structural defect (rank/alias/active-set diagnosis from the seed
221 /// pre-warm or the uniform-structural early-exit) is not a disqualification
222 /// but a signal to RE-ENTER the same seed at a *heavier* ContinuationPath
223 /// regime. The candidate set therefore never empties on a structural
224 /// diagnosis — every demotion is recorded with its reason and routed to a
225 /// heavier regime.
226 ///
227 /// The default `false` preserves the existing contract for every other
228 /// objective: pre-warm stays an optimization (never a feasibility gate),
229 /// and a uniform structural rejection still short-circuits the cascade.
230 fn requires_continuation_path_entry(&self) -> bool {
231 false
232 }
233
234 /// Run the objective's certified curvature-homotopy entry leg, if it has
235 /// one, leaving the inner state warm at the real (`η = 1`) objective.
236 ///
237 /// An objective with a *certified anchor* — a point known by construction to
238 /// be the global optimum of a relaxed problem — can replace the blind
239 /// multi-seed multistart with a single predictor-corrector walk from that
240 /// anchor to the true objective (#1007). The SAE-manifold objective
241 /// overrides this: its `η = 0` Eckart-Young linear relaxation is convex and
242 /// its optimum is certified by `linear_span_anchor`, so the walk in `η`
243 /// tracks the unique optimal branch to `η = 1`. The walk monitors the
244 /// arrow-factor min-pivot and halves the `η` step when it shrinks; a pivot
245 /// collapse below tolerance is a DETECTED bifurcation (recorded on the fit
246 /// payload, never silent), at which point the objective falls back to the
247 /// documented multi-seed cascade.
248 ///
249 /// Returns:
250 /// * `None` — no certified anchor; use the standard seed cascade
251 /// (the default for every other objective).
252 /// * `Some(Ok(true))` — the walk arrived; the inner state is warm at the
253 /// certified `η = 1` solution and the seed cascade is bypassed.
254 /// * `Some(Ok(false))` — the anchor degenerated or the walk detected a
255 /// bifurcation; fall back to the multi-seed cascade (the report is
256 /// recorded on the objective for the fit payload).
257 /// * `Some(Err(_))` — a hard failure constructing the anchor.
258 fn curvature_homotopy_entry(
259 &mut self,
260 rho: &Array1<f64>,
261 ) -> Option<Result<bool, EstimationError>> {
262 // Default: no certified anchor — but a non-finite seed is reported
263 // here rather than silently handed to the seed cascade, mirroring the
264 // hard-failure contract of the overriding implementations.
265 if let Some(idx) = rho.iter().position(|v| !v.is_finite()) {
266 return Some(Err(EstimationError::RemlOptimizationFailed(format!(
267 "curvature-homotopy entry received non-finite rho[{idx}]"
268 ))));
269 }
270 None
271 }
272
273 /// Let an objective declare that a seed is already a terminal outer result.
274 /// Used for objectives with a certified high-quality construction seed where
275 /// the generic rho optimizer can only degrade the fitted state.
276 fn accept_seed_without_outer_iterations(
277 &mut self,
278 rho: &Array1<f64>,
279 ) -> Result<Option<f64>, EstimationError> {
280 if rho.is_empty() {
281 return Ok(None);
282 }
283 Ok(None)
284 }
285
286 /// Re-install the selected outer result into the mutable objective before
287 /// callers consume objective-owned fitted state. Optimizers may evaluate
288 /// rejected trial points after the best point was found; without this final
289 /// synchronization, stateful objectives can report the last trial fit rather
290 /// than the returned `OuterResult::rho`.
291 fn finalize_outer_result(
292 &mut self,
293 rho: &Array1<f64>,
294 plan: &OuterPlan,
295 ) -> Result<(), EstimationError> {
296 log::debug!(
297 "[OUTER] finalize: re-installing best rho into the objective (solver {:?})",
298 plan.solver
299 );
300 match plan.solver {
301 Solver::Efs | Solver::HybridEfs => self.eval_efs(rho).map(|_| ()),
302 Solver::Bfgs => self
303 .eval_with_order(rho, OuterEvalOrder::ValueAndGradient)
304 .map(|_| ()),
305 Solver::Arc => self
306 .eval_with_order(rho, OuterEvalOrder::ValueGradientHessian)
307 .map(|_| ()),
308 }
309 }
310}
311
312// ─── Persistent warm-start checkpoint plumbing ────────────────────────
313//
314// `CheckpointingObjective` wraps any `OuterObjective` to write a copy of
315// `(rho, cost, eval_id)` to disk on each finite evaluation. The on-disk
316// [`gam_runtime::warm_start::Session`] rate-limits writes (≥2 s gap unless this iterate
317// strictly improves on the best-so-far) so a tight inner loop never thrashes
318// the filesystem. The same checkpoint is also broadcast to optional mirror
319// sessions, which lets interrupted exact-key runs seed later related fits via
320// their prefix key instead of waiting for a final converged write.
321
322#[derive(serde::Serialize, serde::Deserialize)]
323pub(crate) struct IteratePayload {
324 /// Bump on incompatible payload changes; decode rejects mismatches.
325 schema: u32,
326 pub(crate) rho: Vec<f64>,
327 /// Inner-solver iterate (PIRLS β) captured alongside ρ. The (ρ, β)
328 /// pair lives on the implicit-function manifold β = β*(ρ); restoring
329 /// ρ alone forces the next inner solve to reconstruct β from scratch.
330 /// For saturated ρ (|ρ_i| near `rho_bound`) the inner Hessian
331 /// `X'WX + Σ λ_i S_i` has condition number `≈ e^{2·rho_bound}` — Newton
332 /// degrades to O(1/k) descent and the cycle budget exhausts before
333 /// KKT. Caching β lets the resume start in Newton's quadratic basin
334 /// regardless of where ρ lives. Empty when the family did not surface
335 /// an inner-β hint at write time (still useful as a ρ-only seed).
336 #[serde(default)]
337 pub(crate) beta: Vec<f64>,
338 /// Converged exact outer curvature `H(θ̂)` (full θ×θ, row-major flatten),
339 /// captured alongside the (ρ, β) iterate. A gradient-based BFGS solve does
340 /// not surface its accumulated inverse-Hessian, so the next
341 /// structurally-matching fit (e.g. the next LOSO fold) otherwise restarts
342 /// BFGS from an unscaled identity metric and rediscovers curvature through
343 /// line-search bracketing — multiple full inner-solve value probes per
344 /// accepted outer step. Persisting the converged curvature lets the resume
345 /// seed `InitialMetric::DenseInverseHessian(H⁻¹)` for a quasi-Newton first
346 /// step. Empty when no exact outer Hessian was available at write time
347 /// (still a valid ρ/β seed). `hessian_dim²` must equal `hessian.len()`.
348 #[serde(default)]
349 pub(crate) hessian: Vec<f64>,
350 /// Side length of the square `hessian` matrix (`hessian.len() == dim²`).
351 /// Zero when no Hessian was persisted.
352 #[serde(default)]
353 pub(crate) hessian_dim: usize,
354 pub(crate) cost: f64,
355 eval_id: u64,
356}
357
358/// Entries with a different schema id are rejected by `decode_iterate`
359/// so incompatible on-disk payloads fall through to cold start instead
360/// of seeding the inner solve with a malformed iterate.
361pub(crate) const ITERATE_PAYLOAD_SCHEMA: u32 = 2;
362
363pub(crate) fn encode_iterate(
364 rho: &Array1<f64>,
365 beta: Option<&Array1<f64>>,
366 hessian: Option<&Array2<f64>>,
367 cost: f64,
368 eval_id: u64,
369) -> Option<Vec<u8>> {
370 // Persist the converged outer curvature only when it is square and finite;
371 // a non-finite or non-square Hessian is dropped (the resume falls back to a
372 // ρ/β-only seed) so a malformed curvature can never corrupt a warm start.
373 let (hessian_flat, hessian_dim) = match hessian {
374 Some(h) if h.nrows() == h.ncols() && h.iter().all(|v| v.is_finite()) => {
375 (h.iter().copied().collect::<Vec<f64>>(), h.nrows())
376 }
377 _ => (Vec::new(), 0),
378 };
379 let p = IteratePayload {
380 schema: ITERATE_PAYLOAD_SCHEMA,
381 rho: rho.to_vec(),
382 beta: beta.map(|b| b.to_vec()).unwrap_or_default(),
383 hessian: hessian_flat,
384 hessian_dim,
385 cost,
386 eval_id,
387 };
388 serde_json::to_vec(&p).ok()
389}
390
391pub(crate) fn decode_iterate(bytes: &[u8], expected_rho_dim: usize) -> Option<IteratePayload> {
392 let mut p: IteratePayload = serde_json::from_slice(bytes).ok()?;
393 if p.schema != ITERATE_PAYLOAD_SCHEMA {
394 return None;
395 }
396 if p.rho.len() != expected_rho_dim {
397 return None;
398 }
399 if !p.rho.iter().all(|x| x.is_finite()) || !p.cost.is_finite() {
400 return None;
401 }
402 if !p.beta.iter().all(|x| x.is_finite()) {
403 return None;
404 }
405 // A persisted Hessian must be square (`dim²` entries) and finite to be
406 // usable as a warm-start metric; an inconsistent or non-finite curvature is
407 // scrubbed to "no Hessian" rather than rejecting the whole iterate, so the
408 // ρ/β seed still warms the resume.
409 if p.hessian_dim.saturating_mul(p.hessian_dim) != p.hessian.len()
410 || !p.hessian.iter().all(|x| x.is_finite())
411 {
412 p.hessian = Vec::new();
413 p.hessian_dim = 0;
414 }
415 Some(p)
416}
417
418/// Outcome of inspecting a cache entry as a seed for the outer optimizer.
419///
420/// The classifier rejects only entries that fail structural validity
421/// (wrong dimension, non-finite payload). It does NOT reshape ρ based on
422/// saturation: every finite, well-shaped entry is honored as the next
423/// run's seed.
424///
425/// Previously this enum carried `saturated_coords` / `clamped_to` /
426/// "all-coords-saturated-poisoned-entry" branches that pulled boundary
427/// ρ inward or discarded fully-saturated entries. Those were read-side
428/// band-aids over the real bug: the warm-start contract stored ρ but
429/// not β, so resuming at boundary ρ forced PIRLS to recompute β from
430/// cold-start against a Hessian with condition number `≈ e^{2·rho_bound}`,
431/// and Newton degraded to O(1/k) descent that exhausted the cycle budget.
432///
433/// The contract is now `(ρ, β)`: the schema-2 iterate payload carries
434/// both, and [`CheckpointingObjective`] refuses to persist a divergent
435/// inner state (non-finite cost or β). Boundary ρ — when written under
436/// the new invariant — is a *legitimate* finding (the smoothness wants
437/// to be near-null), and the cached β puts the next inner solve at the
438/// previously converged iterate where the gradient is already at zero.
439/// No clamp or shape-based discard is needed.
440#[derive(Debug)]
441pub(crate) enum CacheSeedDecision {
442 ExactFinal {
443 rho: Array1<f64>,
444 /// Optional inner β captured at the converged ρ. Empty when the
445 /// payload didn't carry one (legacy ρ-only writes or families
446 /// that don't surface β).
447 beta: Vec<f64>,
448 final_value: f64,
449 iterations: usize,
450 prior_obj_display: f64,
451 },
452 Seed {
453 rho: Array1<f64>,
454 /// Optional inner β to prime the next run's inner solver via
455 /// [`OuterObjective::seed_inner_state`]. When non-empty, the
456 /// dispatcher injects β before the first eval so the inner
457 /// PIRLS opens at zero-gradient regardless of where ρ sits in
458 /// the box.
459 beta: Vec<f64>,
460 /// Optional converged outer Hessian `H(θ̂)` from the prior fit, as a
461 /// `(dim, row-major flatten)` pair. `None` when the payload carried no
462 /// curvature (legacy ρ/β-only writes). Seeds the BFGS iter-0 metric on
463 /// the resume so the first outer step is quasi-Newton.
464 hessian: Option<(usize, Vec<f64>)>,
465 prior_obj_display: f64,
466 iteration: u64,
467 },
468 Discard {
469 reason: &'static str,
470 prior_obj_display: f64,
471 all_rho_finite: Option<bool>,
472 },
473}
474
475pub(crate) fn classify_cache_entry_for_outer(
476 loaded: &gam_runtime::warm_start::LoadedEntry,
477 expected_rho_dim: usize,
478) -> CacheSeedDecision {
479 let entry = &loaded.entry;
480 let Some(payload) = decode_iterate(&entry.payload, expected_rho_dim) else {
481 return CacheSeedDecision::Discard {
482 reason: "payload-shape-mismatch",
483 prior_obj_display: entry.objective.unwrap_or(f64::NAN),
484 all_rho_finite: None,
485 };
486 };
487 let cached_rho = Array1::from_vec(payload.rho);
488 let prior_obj_display = entry.objective.unwrap_or(f64::NAN);
489 if matches!(entry.objective, Some(v) if !v.is_finite()) {
490 return CacheSeedDecision::Discard {
491 reason: "non-finite-payload",
492 prior_obj_display,
493 all_rho_finite: Some(cached_rho.iter().all(|v| v.is_finite())),
494 };
495 }
496 if !cached_rho.iter().all(|v| v.is_finite()) {
497 return CacheSeedDecision::Discard {
498 reason: "non-finite-payload",
499 prior_obj_display,
500 all_rho_finite: Some(false),
501 };
502 }
503 if loaded.source == LoadSource::Exact && entry.kind == gam_runtime::warm_start::EntryKind::Final
504 {
505 return CacheSeedDecision::ExactFinal {
506 rho: cached_rho,
507 beta: payload.beta,
508 final_value: entry.objective.unwrap_or(payload.cost),
509 iterations: entry
510 .iteration
511 .unwrap_or(payload.eval_id)
512 .min(usize::MAX as u64) as usize,
513 prior_obj_display,
514 };
515 }
516 let hessian = if payload.hessian_dim > 0
517 && payload.hessian.len() == payload.hessian_dim * payload.hessian_dim
518 {
519 Some((payload.hessian_dim, payload.hessian))
520 } else {
521 None
522 };
523 CacheSeedDecision::Seed {
524 rho: cached_rho,
525 beta: payload.beta,
526 hessian,
527 prior_obj_display,
528 iteration: entry.iteration.unwrap_or(payload.eval_id),
529 }
530}
531
532pub fn cache_entry_would_help_outer(
533 loaded: &gam_runtime::warm_start::LoadedEntry,
534 expected_rho_dim: usize,
535) -> bool {
536 matches!(
537 classify_cache_entry_for_outer(loaded, expected_rho_dim),
538 CacheSeedDecision::ExactFinal { .. } | CacheSeedDecision::Seed { .. }
539 )
540}
541
542pub(crate) struct CheckpointingObjective<'a> {
543 inner: &'a mut dyn OuterObjective,
544 session: Arc<CacheSession>,
545 mirror_sessions: Vec<Arc<CacheSession>>,
546 eval_counter: AtomicU64,
547 /// Most-recent inner β surfaced via [`OuterEval::inner_beta_hint`]. The
548 /// finalize path reads this so the `kind: Final` write encodes the
549 /// (ρ, β) pair that the BFGS optimum was actually fitted at — without
550 /// this the finalize would clobber per-eval checkpoint β state with a
551 /// ρ-only payload, reintroducing the cold-β resume failure.
552 last_inner_beta: std::sync::Mutex<Option<Array1<f64>>>,
553}
554
555impl<'a> CheckpointingObjective<'a> {
556 pub(crate) fn new(
557 inner: &'a mut dyn OuterObjective,
558 session: Arc<CacheSession>,
559 mirror_sessions: Vec<Arc<CacheSession>>,
560 ) -> Self {
561 Self {
562 inner,
563 session,
564 mirror_sessions,
565 eval_counter: AtomicU64::new(0),
566 last_inner_beta: std::sync::Mutex::new(None),
567 }
568 }
569
570 pub(crate) fn last_inner_beta(&self) -> Option<Array1<f64>> {
571 self.last_inner_beta.lock().ok().and_then(|g| g.clone())
572 }
573
574 fn note(&self, rho: &Array1<f64>, beta: Option<&Array1<f64>>, cost: f64) {
575 if !cost.is_finite() {
576 return;
577 }
578 // If β is provided, require it to be finite; non-finite β is a
579 // divergent inner state — persisting it would re-poison the cache.
580 if let Some(b) = beta {
581 if !b.iter().all(|v| v.is_finite()) {
582 return;
583 }
584 if let Ok(mut guard) = self.last_inner_beta.lock() {
585 *guard = Some(b.clone());
586 }
587 }
588 let i = self.eval_counter.fetch_add(1, Ordering::Relaxed);
589 // Per-eval checkpoints carry no converged outer Hessian (curvature is
590 // only meaningful at the final optimum); the finalize write is where the
591 // converged `H(θ̂)` is persisted for cross-fit warm starts.
592 if let Some(bytes) = encode_iterate(rho, beta, None, cost, i) {
593 self.session.checkpoint(&bytes, Some(cost), Some(i));
594 for mirror in &self.mirror_sessions {
595 mirror.checkpoint(&bytes, Some(cost), Some(i));
596 }
597 }
598 }
599}
600
601impl<'a> OuterObjective for CheckpointingObjective<'a> {
602 fn capability(&self) -> OuterCapability {
603 self.inner.capability()
604 }
605
606 fn eval_cost(&mut self, rho: &Array1<f64>) -> Result<f64, EstimationError> {
607 let v = self.inner.eval_cost(rho)?;
608 // `eval_cost` carries no inner-β handle — persist ρ-only.
609 self.note(rho, None, v);
610 Ok(v)
611 }
612
613 fn eval_screening_proxy(&mut self, rho: &Array1<f64>) -> Result<f64, EstimationError> {
614 // Screening proxies run at sub-converged β̂ and aren't a meaningful
615 // best-so-far signal; forward without persisting.
616 self.inner.eval_screening_proxy(rho)
617 }
618
619 fn eval(&mut self, rho: &Array1<f64>) -> Result<OuterEval, EstimationError> {
620 let r = self.inner.eval(rho)?;
621 self.note(rho, r.inner_beta_hint.as_ref(), r.cost);
622 Ok(r)
623 }
624
625 fn eval_with_order(
626 &mut self,
627 rho: &Array1<f64>,
628 order: OuterEvalOrder,
629 ) -> Result<OuterEval, EstimationError> {
630 let r = self.inner.eval_with_order(rho, order)?;
631 self.note(rho, r.inner_beta_hint.as_ref(), r.cost);
632 Ok(r)
633 }
634
635 fn eval_efs(&mut self, rho: &Array1<f64>) -> Result<EfsEval, EstimationError> {
636 let r = self.inner.eval_efs(rho)?;
637 // EfsEval has no inner-β hint surface yet — persist ρ-only.
638 self.note(rho, None, r.cost);
639 Ok(r)
640 }
641
642 fn seed_inner_state(&mut self, beta: &Array1<f64>) -> Result<SeedOutcome, EstimationError> {
643 // Forward to the wrapped objective, then prime our last-inner-beta
644 // cache so a subsequent finalize-write encodes the seeded β if no
645 // eval surfaces a fresher β first. Only prime on actual install —
646 // `NoSlot` means the inner solver will not see β, so the cache
647 // entry would be a lie.
648 let result = self.inner.seed_inner_state(beta);
649 if matches!(result, Ok(SeedOutcome::Installed))
650 && beta.iter().all(|v| v.is_finite())
651 && let Ok(mut guard) = self.last_inner_beta.lock()
652 {
653 *guard = Some(beta.clone());
654 }
655 result
656 }
657
658 fn allow_continuation_prewarm(&self) -> bool {
659 self.inner.allow_continuation_prewarm()
660 }
661
662 fn requires_continuation_path_entry(&self) -> bool {
663 self.inner.requires_continuation_path_entry()
664 }
665
666 fn reset(&mut self) {
667 self.inner.reset();
668 }
669}
670
671/// Closure-based adapter for [`OuterObjective`].
672///
673/// This allows any call site to construct an `OuterObjective` from closures
674/// without needing to define a wrapper struct or modify the state type.
675/// Each call site wraps its existing methods into closures and passes them here.
676pub struct ClosureObjective<
677 S,
678 Fc,
679 Fe,
680 Fr = fn(&mut S),
681 Fefs = fn(&mut S, &Array1<f64>) -> Result<EfsEval, EstimationError>,
682 Feo = fn(&mut S, &Array1<f64>, OuterEvalOrder) -> Result<OuterEval, EstimationError>,
683 Fsp = fn(&mut S, &Array1<f64>) -> Result<f64, EstimationError>,
684 Fseed = fn(&mut S, &Array1<f64>) -> Result<SeedOutcome, EstimationError>,
685> {
686 pub state: S,
687 pub(crate) cap: OuterCapability,
688 pub(crate) cost_fn: Fc,
689 pub(crate) eval_fn: Fe,
690 /// Optional order-aware eval closure. When `None`, `eval_with_order()`
691 /// falls back to `eval()`.
692 pub(crate) eval_order_fn: Option<Feo>,
693 /// Optional reset closure. When `None`, `reset()` is a no-op.
694 pub(crate) reset_fn: Option<Fr>,
695 /// Optional EFS evaluation closure. When `None`, the default
696 /// `OuterObjective::eval_efs` returns an error.
697 pub(crate) efs_fn: Option<Fefs>,
698 /// Optional seed-screening ranking proxy closure. When `None`,
699 /// `eval_screening_proxy()` falls back to `eval_cost()` (the trait
700 /// default), preserving legacy behavior for non-REML objectives.
701 pub(crate) screening_proxy_fn: Option<Fsp>,
702 /// Optional inner-state seeding closure. Objectives with PIRLS / Newton
703 /// inner state install cached β here before the first outer eval.
704 pub(crate) seed_fn: Option<Fseed>,
705 /// Whether a seed hook should also opt the objective into generic
706 /// continuation pre-warm. High-dimensional REML keeps the seed hook for
707 /// cache/warm-start replay but declines the expensive rho-anneal pre-pass.
708 pub(crate) continuation_prewarm: bool,
709}
710
711impl<S, Fc, Fe, Fr, Fefs, Feo, Fsp, Fseed> OuterObjective
712 for ClosureObjective<S, Fc, Fe, Fr, Fefs, Feo, Fsp, Fseed>
713where
714 Fc: FnMut(&mut S, &Array1<f64>) -> Result<f64, EstimationError>,
715 Fe: FnMut(&mut S, &Array1<f64>) -> Result<OuterEval, EstimationError>,
716 Fr: FnMut(&mut S),
717 Fefs: FnMut(&mut S, &Array1<f64>) -> Result<EfsEval, EstimationError>,
718 Feo: FnMut(&mut S, &Array1<f64>, OuterEvalOrder) -> Result<OuterEval, EstimationError>,
719 Fsp: FnMut(&mut S, &Array1<f64>) -> Result<f64, EstimationError>,
720 Fseed: FnMut(&mut S, &Array1<f64>) -> Result<SeedOutcome, EstimationError>,
721{
722 fn capability(&self) -> OuterCapability {
723 self.cap.clone()
724 }
725
726 fn eval_cost(&mut self, rho: &Array1<f64>) -> Result<f64, EstimationError> {
727 crate::estimate::reml::outer_eval::record_current_outer_theta_for_ift(rho);
728 (self.cost_fn)(&mut self.state, rho)
729 }
730
731 fn eval_screening_proxy(&mut self, rho: &Array1<f64>) -> Result<f64, EstimationError> {
732 crate::estimate::reml::outer_eval::record_current_outer_theta_for_ift(rho);
733 match self.screening_proxy_fn.as_mut() {
734 Some(f) => f(&mut self.state, rho),
735 None => (self.cost_fn)(&mut self.state, rho),
736 }
737 }
738
739 fn eval(&mut self, rho: &Array1<f64>) -> Result<OuterEval, EstimationError> {
740 crate::estimate::reml::outer_eval::record_current_outer_theta_for_ift(rho);
741 (self.eval_fn)(&mut self.state, rho)
742 }
743
744 fn eval_with_order(
745 &mut self,
746 rho: &Array1<f64>,
747 order: OuterEvalOrder,
748 ) -> Result<OuterEval, EstimationError> {
749 crate::estimate::reml::outer_eval::record_current_outer_theta_for_ift(rho);
750 match self.eval_order_fn.as_mut() {
751 Some(f) => f(&mut self.state, rho, order),
752 None => (self.eval_fn)(&mut self.state, rho),
753 }
754 }
755
756 fn eval_efs(&mut self, rho: &Array1<f64>) -> Result<EfsEval, EstimationError> {
757 crate::estimate::reml::outer_eval::record_current_outer_theta_for_ift(rho);
758 match self.efs_fn.as_mut() {
759 Some(f) => f(&mut self.state, rho),
760 None => Err(EstimationError::RemlOptimizationFailed(
761 "EFS evaluation not implemented for this objective".to_string(),
762 )),
763 }
764 }
765
766 fn seed_inner_state(&mut self, beta: &Array1<f64>) -> Result<SeedOutcome, EstimationError> {
767 // Empty β: by convention, "no warm-start available" — treat as a
768 // no-op install. Distinct from `NoSlot` because the objective may
769 // very well have a slot; the caller just didn't supply a β to fill
770 // it. Reporting `Installed` is correct: the slot's pre-existing
771 // state (cold default) is the post-seed state.
772 if beta.is_empty() {
773 return Ok(SeedOutcome::Installed);
774 }
775 match self.seed_fn.as_mut() {
776 Some(f) => f(&mut self.state, beta),
777 // No hook installed — the objective owns no inner-β slot.
778 // The caller decides whether this is a loud cache-provenance
779 // event or a silent continuation-walk degradation.
780 None => Ok(SeedOutcome::NoSlot),
781 }
782 }
783
784 fn allow_continuation_prewarm(&self) -> bool {
785 self.continuation_prewarm && self.seed_fn.is_some()
786 }
787
788 fn reset(&mut self) {
789 if let Some(f) = self.reset_fn.as_mut() {
790 f(&mut self.state);
791 }
792 }
793}
794
795impl<S, Fc, Fe, Fr, Fefs, Feo, Fsp> ClosureObjective<S, Fc, Fe, Fr, Fefs, Feo, Fsp>
796where
797 Fc: FnMut(&mut S, &Array1<f64>) -> Result<f64, EstimationError>,
798 Fe: FnMut(&mut S, &Array1<f64>) -> Result<OuterEval, EstimationError>,
799 Fr: FnMut(&mut S),
800 Fefs: FnMut(&mut S, &Array1<f64>) -> Result<EfsEval, EstimationError>,
801 Feo: FnMut(&mut S, &Array1<f64>, OuterEvalOrder) -> Result<OuterEval, EstimationError>,
802 Fsp: FnMut(&mut S, &Array1<f64>) -> Result<f64, EstimationError>,
803{
804 pub fn with_seed_inner_state<Fseed>(
805 self,
806 seed_fn: Fseed,
807 ) -> ClosureObjective<S, Fc, Fe, Fr, Fefs, Feo, Fsp, Fseed>
808 where
809 Fseed: FnMut(&mut S, &Array1<f64>) -> Result<SeedOutcome, EstimationError>,
810 {
811 ClosureObjective {
812 state: self.state,
813 cap: self.cap,
814 cost_fn: self.cost_fn,
815 eval_fn: self.eval_fn,
816 eval_order_fn: self.eval_order_fn,
817 reset_fn: self.reset_fn,
818 efs_fn: self.efs_fn,
819 screening_proxy_fn: self.screening_proxy_fn,
820 seed_fn: Some(seed_fn),
821 continuation_prewarm: self.continuation_prewarm,
822 }
823 }
824}
825
826pub(crate) fn into_objective_error(context: &str, err: EstimationError) -> ObjectiveEvalError {
827 ObjectiveEvalError::recoverable(format!("{context}: {err}"))
828}
829
830pub(crate) fn finite_cost_or_error(context: &str, cost: f64) -> Result<f64, ObjectiveEvalError> {
831 if cost.is_finite() {
832 Ok(cost)
833 } else {
834 Err(ObjectiveEvalError::recoverable(format!(
835 "{context}: objective returned a non-finite cost"
836 )))
837 }
838}
839
840/// Shared first-order validation: gradient length, finite cost, finite gradient.
841///
842/// Extracted so the cost+gradient checks live in exactly one place — both the
843/// full (`finite_outer_eval_or_error`) and first-order
844/// (`finite_outer_first_order_eval_or_error`) validators delegate here, keeping
845/// their error messages and check order bit-for-bit identical.
846fn validate_outer_first_order(
847 context: &str,
848 layout: OuterThetaLayout,
849 eval: &OuterEval,
850) -> Result<(), ObjectiveEvalError> {
851 layout.validate_gradient_len(&eval.gradient, context)?;
852 if !eval.cost.is_finite() {
853 return Err(ObjectiveEvalError::recoverable(format!(
854 "{context}: objective returned a non-finite cost"
855 )));
856 }
857 if !eval.gradient.iter().all(|v| v.is_finite()) {
858 return Err(ObjectiveEvalError::recoverable(format!(
859 "{context}: objective returned a non-finite gradient"
860 )));
861 }
862 Ok(())
863}
864
865pub(crate) fn finite_outer_eval_or_error(
866 context: &str,
867 layout: OuterThetaLayout,
868 eval: OuterEval,
869) -> Result<OuterEval, ObjectiveEvalError> {
870 validate_outer_first_order(context, layout, &eval)?;
871 match &eval.hessian {
872 HessianResult::Analytic(hessian) => {
873 layout.validate_hessian_shape(hessian, context)?;
874 if !hessian.iter().all(|v| v.is_finite()) {
875 return Err(ObjectiveEvalError::recoverable(format!(
876 "{context}: objective returned a non-finite Hessian"
877 )));
878 }
879 }
880 HessianResult::Operator(op) => {
881 if op.dim() != layout.n_params {
882 return Err(ObjectiveEvalError::recoverable(format!(
883 "{context}: outer Hessian operator dimension mismatch: got {}, expected {} (rho_dim={}, psi_dim={})",
884 op.dim(),
885 layout.n_params,
886 layout.rho_dim(),
887 layout.psi_dim
888 )));
889 }
890 }
891 HessianResult::Unavailable => {}
892 }
893 Ok(eval)
894}
895
896pub(crate) fn finite_outer_first_order_eval_or_error(
897 context: &str,
898 layout: OuterThetaLayout,
899 eval: OuterEval,
900) -> Result<OuterEval, ObjectiveEvalError> {
901 validate_outer_first_order(context, layout, &eval)?;
902 Ok(eval)
903}
904
905pub(crate) fn validate_second_order_seed_hessian(
906 context: &str,
907 layout: OuterThetaLayout,
908 eval: &OuterEval,
909) -> Result<(), ObjectiveEvalError> {
910 if layout.n_params > SECOND_ORDER_GEOMETRY_PROBE_MAX_PARAMS || !eval.hessian.is_analytic() {
911 return Ok(());
912 }
913 if matches!(
914 &eval.hessian,
915 HessianResult::Operator(op) if !op.materialization_capability().is_available()
916 ) {
917 return Ok(());
918 }
919
920 let Some(hessian) = eval.hessian.materialize_dense().map_err(|message| {
921 ObjectiveEvalError::recoverable(format!(
922 "{context}: analytic outer Hessian materialization failed during second-order seed validation: {message}"
923 ))
924 })?
925 else {
926 return Ok(());
927 };
928
929 layout.validate_hessian_shape(&hessian, context)?;
930 if !hessian.iter().all(|value| value.is_finite()) {
931 return Err(ObjectiveEvalError::recoverable(format!(
932 "{context}: analytic outer Hessian probe encountered non-finite entries"
933 )));
934 }
935
936 Ok(())
937}
938
939// ─── Permutation-invariant outer coordinate canonicalization ──────────
940//
941// The additive-term-order (#1539) and tensor-margin-order (#1538) invariance
942// bugs share one root cause: the outer smoothing-parameter optimizer resolves
943// a flat double-penalty REML valley differently depending on the ORDER the
944// penalty blocks are presented (seed placement, multistart, and tie-breaking
945// all operate in native penalty-index order). The design and penalty are
946// symmetric up to a block permutation, so the cure is permutation-invariance
947// by construction: present the optimizer an identical CANONICAL coordinate
948// layout regardless of native order, then map the optimized ρ back.
949//
950// The canonical order is a stable sort of the native coordinates by their
951// structural key (see `PenaltyCoordinate::canonical_structural_key`), which is
952// derived purely from each penalty's rotation-/placement-invariant content —
953// never from its native position. Two formula orders therefore yield the SAME
954// canonical layout, so the optimizer's seeding/multistart/tie-break all run on
955// byte-identical coordinates and select identical λ̂.
956
957/// Canonical→native index map: `perm[c]` is the native coordinate placed at
958/// canonical position `c`.
959///
960/// Returns `None` when the keys are already in canonical order (the permutation
961/// is the identity), so the legacy native-order path runs untouched.
962pub(crate) fn canonical_permutation(keys: &[u64]) -> Option<Vec<usize>> {
963 let n = keys.len();
964 if n <= 1 {
965 return None;
966 }
967 let mut perm: Vec<usize> = (0..n).collect();
968 // Stable sort by structural key. Ties (structurally interchangeable
969 // coordinates) keep their native relative order — harmless precisely
970 // because tied coordinates produce identical fits under any assignment.
971 perm.sort_by_key(|&i| keys[i]);
972 if perm.iter().enumerate().all(|(c, &i)| c == i) {
973 None
974 } else {
975 Some(perm)
976 }
977}
978
979/// Reorder a native-layout ρ vector into canonical order: `out[c] = native[perm[c]]`.
980fn permute_to_canonical(native: &Array1<f64>, perm: &[usize]) -> Array1<f64> {
981 Array1::from_iter(perm.iter().map(|&i| native[i]))
982}
983
984/// Reorder a canonical-layout ρ vector back into native order:
985/// `out[perm[c]] = canonical[c]`.
986fn permute_to_native(canonical: &Array1<f64>, perm: &[usize]) -> Array1<f64> {
987 let mut out = Array1::zeros(canonical.len());
988 for (c, &i) in perm.iter().enumerate() {
989 out[i] = canonical[c];
990 }
991 out
992}
993
994/// Map an `OuterResult` produced in CANONICAL coordinate order back to the
995/// objective's native layout, in place. Permutes every per-coordinate array
996/// (ρ, gradient, Hessian) consistently; scalar and diagnostic fields are
997/// untouched.
998pub(crate) fn outer_result_to_native(mut result: OuterResult, perm: &[usize]) -> OuterResult {
999 if result.rho.len() == perm.len() {
1000 result.rho = permute_to_native(&result.rho, perm);
1001 }
1002 if let Some(g) = result.final_gradient.as_ref()
1003 && g.len() == perm.len()
1004 {
1005 result.final_gradient = Some(permute_to_native(g, perm));
1006 }
1007 if let Some(h) = result.final_hessian.as_ref()
1008 && h.nrows() == perm.len()
1009 && h.ncols() == perm.len()
1010 {
1011 // H_native[perm[a], perm[b]] = H_canon[a, b].
1012 let mut hn = Array2::<f64>::zeros((perm.len(), perm.len()));
1013 for (a, &ia) in perm.iter().enumerate() {
1014 for (b, &ib) in perm.iter().enumerate() {
1015 hn[[ia, ib]] = h[[a, b]];
1016 }
1017 }
1018 result.final_hessian = Some(hn);
1019 }
1020 result
1021}
1022
1023/// Wraps any [`OuterObjective`] so the optimizer can work in a CANONICAL
1024/// coordinate order while the wrapped objective continues to receive ρ in its
1025/// NATIVE order. The optimizer hands canonical ρ to this wrapper; the wrapper
1026/// permutes canonical→native before forwarding to the inner objective, so the
1027/// inner objective (and any checkpointing/cache layer beneath it) sees native
1028/// ρ exactly as before. Capability shape (`n_params`, `psi_dim`, …) is
1029/// unchanged — only coordinate order differs.
1030pub(crate) struct CanonicalizedObjective<'a> {
1031 inner: &'a mut dyn OuterObjective,
1032 /// Canonical→native map: `perm[c]` is the native index at canonical slot `c`.
1033 perm: Vec<usize>,
1034}
1035
1036impl<'a> CanonicalizedObjective<'a> {
1037 pub(crate) fn new(inner: &'a mut dyn OuterObjective, perm: Vec<usize>) -> Self {
1038 Self { inner, perm }
1039 }
1040
1041 #[inline]
1042 fn to_native(&self, canonical: &Array1<f64>) -> Array1<f64> {
1043 if canonical.len() == self.perm.len() {
1044 permute_to_native(canonical, &self.perm)
1045 } else {
1046 // Defensive: a length the permutation does not cover is forwarded
1047 // verbatim rather than corrupted (should not occur for ρ-coords).
1048 canonical.clone()
1049 }
1050 }
1051
1052 /// Map a native-order eval (gradient/Hessian) back into canonical order so
1053 /// the optimizer sees a self-consistent canonical objective.
1054 fn eval_to_canonical(&self, mut eval: OuterEval) -> OuterEval {
1055 if eval.gradient.len() == self.perm.len() {
1056 eval.gradient = permute_to_canonical(&eval.gradient, &self.perm);
1057 }
1058 eval.hessian = match eval.hessian {
1059 HessianResult::Analytic(h)
1060 if h.nrows() == self.perm.len() && h.ncols() == self.perm.len() =>
1061 {
1062 let mut hc = Array2::<f64>::zeros((self.perm.len(), self.perm.len()));
1063 for (a, &ia) in self.perm.iter().enumerate() {
1064 for (b, &ib) in self.perm.iter().enumerate() {
1065 hc[[a, b]] = h[[ia, ib]];
1066 }
1067 }
1068 HessianResult::Analytic(hc)
1069 }
1070 other => other,
1071 };
1072 // `inner_beta_hint` is in the coefficient basis (not ρ-coordinate
1073 // order), so it is forwarded unchanged.
1074 eval
1075 }
1076}
1077
1078impl<'a> OuterObjective for CanonicalizedObjective<'a> {
1079 fn capability(&self) -> OuterCapability {
1080 self.inner.capability()
1081 }
1082
1083 fn eval_cost(&mut self, rho: &Array1<f64>) -> Result<f64, EstimationError> {
1084 let native = self.to_native(rho);
1085 self.inner.eval_cost(&native)
1086 }
1087
1088 fn eval_screening_proxy(&mut self, rho: &Array1<f64>) -> Result<f64, EstimationError> {
1089 let native = self.to_native(rho);
1090 self.inner.eval_screening_proxy(&native)
1091 }
1092
1093 fn eval(&mut self, rho: &Array1<f64>) -> Result<OuterEval, EstimationError> {
1094 let native = self.to_native(rho);
1095 let eval = self.inner.eval(&native)?;
1096 Ok(self.eval_to_canonical(eval))
1097 }
1098
1099 fn eval_with_order(
1100 &mut self,
1101 rho: &Array1<f64>,
1102 order: OuterEvalOrder,
1103 ) -> Result<OuterEval, EstimationError> {
1104 let native = self.to_native(rho);
1105 let eval = self.inner.eval_with_order(&native, order)?;
1106 Ok(self.eval_to_canonical(eval))
1107 }
1108
1109 fn eval_efs(&mut self, rho: &Array1<f64>) -> Result<EfsEval, EstimationError> {
1110 let native = self.to_native(rho);
1111 let mut efs = self.inner.eval_efs(&native)?;
1112 // `steps` has one entry per θ-coordinate (length = n_rho + n_ext). The
1113 // canonical permutation covers only the leading ρ-coordinate block, so
1114 // map exactly those native→canonical; any trailing ψ/ext steps keep
1115 // their position (the canonicalized path is ρ-only, psi_dim == 0).
1116 let m = self.perm.len();
1117 if efs.steps.len() >= m {
1118 let leading = Array1::from_iter(efs.steps.iter().take(m).copied());
1119 let canon_leading = permute_to_canonical(&leading, &self.perm);
1120 for (c, v) in canon_leading.iter().enumerate() {
1121 efs.steps[c] = *v;
1122 }
1123 }
1124 Ok(efs)
1125 }
1126
1127 fn reset(&mut self) {
1128 self.inner.reset();
1129 }
1130
1131 fn seed_inner_state(&mut self, beta: &Array1<f64>) -> Result<SeedOutcome, EstimationError> {
1132 // β is in the coefficient basis, not ρ-coordinate order — forward as-is.
1133 self.inner.seed_inner_state(beta)
1134 }
1135
1136 fn allow_continuation_prewarm(&self) -> bool {
1137 self.inner.allow_continuation_prewarm()
1138 }
1139
1140 fn requires_continuation_path_entry(&self) -> bool {
1141 self.inner.requires_continuation_path_entry()
1142 }
1143
1144 fn accept_seed_without_outer_iterations(
1145 &mut self,
1146 rho: &Array1<f64>,
1147 ) -> Result<Option<f64>, EstimationError> {
1148 let native = self.to_native(rho);
1149 self.inner.accept_seed_without_outer_iterations(&native)
1150 }
1151
1152 fn curvature_homotopy_entry(
1153 &mut self,
1154 rho: &Array1<f64>,
1155 ) -> Option<Result<bool, EstimationError>> {
1156 let native = self.to_native(rho);
1157 self.inner.curvature_homotopy_entry(&native)
1158 }
1159
1160 fn finalize_outer_result(
1161 &mut self,
1162 rho: &Array1<f64>,
1163 plan: &OuterPlan,
1164 ) -> Result<(), EstimationError> {
1165 let native = self.to_native(rho);
1166 self.inner.finalize_outer_result(&native, plan)
1167 }
1168
1169 fn outer_device_admission(&self) -> Option<gam_gpu::policy::RemlOuterAdmission> {
1170 // The device path optimizes in its own coordinate layout; canonicalized
1171 // problems route through the host BFGS/ARC path (where the permutation
1172 // is honored) rather than the device driver.
1173 None
1174 }
1175}