Skip to main content

gam_solve/rho_optimizer/
objective.rs

1use super::*;
2
3// Re-exported here while the shared EFS contract lives in `gam-problem`.
4pub use gam_problem::EfsEval;
5
6/// Outcome of [`OuterObjective::seed_inner_state`].
7///
8/// Distinguishes two non-error outcomes that callers handle differently:
9///
10/// - [`SeedOutcome::Installed`] — the objective owns an inner-β slot and the
11///   provided β has been stored there. The next `eval*` will warm-start from
12///   this β.
13/// - [`SeedOutcome::NoSlot`] — the objective has no inner-β slot at all. The
14///   provided β is silently discarded. This is the contract reply for
15///   objectives whose inner iterate is conceptually empty (e.g. line-search
16///   bridges, screening proxies, fixed-spec objectives).
17///
18/// Genuine seeding failures (wrong dimension when a slot exists, internal
19/// allocation faults, …) are reported via `Err(EstimationError)`.
20///
21/// The two non-error variants exist because the two real callers want
22/// opposite behavior on the no-slot path:
23///
24/// - The outer cache warm-start path (`OuterProblem::run`) reads a `(ρ, β)`
25///   pair from disk; if the objective has no β slot it must log loudly
26///   ("β-bearing checkpoint silently degraded to ρ-only resume") so cache
27///   provenance is auditable.
28/// - The continuation walk (`prime_outer_seed`) forwards `inner_beta_hint`
29///   from the previous step; if the objective has no β slot the walk
30///   simply proceeds cold — no log, no error.
31///
32/// Encoding the distinction in the return type lets each caller branch on
33/// the variant without inspecting error message strings (the previous
34/// brittle approach, see git history for `is_no_hook` in continuation.rs).
35#[derive(Debug, Clone, Copy, PartialEq, Eq)]
36pub enum SeedOutcome {
37    /// The objective installed the provided β into its inner-β slot.
38    Installed,
39    /// The objective has no inner-β slot; the β was discarded.
40    NoSlot,
41    /// The objective owns an inner-β slot, but the provided β is
42    /// structurally incompatible with this fit's inner block layout
43    /// (its length does not match the per-block coefficient widths). The
44    /// β was discarded and the fit resumes ρ-only.
45    ///
46    /// This is the load-time reply for a *row-relaxed* cross-fit seed
47    /// (the `cache_seed_key` prefix channel): two folds of the same model
48    /// share an ρ-dim, so the cached ρ transfers, but the realized basis
49    /// rank — hence the inner β length — is row-population dependent and
50    /// legitimately differs across folds (the LOSO p=37-vs-p=85 case).
51    /// A length-mismatched seed β is therefore NOT an error: cross-length
52    /// β transfer is delegated to the gauge-projected `FitArtifact`
53    /// channel, which least-squares re-expresses the parent's raw β into
54    /// this fold's reduced subspace. Reporting `Incompatible` here keeps
55    /// the (correct) ρ seed and avoids a spurious full cold-start.
56    Incompatible,
57}
58
59/// Common interface for outer smoothing-parameter objectives.
60///
61/// Every model path that optimizes smoothing parameters implements this trait.
62/// The runner function consumes it and handles solver selection,
63/// multi-start, and logging while delegating derivative fallback policy to
64/// `opt`.
65///
66/// # Contract
67///
68/// - `capability()` must be stable (same result across calls).
69/// - `eval()` may return `HessianResult::Unavailable` at individual trial
70///   points even when `capability().hessian == Analytic`; `opt` degrades that
71///   step to first-order behavior instead of requiring the objective to fake a
72///   stale or non-finite Hessian.
73/// - Use `eval_cost()` / `OuterEval::infeasible()` for infeasible trial points.
74///   Return `Err(...)` for genuine evaluation breakdowns so the runner can mark
75///   the step as a recoverable solver failure and escalate to the next declared
76///   fallback plan if the full attempt still fails.
77/// - `eval_cost()` is used only for cost-based optimization paths.
78/// - `eval()` is the main evaluation path (cost + gradient + optional Hessian).
79/// - `eval_efs()` is used only by the EFS solver. It runs the inner solve,
80///   builds the `InnerSolution`, and computes the EFS step vector. The default
81///   implementation returns an error; only objectives that support EFS need
82///   to override it.
83/// - `reset()` restores state to a clean baseline (for multi-start).
84pub trait OuterObjective {
85    /// Declare what this objective can compute analytically.
86    fn capability(&self) -> OuterCapability;
87
88    /// Evaluate cost only for cost-based optimization paths.
89    fn eval_cost(&mut self, rho: &Array1<f64>) -> Result<f64, EstimationError>;
90
91    /// Evaluate the seed-screening ranking proxy at this `rho`.
92    ///
93    /// Used exclusively by the `rank_seeds_with_screening` cascade. The
94    /// default delegates to [`OuterObjective::eval_cost`], which preserves
95    /// behavior for non-REML objectives.
96    ///
97    /// Concrete REML-state objectives override this to return the per-seed
98    /// minimum penalized deviance observed during the inner P-IRLS solve
99    /// (a monotonically descending quantity that remains a meaningful
100    /// quality signal even at a 3-iteration screening cap), instead of the
101    /// V_LAML criterion (which is dominated by a poorly-conditioned
102    /// `0.5·log|H|` term at partial-fit β̂ and ranks seeds little better
103    /// than random). The proxy fires *only* in screening mode; outside
104    /// screening it must return the regular V_LAML cost so the optimization
105    /// objective is unchanged.
106    ///
107    /// # Why the `eval_cost` default is correct for everyone else (#969)
108    ///
109    /// The partial-fit pathology is CAUSED by the screening cap: it is the
110    /// `0.5·log|H|` term evaluated at a β̂ whose inner solve was truncated
111    /// by `screening_max_inner_iterations`. An objective only suffers it if
112    /// it (a) consumes that cap atomic AND (b) ranks on a curvature-bearing
113    /// criterion at the truncated iterate — which is exactly the REML/LAML
114    /// state-objective family, all of which override this method (or are
115    /// built via `build_objective_with_screening_proxy`). Objectives that
116    /// never wire the cap pay the full inner solve during screening, so
117    /// their screened cost IS the true criterion — slower, but a correct
118    /// ranking by definition, and a proxy could only degrade it. Any future
119    /// objective that starts honoring the screening cap on a
120    /// curvature-bearing criterion must override this with its own
121    /// monotonically-descending inner quantity (the penalized-deviance
122    /// pattern above generalizes: rank on the best inner merit seen, never
123    /// on a curvature term at a truncated iterate).
124    fn eval_screening_proxy(&mut self, rho: &Array1<f64>) -> Result<f64, EstimationError> {
125        self.eval_cost(rho)
126    }
127
128    /// Evaluate cost + gradient + (if capable) Hessian.
129    fn eval(&mut self, rho: &Array1<f64>) -> Result<OuterEval, EstimationError>;
130
131    /// Evaluate the outer objective at the order requested by the active plan.
132    ///
133    /// The default preserves legacy behavior by delegating value-only requests
134    /// to [`OuterObjective::eval_cost`] and derivative requests to
135    /// [`OuterObjective::eval`].
136    fn eval_with_order(
137        &mut self,
138        rho: &Array1<f64>,
139        order: OuterEvalOrder,
140    ) -> Result<OuterEval, EstimationError> {
141        match order {
142            OuterEvalOrder::Value => {
143                let cost = self.eval_cost(rho)?;
144                Ok(OuterEval::value_only(cost, rho.len(), None))
145            }
146            OuterEvalOrder::ValueAndGradient | OuterEvalOrder::ValueGradientHessian => {
147                self.eval(rho)
148            }
149        }
150    }
151
152    /// Evaluate cost + EFS step vector. Only needed when the plan selects
153    /// `Solver::Efs`. The default returns an error indicating EFS is not
154    /// supported by this objective.
155    fn eval_efs(&mut self, rho: &Array1<f64>) -> Result<EfsEval, EstimationError> {
156        Err(EstimationError::RemlOptimizationFailed(format!(
157            "EFS evaluation not implemented for this objective at rho_dim={}",
158            rho.len()
159        )))
160    }
161
162    /// Restore to a clean baseline for the next multi-start candidate.
163    fn reset(&mut self);
164
165    /// Seed the inner-solver iterate before the first eval, e.g. when the
166    /// outer-iterate cache restored a `(ρ, β)` pair from a prior run, or
167    /// when the continuation walk forwards `OuterEval::inner_beta_hint`
168    /// from the previous step.
169    ///
170    /// Objectives make an explicit choice via the [`SeedOutcome`] return:
171    /// implementations with an inner β slot return [`SeedOutcome::Installed`]
172    /// after storing β; implementations without one return
173    /// [`SeedOutcome::NoSlot`]. Genuine seeding failures (wrong dimension
174    /// when a slot exists, etc.) are reported via `Err(EstimationError)`.
175    ///
176    /// Callers that need to distinguish "no slot" from "installed" (the
177    /// outer cache warm-start path, which logs cache provenance) branch on
178    /// the variant. Callers that don't care (the continuation walk, which
179    /// only proceeds-cold when the hint is unusable) ignore it and only
180    /// propagate `Err`.
181    fn seed_inner_state(&mut self, beta: &Array1<f64>) -> Result<SeedOutcome, EstimationError>;
182
183    /// Whether the objective can benefit from continuation pre-warm before
184    /// the first solver eval at a candidate seed.
185    ///
186    /// Pre-warm is only correct for objectives with a real writable inner
187    /// state slot: it evaluates an oversmoothed rho path before the seed and
188    /// forwards non-empty `inner_beta_hint`s between steps. Generic synthetic
189    /// objectives and rho-only cache probes must start at the chosen seed
190    /// directly, otherwise the pre-warm becomes an observable extra eval and
191    /// can clobber seed-dispatch bookkeeping with empty beta seeds.
192    fn allow_continuation_prewarm(&self) -> bool {
193        false
194    }
195
196    /// Optional opt-in to the device-resident outer REML BFGS-over-ρ driver
197    /// (`crate::gpu::reml_outer::run_reml_outer_on_device`). Returns
198    /// `Some(adm)` when the objective is a REML evaluator whose
199    /// `(spec, n, p, num_rho)` admission predicate accepts the device path,
200    /// and `None` otherwise.
201    ///
202    /// The default returns `None` so non-REML objectives (line-search-only
203    /// inner bridges, screening proxies, the EFS / hybrid-EFS sub-objectives)
204    /// keep the host BFGS branch unconditionally — only the concrete
205    /// REML-state objectives override this to consult
206    /// [`crate::estimate::reml::outer_eval::outer_reml_device_admission`].
207    fn outer_device_admission(&self) -> Option<gam_gpu::policy::RemlOuterAdmission> {
208        None
209    }
210
211    /// Whether every joint fit of this objective must ENTER through the
212    /// [`crate::continuation_path::ContinuationPath`] (heavy-smoothing
213    /// entry) rather than being solved cold at the seed ρ*.
214    ///
215    /// The SAE-manifold joint objective overrides this to `true`: its joint
216    /// `(logits, t, β)` block has a combinatorial active-set component that a
217    /// cold solve can collapse, so it is entered at a heavy-smoothing regime
218    /// and annealed down. Crucially, this flips the seed cascade's structural
219    /// failure handling from REJECT to **DEMOTE-WITH-REASON**: a "cold"
220    /// structural defect (rank/alias/active-set diagnosis from the seed
221    /// pre-warm or the uniform-structural early-exit) is not a disqualification
222    /// but a signal to RE-ENTER the same seed at a *heavier* ContinuationPath
223    /// regime. The candidate set therefore never empties on a structural
224    /// diagnosis — every demotion is recorded with its reason and routed to a
225    /// heavier regime.
226    ///
227    /// The default `false` preserves the existing contract for every other
228    /// objective: pre-warm stays an optimization (never a feasibility gate),
229    /// and a uniform structural rejection still short-circuits the cascade.
230    fn requires_continuation_path_entry(&self) -> bool {
231        false
232    }
233
234    /// Run the objective's certified curvature-homotopy entry leg, if it has
235    /// one, leaving the inner state warm at the real (`η = 1`) objective.
236    ///
237    /// An objective with a *certified anchor* — a point known by construction to
238    /// be the global optimum of a relaxed problem — can replace the blind
239    /// multi-seed multistart with a single predictor-corrector walk from that
240    /// anchor to the true objective (#1007). The SAE-manifold objective
241    /// overrides this: its `η = 0` Eckart-Young linear relaxation is convex and
242    /// its optimum is certified by `linear_span_anchor`, so the walk in `η`
243    /// tracks the unique optimal branch to `η = 1`. The walk monitors the
244    /// arrow-factor min-pivot and halves the `η` step when it shrinks; a pivot
245    /// collapse below tolerance is a DETECTED bifurcation (recorded on the fit
246    /// payload, never silent), at which point the objective falls back to the
247    /// documented multi-seed cascade.
248    ///
249    /// Returns:
250    ///   * `None` — no certified anchor; use the standard seed cascade
251    ///     (the default for every other objective).
252    ///   * `Some(Ok(true))` — the walk arrived; the inner state is warm at the
253    ///     certified `η = 1` solution and the seed cascade is bypassed.
254    ///   * `Some(Ok(false))` — the anchor degenerated or the walk detected a
255    ///     bifurcation; fall back to the multi-seed cascade (the report is
256    ///     recorded on the objective for the fit payload).
257    ///   * `Some(Err(_))` — a hard failure constructing the anchor.
258    fn curvature_homotopy_entry(
259        &mut self,
260        rho: &Array1<f64>,
261    ) -> Option<Result<bool, EstimationError>> {
262        // Default: no certified anchor — but a non-finite seed is reported
263        // here rather than silently handed to the seed cascade, mirroring the
264        // hard-failure contract of the overriding implementations.
265        if let Some(idx) = rho.iter().position(|v| !v.is_finite()) {
266            return Some(Err(EstimationError::RemlOptimizationFailed(format!(
267                "curvature-homotopy entry received non-finite rho[{idx}]"
268            ))));
269        }
270        None
271    }
272
273    /// Let an objective declare that a seed is already a terminal outer result.
274    /// Used for objectives with a certified high-quality construction seed where
275    /// the generic rho optimizer can only degrade the fitted state.
276    fn accept_seed_without_outer_iterations(
277        &mut self,
278        rho: &Array1<f64>,
279    ) -> Result<Option<f64>, EstimationError> {
280        if rho.is_empty() {
281            return Ok(None);
282        }
283        Ok(None)
284    }
285
286    /// Re-install the selected outer result into the mutable objective before
287    /// callers consume objective-owned fitted state. Optimizers may evaluate
288    /// rejected trial points after the best point was found; without this final
289    /// synchronization, stateful objectives can report the last trial fit rather
290    /// than the returned `OuterResult::rho`.
291    fn finalize_outer_result(
292        &mut self,
293        rho: &Array1<f64>,
294        plan: &OuterPlan,
295    ) -> Result<(), EstimationError> {
296        log::debug!(
297            "[OUTER] finalize: re-installing best rho into the objective (solver {:?})",
298            plan.solver
299        );
300        match plan.solver {
301            Solver::Efs | Solver::HybridEfs => self.eval_efs(rho).map(|_| ()),
302            Solver::Bfgs => self
303                .eval_with_order(rho, OuterEvalOrder::ValueAndGradient)
304                .map(|_| ()),
305            Solver::Arc => self
306                .eval_with_order(rho, OuterEvalOrder::ValueGradientHessian)
307                .map(|_| ()),
308        }
309    }
310}
311
312// ─── Persistent warm-start checkpoint plumbing ────────────────────────
313//
314// `CheckpointingObjective` wraps any `OuterObjective` to write a copy of
315// `(rho, cost, eval_id)` to disk on each finite evaluation. The on-disk
316// [`gam_runtime::warm_start::Session`] rate-limits writes (≥2 s gap unless this iterate
317// strictly improves on the best-so-far) so a tight inner loop never thrashes
318// the filesystem. The same checkpoint is also broadcast to optional mirror
319// sessions, which lets interrupted exact-key runs seed later related fits via
320// their prefix key instead of waiting for a final converged write.
321
322#[derive(serde::Serialize, serde::Deserialize)]
323pub(crate) struct IteratePayload {
324    /// Bump on incompatible payload changes; decode rejects mismatches.
325    schema: u32,
326    pub(crate) rho: Vec<f64>,
327    /// Inner-solver iterate (PIRLS β) captured alongside ρ. The (ρ, β)
328    /// pair lives on the implicit-function manifold β = β*(ρ); restoring
329    /// ρ alone forces the next inner solve to reconstruct β from scratch.
330    /// For saturated ρ (|ρ_i| near `rho_bound`) the inner Hessian
331    /// `X'WX + Σ λ_i S_i` has condition number `≈ e^{2·rho_bound}` — Newton
332    /// degrades to O(1/k) descent and the cycle budget exhausts before
333    /// KKT. Caching β lets the resume start in Newton's quadratic basin
334    /// regardless of where ρ lives. Empty when the family did not surface
335    /// an inner-β hint at write time (still useful as a ρ-only seed).
336    #[serde(default)]
337    pub(crate) beta: Vec<f64>,
338    /// Converged exact outer curvature `H(θ̂)` (full θ×θ, row-major flatten),
339    /// captured alongside the (ρ, β) iterate. A gradient-based BFGS solve does
340    /// not surface its accumulated inverse-Hessian, so the next
341    /// structurally-matching fit (e.g. the next LOSO fold) otherwise restarts
342    /// BFGS from an unscaled identity metric and rediscovers curvature through
343    /// line-search bracketing — multiple full inner-solve value probes per
344    /// accepted outer step. Persisting the converged curvature lets the resume
345    /// seed `InitialMetric::DenseInverseHessian(H⁻¹)` for a quasi-Newton first
346    /// step. Empty when no exact outer Hessian was available at write time
347    /// (still a valid ρ/β seed). `hessian_dim²` must equal `hessian.len()`.
348    #[serde(default)]
349    pub(crate) hessian: Vec<f64>,
350    /// Side length of the square `hessian` matrix (`hessian.len() == dim²`).
351    /// Zero when no Hessian was persisted.
352    #[serde(default)]
353    pub(crate) hessian_dim: usize,
354    pub(crate) cost: f64,
355    eval_id: u64,
356}
357
358/// Entries with a different schema id are rejected by `decode_iterate`
359/// so incompatible on-disk payloads fall through to cold start instead
360/// of seeding the inner solve with a malformed iterate.
361pub(crate) const ITERATE_PAYLOAD_SCHEMA: u32 = 2;
362
363pub(crate) fn encode_iterate(
364    rho: &Array1<f64>,
365    beta: Option<&Array1<f64>>,
366    hessian: Option<&Array2<f64>>,
367    cost: f64,
368    eval_id: u64,
369) -> Option<Vec<u8>> {
370    // Persist the converged outer curvature only when it is square and finite;
371    // a non-finite or non-square Hessian is dropped (the resume falls back to a
372    // ρ/β-only seed) so a malformed curvature can never corrupt a warm start.
373    let (hessian_flat, hessian_dim) = match hessian {
374        Some(h) if h.nrows() == h.ncols() && h.iter().all(|v| v.is_finite()) => {
375            (h.iter().copied().collect::<Vec<f64>>(), h.nrows())
376        }
377        _ => (Vec::new(), 0),
378    };
379    let p = IteratePayload {
380        schema: ITERATE_PAYLOAD_SCHEMA,
381        rho: rho.to_vec(),
382        beta: beta.map(|b| b.to_vec()).unwrap_or_default(),
383        hessian: hessian_flat,
384        hessian_dim,
385        cost,
386        eval_id,
387    };
388    serde_json::to_vec(&p).ok()
389}
390
391pub(crate) fn decode_iterate(bytes: &[u8], expected_rho_dim: usize) -> Option<IteratePayload> {
392    let mut p: IteratePayload = serde_json::from_slice(bytes).ok()?;
393    if p.schema != ITERATE_PAYLOAD_SCHEMA {
394        return None;
395    }
396    if p.rho.len() != expected_rho_dim {
397        return None;
398    }
399    if !p.rho.iter().all(|x| x.is_finite()) || !p.cost.is_finite() {
400        return None;
401    }
402    if !p.beta.iter().all(|x| x.is_finite()) {
403        return None;
404    }
405    // A persisted Hessian must be square (`dim²` entries) and finite to be
406    // usable as a warm-start metric; an inconsistent or non-finite curvature is
407    // scrubbed to "no Hessian" rather than rejecting the whole iterate, so the
408    // ρ/β seed still warms the resume.
409    if p.hessian_dim.saturating_mul(p.hessian_dim) != p.hessian.len()
410        || !p.hessian.iter().all(|x| x.is_finite())
411    {
412        p.hessian = Vec::new();
413        p.hessian_dim = 0;
414    }
415    Some(p)
416}
417
418/// Outcome of inspecting a cache entry as a seed for the outer optimizer.
419///
420/// The classifier rejects only entries that fail structural validity
421/// (wrong dimension, non-finite payload). It does NOT reshape ρ based on
422/// saturation: every finite, well-shaped entry is honored as the next
423/// run's seed.
424///
425/// Previously this enum carried `saturated_coords` / `clamped_to` /
426/// "all-coords-saturated-poisoned-entry" branches that pulled boundary
427/// ρ inward or discarded fully-saturated entries. Those were read-side
428/// band-aids over the real bug: the warm-start contract stored ρ but
429/// not β, so resuming at boundary ρ forced PIRLS to recompute β from
430/// cold-start against a Hessian with condition number `≈ e^{2·rho_bound}`,
431/// and Newton degraded to O(1/k) descent that exhausted the cycle budget.
432///
433/// The contract is now `(ρ, β)`: the schema-2 iterate payload carries
434/// both, and [`CheckpointingObjective`] refuses to persist a divergent
435/// inner state (non-finite cost or β). Boundary ρ — when written under
436/// the new invariant — is a *legitimate* finding (the smoothness wants
437/// to be near-null), and the cached β puts the next inner solve at the
438/// previously converged iterate where the gradient is already at zero.
439/// No clamp or shape-based discard is needed.
440#[derive(Debug)]
441pub(crate) enum CacheSeedDecision {
442    ExactFinal {
443        rho: Array1<f64>,
444        /// Optional inner β captured at the converged ρ. Empty when the
445        /// payload didn't carry one (legacy ρ-only writes or families
446        /// that don't surface β).
447        beta: Vec<f64>,
448        final_value: f64,
449        iterations: usize,
450        prior_obj_display: f64,
451    },
452    Seed {
453        rho: Array1<f64>,
454        /// Optional inner β to prime the next run's inner solver via
455        /// [`OuterObjective::seed_inner_state`]. When non-empty, the
456        /// dispatcher injects β before the first eval so the inner
457        /// PIRLS opens at zero-gradient regardless of where ρ sits in
458        /// the box.
459        beta: Vec<f64>,
460        /// Optional converged outer Hessian `H(θ̂)` from the prior fit, as a
461        /// `(dim, row-major flatten)` pair. `None` when the payload carried no
462        /// curvature (legacy ρ/β-only writes). Seeds the BFGS iter-0 metric on
463        /// the resume so the first outer step is quasi-Newton.
464        hessian: Option<(usize, Vec<f64>)>,
465        prior_obj_display: f64,
466        iteration: u64,
467    },
468    Discard {
469        reason: &'static str,
470        prior_obj_display: f64,
471        all_rho_finite: Option<bool>,
472    },
473}
474
475pub(crate) fn classify_cache_entry_for_outer(
476    loaded: &gam_runtime::warm_start::LoadedEntry,
477    expected_rho_dim: usize,
478) -> CacheSeedDecision {
479    let entry = &loaded.entry;
480    let Some(payload) = decode_iterate(&entry.payload, expected_rho_dim) else {
481        return CacheSeedDecision::Discard {
482            reason: "payload-shape-mismatch",
483            prior_obj_display: entry.objective.unwrap_or(f64::NAN),
484            all_rho_finite: None,
485        };
486    };
487    let cached_rho = Array1::from_vec(payload.rho);
488    let prior_obj_display = entry.objective.unwrap_or(f64::NAN);
489    if matches!(entry.objective, Some(v) if !v.is_finite()) {
490        return CacheSeedDecision::Discard {
491            reason: "non-finite-payload",
492            prior_obj_display,
493            all_rho_finite: Some(cached_rho.iter().all(|v| v.is_finite())),
494        };
495    }
496    if !cached_rho.iter().all(|v| v.is_finite()) {
497        return CacheSeedDecision::Discard {
498            reason: "non-finite-payload",
499            prior_obj_display,
500            all_rho_finite: Some(false),
501        };
502    }
503    if loaded.source == LoadSource::Exact && entry.kind == gam_runtime::warm_start::EntryKind::Final
504    {
505        return CacheSeedDecision::ExactFinal {
506            rho: cached_rho,
507            beta: payload.beta,
508            final_value: entry.objective.unwrap_or(payload.cost),
509            iterations: entry
510                .iteration
511                .unwrap_or(payload.eval_id)
512                .min(usize::MAX as u64) as usize,
513            prior_obj_display,
514        };
515    }
516    let hessian = if payload.hessian_dim > 0
517        && payload.hessian.len() == payload.hessian_dim * payload.hessian_dim
518    {
519        Some((payload.hessian_dim, payload.hessian))
520    } else {
521        None
522    };
523    CacheSeedDecision::Seed {
524        rho: cached_rho,
525        beta: payload.beta,
526        hessian,
527        prior_obj_display,
528        iteration: entry.iteration.unwrap_or(payload.eval_id),
529    }
530}
531
532pub fn cache_entry_would_help_outer(
533    loaded: &gam_runtime::warm_start::LoadedEntry,
534    expected_rho_dim: usize,
535) -> bool {
536    matches!(
537        classify_cache_entry_for_outer(loaded, expected_rho_dim),
538        CacheSeedDecision::ExactFinal { .. } | CacheSeedDecision::Seed { .. }
539    )
540}
541
542pub(crate) struct CheckpointingObjective<'a> {
543    inner: &'a mut dyn OuterObjective,
544    session: Arc<CacheSession>,
545    mirror_sessions: Vec<Arc<CacheSession>>,
546    eval_counter: AtomicU64,
547    /// Most-recent inner β surfaced via [`OuterEval::inner_beta_hint`]. The
548    /// finalize path reads this so the `kind: Final` write encodes the
549    /// (ρ, β) pair that the BFGS optimum was actually fitted at — without
550    /// this the finalize would clobber per-eval checkpoint β state with a
551    /// ρ-only payload, reintroducing the cold-β resume failure.
552    last_inner_beta: std::sync::Mutex<Option<Array1<f64>>>,
553}
554
555impl<'a> CheckpointingObjective<'a> {
556    pub(crate) fn new(
557        inner: &'a mut dyn OuterObjective,
558        session: Arc<CacheSession>,
559        mirror_sessions: Vec<Arc<CacheSession>>,
560    ) -> Self {
561        Self {
562            inner,
563            session,
564            mirror_sessions,
565            eval_counter: AtomicU64::new(0),
566            last_inner_beta: std::sync::Mutex::new(None),
567        }
568    }
569
570    pub(crate) fn last_inner_beta(&self) -> Option<Array1<f64>> {
571        self.last_inner_beta.lock().ok().and_then(|g| g.clone())
572    }
573
574    fn note(&self, rho: &Array1<f64>, beta: Option<&Array1<f64>>, cost: f64) {
575        if !cost.is_finite() {
576            return;
577        }
578        // If β is provided, require it to be finite; non-finite β is a
579        // divergent inner state — persisting it would re-poison the cache.
580        if let Some(b) = beta {
581            if !b.iter().all(|v| v.is_finite()) {
582                return;
583            }
584            if let Ok(mut guard) = self.last_inner_beta.lock() {
585                *guard = Some(b.clone());
586            }
587        }
588        let i = self.eval_counter.fetch_add(1, Ordering::Relaxed);
589        // Per-eval checkpoints carry no converged outer Hessian (curvature is
590        // only meaningful at the final optimum); the finalize write is where the
591        // converged `H(θ̂)` is persisted for cross-fit warm starts.
592        if let Some(bytes) = encode_iterate(rho, beta, None, cost, i) {
593            self.session.checkpoint(&bytes, Some(cost), Some(i));
594            for mirror in &self.mirror_sessions {
595                mirror.checkpoint(&bytes, Some(cost), Some(i));
596            }
597        }
598    }
599}
600
601impl<'a> OuterObjective for CheckpointingObjective<'a> {
602    fn capability(&self) -> OuterCapability {
603        self.inner.capability()
604    }
605
606    fn eval_cost(&mut self, rho: &Array1<f64>) -> Result<f64, EstimationError> {
607        let v = self.inner.eval_cost(rho)?;
608        // `eval_cost` carries no inner-β handle — persist ρ-only.
609        self.note(rho, None, v);
610        Ok(v)
611    }
612
613    fn eval_screening_proxy(&mut self, rho: &Array1<f64>) -> Result<f64, EstimationError> {
614        // Screening proxies run at sub-converged β̂ and aren't a meaningful
615        // best-so-far signal; forward without persisting.
616        self.inner.eval_screening_proxy(rho)
617    }
618
619    fn eval(&mut self, rho: &Array1<f64>) -> Result<OuterEval, EstimationError> {
620        let r = self.inner.eval(rho)?;
621        self.note(rho, r.inner_beta_hint.as_ref(), r.cost);
622        Ok(r)
623    }
624
625    fn eval_with_order(
626        &mut self,
627        rho: &Array1<f64>,
628        order: OuterEvalOrder,
629    ) -> Result<OuterEval, EstimationError> {
630        let r = self.inner.eval_with_order(rho, order)?;
631        self.note(rho, r.inner_beta_hint.as_ref(), r.cost);
632        Ok(r)
633    }
634
635    fn eval_efs(&mut self, rho: &Array1<f64>) -> Result<EfsEval, EstimationError> {
636        let r = self.inner.eval_efs(rho)?;
637        // EfsEval has no inner-β hint surface yet — persist ρ-only.
638        self.note(rho, None, r.cost);
639        Ok(r)
640    }
641
642    fn seed_inner_state(&mut self, beta: &Array1<f64>) -> Result<SeedOutcome, EstimationError> {
643        // Forward to the wrapped objective, then prime our last-inner-beta
644        // cache so a subsequent finalize-write encodes the seeded β if no
645        // eval surfaces a fresher β first. Only prime on actual install —
646        // `NoSlot` means the inner solver will not see β, so the cache
647        // entry would be a lie.
648        let result = self.inner.seed_inner_state(beta);
649        if matches!(result, Ok(SeedOutcome::Installed))
650            && beta.iter().all(|v| v.is_finite())
651            && let Ok(mut guard) = self.last_inner_beta.lock()
652        {
653            *guard = Some(beta.clone());
654        }
655        result
656    }
657
658    fn allow_continuation_prewarm(&self) -> bool {
659        self.inner.allow_continuation_prewarm()
660    }
661
662    fn requires_continuation_path_entry(&self) -> bool {
663        self.inner.requires_continuation_path_entry()
664    }
665
666    fn reset(&mut self) {
667        self.inner.reset();
668    }
669}
670
671/// Closure-based adapter for [`OuterObjective`].
672///
673/// This allows any call site to construct an `OuterObjective` from closures
674/// without needing to define a wrapper struct or modify the state type.
675/// Each call site wraps its existing methods into closures and passes them here.
676pub struct ClosureObjective<
677    S,
678    Fc,
679    Fe,
680    Fr = fn(&mut S),
681    Fefs = fn(&mut S, &Array1<f64>) -> Result<EfsEval, EstimationError>,
682    Feo = fn(&mut S, &Array1<f64>, OuterEvalOrder) -> Result<OuterEval, EstimationError>,
683    Fsp = fn(&mut S, &Array1<f64>) -> Result<f64, EstimationError>,
684    Fseed = fn(&mut S, &Array1<f64>) -> Result<SeedOutcome, EstimationError>,
685> {
686    pub state: S,
687    pub(crate) cap: OuterCapability,
688    pub(crate) cost_fn: Fc,
689    pub(crate) eval_fn: Fe,
690    /// Optional order-aware eval closure. When `None`, `eval_with_order()`
691    /// falls back to `eval()`.
692    pub(crate) eval_order_fn: Option<Feo>,
693    /// Optional reset closure. When `None`, `reset()` is a no-op.
694    pub(crate) reset_fn: Option<Fr>,
695    /// Optional EFS evaluation closure. When `None`, the default
696    /// `OuterObjective::eval_efs` returns an error.
697    pub(crate) efs_fn: Option<Fefs>,
698    /// Optional seed-screening ranking proxy closure. When `None`,
699    /// `eval_screening_proxy()` falls back to `eval_cost()` (the trait
700    /// default), preserving legacy behavior for non-REML objectives.
701    pub(crate) screening_proxy_fn: Option<Fsp>,
702    /// Optional inner-state seeding closure. Objectives with PIRLS / Newton
703    /// inner state install cached β here before the first outer eval.
704    pub(crate) seed_fn: Option<Fseed>,
705    /// Whether a seed hook should also opt the objective into generic
706    /// continuation pre-warm. High-dimensional REML keeps the seed hook for
707    /// cache/warm-start replay but declines the expensive rho-anneal pre-pass.
708    pub(crate) continuation_prewarm: bool,
709}
710
711impl<S, Fc, Fe, Fr, Fefs, Feo, Fsp, Fseed> OuterObjective
712    for ClosureObjective<S, Fc, Fe, Fr, Fefs, Feo, Fsp, Fseed>
713where
714    Fc: FnMut(&mut S, &Array1<f64>) -> Result<f64, EstimationError>,
715    Fe: FnMut(&mut S, &Array1<f64>) -> Result<OuterEval, EstimationError>,
716    Fr: FnMut(&mut S),
717    Fefs: FnMut(&mut S, &Array1<f64>) -> Result<EfsEval, EstimationError>,
718    Feo: FnMut(&mut S, &Array1<f64>, OuterEvalOrder) -> Result<OuterEval, EstimationError>,
719    Fsp: FnMut(&mut S, &Array1<f64>) -> Result<f64, EstimationError>,
720    Fseed: FnMut(&mut S, &Array1<f64>) -> Result<SeedOutcome, EstimationError>,
721{
722    fn capability(&self) -> OuterCapability {
723        self.cap.clone()
724    }
725
726    fn eval_cost(&mut self, rho: &Array1<f64>) -> Result<f64, EstimationError> {
727        crate::estimate::reml::outer_eval::record_current_outer_theta_for_ift(rho);
728        (self.cost_fn)(&mut self.state, rho)
729    }
730
731    fn eval_screening_proxy(&mut self, rho: &Array1<f64>) -> Result<f64, EstimationError> {
732        crate::estimate::reml::outer_eval::record_current_outer_theta_for_ift(rho);
733        match self.screening_proxy_fn.as_mut() {
734            Some(f) => f(&mut self.state, rho),
735            None => (self.cost_fn)(&mut self.state, rho),
736        }
737    }
738
739    fn eval(&mut self, rho: &Array1<f64>) -> Result<OuterEval, EstimationError> {
740        crate::estimate::reml::outer_eval::record_current_outer_theta_for_ift(rho);
741        (self.eval_fn)(&mut self.state, rho)
742    }
743
744    fn eval_with_order(
745        &mut self,
746        rho: &Array1<f64>,
747        order: OuterEvalOrder,
748    ) -> Result<OuterEval, EstimationError> {
749        crate::estimate::reml::outer_eval::record_current_outer_theta_for_ift(rho);
750        match self.eval_order_fn.as_mut() {
751            Some(f) => f(&mut self.state, rho, order),
752            None => (self.eval_fn)(&mut self.state, rho),
753        }
754    }
755
756    fn eval_efs(&mut self, rho: &Array1<f64>) -> Result<EfsEval, EstimationError> {
757        crate::estimate::reml::outer_eval::record_current_outer_theta_for_ift(rho);
758        match self.efs_fn.as_mut() {
759            Some(f) => f(&mut self.state, rho),
760            None => Err(EstimationError::RemlOptimizationFailed(
761                "EFS evaluation not implemented for this objective".to_string(),
762            )),
763        }
764    }
765
766    fn seed_inner_state(&mut self, beta: &Array1<f64>) -> Result<SeedOutcome, EstimationError> {
767        // Empty β: by convention, "no warm-start available" — treat as a
768        // no-op install. Distinct from `NoSlot` because the objective may
769        // very well have a slot; the caller just didn't supply a β to fill
770        // it. Reporting `Installed` is correct: the slot's pre-existing
771        // state (cold default) is the post-seed state.
772        if beta.is_empty() {
773            return Ok(SeedOutcome::Installed);
774        }
775        match self.seed_fn.as_mut() {
776            Some(f) => f(&mut self.state, beta),
777            // No hook installed — the objective owns no inner-β slot.
778            // The caller decides whether this is a loud cache-provenance
779            // event or a silent continuation-walk degradation.
780            None => Ok(SeedOutcome::NoSlot),
781        }
782    }
783
784    fn allow_continuation_prewarm(&self) -> bool {
785        self.continuation_prewarm && self.seed_fn.is_some()
786    }
787
788    fn reset(&mut self) {
789        if let Some(f) = self.reset_fn.as_mut() {
790            f(&mut self.state);
791        }
792    }
793}
794
795impl<S, Fc, Fe, Fr, Fefs, Feo, Fsp> ClosureObjective<S, Fc, Fe, Fr, Fefs, Feo, Fsp>
796where
797    Fc: FnMut(&mut S, &Array1<f64>) -> Result<f64, EstimationError>,
798    Fe: FnMut(&mut S, &Array1<f64>) -> Result<OuterEval, EstimationError>,
799    Fr: FnMut(&mut S),
800    Fefs: FnMut(&mut S, &Array1<f64>) -> Result<EfsEval, EstimationError>,
801    Feo: FnMut(&mut S, &Array1<f64>, OuterEvalOrder) -> Result<OuterEval, EstimationError>,
802    Fsp: FnMut(&mut S, &Array1<f64>) -> Result<f64, EstimationError>,
803{
804    pub fn with_seed_inner_state<Fseed>(
805        self,
806        seed_fn: Fseed,
807    ) -> ClosureObjective<S, Fc, Fe, Fr, Fefs, Feo, Fsp, Fseed>
808    where
809        Fseed: FnMut(&mut S, &Array1<f64>) -> Result<SeedOutcome, EstimationError>,
810    {
811        ClosureObjective {
812            state: self.state,
813            cap: self.cap,
814            cost_fn: self.cost_fn,
815            eval_fn: self.eval_fn,
816            eval_order_fn: self.eval_order_fn,
817            reset_fn: self.reset_fn,
818            efs_fn: self.efs_fn,
819            screening_proxy_fn: self.screening_proxy_fn,
820            seed_fn: Some(seed_fn),
821            continuation_prewarm: self.continuation_prewarm,
822        }
823    }
824}
825
826pub(crate) fn into_objective_error(context: &str, err: EstimationError) -> ObjectiveEvalError {
827    ObjectiveEvalError::recoverable(format!("{context}: {err}"))
828}
829
830pub(crate) fn finite_cost_or_error(context: &str, cost: f64) -> Result<f64, ObjectiveEvalError> {
831    if cost.is_finite() {
832        Ok(cost)
833    } else {
834        Err(ObjectiveEvalError::recoverable(format!(
835            "{context}: objective returned a non-finite cost"
836        )))
837    }
838}
839
840/// Shared first-order validation: gradient length, finite cost, finite gradient.
841///
842/// Extracted so the cost+gradient checks live in exactly one place — both the
843/// full (`finite_outer_eval_or_error`) and first-order
844/// (`finite_outer_first_order_eval_or_error`) validators delegate here, keeping
845/// their error messages and check order bit-for-bit identical.
846fn validate_outer_first_order(
847    context: &str,
848    layout: OuterThetaLayout,
849    eval: &OuterEval,
850) -> Result<(), ObjectiveEvalError> {
851    layout.validate_gradient_len(&eval.gradient, context)?;
852    if !eval.cost.is_finite() {
853        return Err(ObjectiveEvalError::recoverable(format!(
854            "{context}: objective returned a non-finite cost"
855        )));
856    }
857    if !eval.gradient.iter().all(|v| v.is_finite()) {
858        return Err(ObjectiveEvalError::recoverable(format!(
859            "{context}: objective returned a non-finite gradient"
860        )));
861    }
862    Ok(())
863}
864
865pub(crate) fn finite_outer_eval_or_error(
866    context: &str,
867    layout: OuterThetaLayout,
868    eval: OuterEval,
869) -> Result<OuterEval, ObjectiveEvalError> {
870    validate_outer_first_order(context, layout, &eval)?;
871    match &eval.hessian {
872        HessianResult::Analytic(hessian) => {
873            layout.validate_hessian_shape(hessian, context)?;
874            if !hessian.iter().all(|v| v.is_finite()) {
875                return Err(ObjectiveEvalError::recoverable(format!(
876                    "{context}: objective returned a non-finite Hessian"
877                )));
878            }
879        }
880        HessianResult::Operator(op) => {
881            if op.dim() != layout.n_params {
882                return Err(ObjectiveEvalError::recoverable(format!(
883                    "{context}: outer Hessian operator dimension mismatch: got {}, expected {} (rho_dim={}, psi_dim={})",
884                    op.dim(),
885                    layout.n_params,
886                    layout.rho_dim(),
887                    layout.psi_dim
888                )));
889            }
890        }
891        HessianResult::Unavailable => {}
892    }
893    Ok(eval)
894}
895
896pub(crate) fn finite_outer_first_order_eval_or_error(
897    context: &str,
898    layout: OuterThetaLayout,
899    eval: OuterEval,
900) -> Result<OuterEval, ObjectiveEvalError> {
901    validate_outer_first_order(context, layout, &eval)?;
902    Ok(eval)
903}
904
905pub(crate) fn validate_second_order_seed_hessian(
906    context: &str,
907    layout: OuterThetaLayout,
908    eval: &OuterEval,
909) -> Result<(), ObjectiveEvalError> {
910    if layout.n_params > SECOND_ORDER_GEOMETRY_PROBE_MAX_PARAMS || !eval.hessian.is_analytic() {
911        return Ok(());
912    }
913    if matches!(
914        &eval.hessian,
915        HessianResult::Operator(op) if !op.materialization_capability().is_available()
916    ) {
917        return Ok(());
918    }
919
920    let Some(hessian) = eval.hessian.materialize_dense().map_err(|message| {
921        ObjectiveEvalError::recoverable(format!(
922            "{context}: analytic outer Hessian materialization failed during second-order seed validation: {message}"
923        ))
924    })?
925    else {
926        return Ok(());
927    };
928
929    layout.validate_hessian_shape(&hessian, context)?;
930    if !hessian.iter().all(|value| value.is_finite()) {
931        return Err(ObjectiveEvalError::recoverable(format!(
932            "{context}: analytic outer Hessian probe encountered non-finite entries"
933        )));
934    }
935
936    Ok(())
937}
938
939// ─── Permutation-invariant outer coordinate canonicalization ──────────
940//
941// The additive-term-order (#1539) and tensor-margin-order (#1538) invariance
942// bugs share one root cause: the outer smoothing-parameter optimizer resolves
943// a flat double-penalty REML valley differently depending on the ORDER the
944// penalty blocks are presented (seed placement, multistart, and tie-breaking
945// all operate in native penalty-index order). The design and penalty are
946// symmetric up to a block permutation, so the cure is permutation-invariance
947// by construction: present the optimizer an identical CANONICAL coordinate
948// layout regardless of native order, then map the optimized ρ back.
949//
950// The canonical order is a stable sort of the native coordinates by their
951// structural key (see `PenaltyCoordinate::canonical_structural_key`), which is
952// derived purely from each penalty's rotation-/placement-invariant content —
953// never from its native position. Two formula orders therefore yield the SAME
954// canonical layout, so the optimizer's seeding/multistart/tie-break all run on
955// byte-identical coordinates and select identical λ̂.
956
957/// Canonical→native index map: `perm[c]` is the native coordinate placed at
958/// canonical position `c`.
959///
960/// Returns `None` when the keys are already in canonical order (the permutation
961/// is the identity), so the legacy native-order path runs untouched.
962pub(crate) fn canonical_permutation(keys: &[u64]) -> Option<Vec<usize>> {
963    let n = keys.len();
964    if n <= 1 {
965        return None;
966    }
967    let mut perm: Vec<usize> = (0..n).collect();
968    // Stable sort by structural key. Ties (structurally interchangeable
969    // coordinates) keep their native relative order — harmless precisely
970    // because tied coordinates produce identical fits under any assignment.
971    perm.sort_by_key(|&i| keys[i]);
972    if perm.iter().enumerate().all(|(c, &i)| c == i) {
973        None
974    } else {
975        Some(perm)
976    }
977}
978
979/// Reorder a native-layout ρ vector into canonical order: `out[c] = native[perm[c]]`.
980fn permute_to_canonical(native: &Array1<f64>, perm: &[usize]) -> Array1<f64> {
981    Array1::from_iter(perm.iter().map(|&i| native[i]))
982}
983
984/// Reorder a canonical-layout ρ vector back into native order:
985/// `out[perm[c]] = canonical[c]`.
986fn permute_to_native(canonical: &Array1<f64>, perm: &[usize]) -> Array1<f64> {
987    let mut out = Array1::zeros(canonical.len());
988    for (c, &i) in perm.iter().enumerate() {
989        out[i] = canonical[c];
990    }
991    out
992}
993
994/// Map an `OuterResult` produced in CANONICAL coordinate order back to the
995/// objective's native layout, in place. Permutes every per-coordinate array
996/// (ρ, gradient, Hessian) consistently; scalar and diagnostic fields are
997/// untouched.
998pub(crate) fn outer_result_to_native(mut result: OuterResult, perm: &[usize]) -> OuterResult {
999    if result.rho.len() == perm.len() {
1000        result.rho = permute_to_native(&result.rho, perm);
1001    }
1002    if let Some(g) = result.final_gradient.as_ref()
1003        && g.len() == perm.len()
1004    {
1005        result.final_gradient = Some(permute_to_native(g, perm));
1006    }
1007    if let Some(h) = result.final_hessian.as_ref()
1008        && h.nrows() == perm.len()
1009        && h.ncols() == perm.len()
1010    {
1011        // H_native[perm[a], perm[b]] = H_canon[a, b].
1012        let mut hn = Array2::<f64>::zeros((perm.len(), perm.len()));
1013        for (a, &ia) in perm.iter().enumerate() {
1014            for (b, &ib) in perm.iter().enumerate() {
1015                hn[[ia, ib]] = h[[a, b]];
1016            }
1017        }
1018        result.final_hessian = Some(hn);
1019    }
1020    result
1021}
1022
1023/// Wraps any [`OuterObjective`] so the optimizer can work in a CANONICAL
1024/// coordinate order while the wrapped objective continues to receive ρ in its
1025/// NATIVE order. The optimizer hands canonical ρ to this wrapper; the wrapper
1026/// permutes canonical→native before forwarding to the inner objective, so the
1027/// inner objective (and any checkpointing/cache layer beneath it) sees native
1028/// ρ exactly as before. Capability shape (`n_params`, `psi_dim`, …) is
1029/// unchanged — only coordinate order differs.
1030pub(crate) struct CanonicalizedObjective<'a> {
1031    inner: &'a mut dyn OuterObjective,
1032    /// Canonical→native map: `perm[c]` is the native index at canonical slot `c`.
1033    perm: Vec<usize>,
1034}
1035
1036impl<'a> CanonicalizedObjective<'a> {
1037    pub(crate) fn new(inner: &'a mut dyn OuterObjective, perm: Vec<usize>) -> Self {
1038        Self { inner, perm }
1039    }
1040
1041    #[inline]
1042    fn to_native(&self, canonical: &Array1<f64>) -> Array1<f64> {
1043        if canonical.len() == self.perm.len() {
1044            permute_to_native(canonical, &self.perm)
1045        } else {
1046            // Defensive: a length the permutation does not cover is forwarded
1047            // verbatim rather than corrupted (should not occur for ρ-coords).
1048            canonical.clone()
1049        }
1050    }
1051
1052    /// Map a native-order eval (gradient/Hessian) back into canonical order so
1053    /// the optimizer sees a self-consistent canonical objective.
1054    fn eval_to_canonical(&self, mut eval: OuterEval) -> OuterEval {
1055        if eval.gradient.len() == self.perm.len() {
1056            eval.gradient = permute_to_canonical(&eval.gradient, &self.perm);
1057        }
1058        eval.hessian = match eval.hessian {
1059            HessianResult::Analytic(h)
1060                if h.nrows() == self.perm.len() && h.ncols() == self.perm.len() =>
1061            {
1062                let mut hc = Array2::<f64>::zeros((self.perm.len(), self.perm.len()));
1063                for (a, &ia) in self.perm.iter().enumerate() {
1064                    for (b, &ib) in self.perm.iter().enumerate() {
1065                        hc[[a, b]] = h[[ia, ib]];
1066                    }
1067                }
1068                HessianResult::Analytic(hc)
1069            }
1070            other => other,
1071        };
1072        // `inner_beta_hint` is in the coefficient basis (not ρ-coordinate
1073        // order), so it is forwarded unchanged.
1074        eval
1075    }
1076}
1077
1078impl<'a> OuterObjective for CanonicalizedObjective<'a> {
1079    fn capability(&self) -> OuterCapability {
1080        self.inner.capability()
1081    }
1082
1083    fn eval_cost(&mut self, rho: &Array1<f64>) -> Result<f64, EstimationError> {
1084        let native = self.to_native(rho);
1085        self.inner.eval_cost(&native)
1086    }
1087
1088    fn eval_screening_proxy(&mut self, rho: &Array1<f64>) -> Result<f64, EstimationError> {
1089        let native = self.to_native(rho);
1090        self.inner.eval_screening_proxy(&native)
1091    }
1092
1093    fn eval(&mut self, rho: &Array1<f64>) -> Result<OuterEval, EstimationError> {
1094        let native = self.to_native(rho);
1095        let eval = self.inner.eval(&native)?;
1096        Ok(self.eval_to_canonical(eval))
1097    }
1098
1099    fn eval_with_order(
1100        &mut self,
1101        rho: &Array1<f64>,
1102        order: OuterEvalOrder,
1103    ) -> Result<OuterEval, EstimationError> {
1104        let native = self.to_native(rho);
1105        let eval = self.inner.eval_with_order(&native, order)?;
1106        Ok(self.eval_to_canonical(eval))
1107    }
1108
1109    fn eval_efs(&mut self, rho: &Array1<f64>) -> Result<EfsEval, EstimationError> {
1110        let native = self.to_native(rho);
1111        let mut efs = self.inner.eval_efs(&native)?;
1112        // `steps` has one entry per θ-coordinate (length = n_rho + n_ext). The
1113        // canonical permutation covers only the leading ρ-coordinate block, so
1114        // map exactly those native→canonical; any trailing ψ/ext steps keep
1115        // their position (the canonicalized path is ρ-only, psi_dim == 0).
1116        let m = self.perm.len();
1117        if efs.steps.len() >= m {
1118            let leading = Array1::from_iter(efs.steps.iter().take(m).copied());
1119            let canon_leading = permute_to_canonical(&leading, &self.perm);
1120            for (c, v) in canon_leading.iter().enumerate() {
1121                efs.steps[c] = *v;
1122            }
1123        }
1124        Ok(efs)
1125    }
1126
1127    fn reset(&mut self) {
1128        self.inner.reset();
1129    }
1130
1131    fn seed_inner_state(&mut self, beta: &Array1<f64>) -> Result<SeedOutcome, EstimationError> {
1132        // β is in the coefficient basis, not ρ-coordinate order — forward as-is.
1133        self.inner.seed_inner_state(beta)
1134    }
1135
1136    fn allow_continuation_prewarm(&self) -> bool {
1137        self.inner.allow_continuation_prewarm()
1138    }
1139
1140    fn requires_continuation_path_entry(&self) -> bool {
1141        self.inner.requires_continuation_path_entry()
1142    }
1143
1144    fn accept_seed_without_outer_iterations(
1145        &mut self,
1146        rho: &Array1<f64>,
1147    ) -> Result<Option<f64>, EstimationError> {
1148        let native = self.to_native(rho);
1149        self.inner.accept_seed_without_outer_iterations(&native)
1150    }
1151
1152    fn curvature_homotopy_entry(
1153        &mut self,
1154        rho: &Array1<f64>,
1155    ) -> Option<Result<bool, EstimationError>> {
1156        let native = self.to_native(rho);
1157        self.inner.curvature_homotopy_entry(&native)
1158    }
1159
1160    fn finalize_outer_result(
1161        &mut self,
1162        rho: &Array1<f64>,
1163        plan: &OuterPlan,
1164    ) -> Result<(), EstimationError> {
1165        let native = self.to_native(rho);
1166        self.inner.finalize_outer_result(&native, plan)
1167    }
1168
1169    fn outer_device_admission(&self) -> Option<gam_gpu::policy::RemlOuterAdmission> {
1170        // The device path optimizes in its own coordinate layout; canonicalized
1171        // problems route through the host BFGS/ARC path (where the permutation
1172        // is honored) rather than the device driver.
1173        None
1174    }
1175}