Skip to main content

gam_models/fit_orchestration/drivers/
design_construction.rs

1// #1521: `build_term_collection_design` and its term-design subgraph were
2// relocated DOWN into `gam_terms::smooth` (see `gam_terms::smooth::term_design`).
3// The joint-build variants below STAY here: they return a `gam_solve`
4// `EstimationError` / call `freeze_term_collection_from_design`
5// (`spatial_optimization.rs`), so they belong to the gam-models orchestration
6// tier. They reach the relocated `build_term_collection_design_inner` /
7// `build_term_collection_design` via the module's `use gam_terms::smooth::*`.
8pub fn build_term_collection_designs_joint(
9    data: ArrayView2<'_, f64>,
10    specs: &[TermCollectionSpec],
11) -> Result<Vec<TermCollectionDesign>, BasisError> {
12    for spec in specs {
13        validate_term_collection_finite_inputs(data, spec)?;
14    }
15    let smooth_blocks = specs
16        .iter()
17        .map(|spec| spec.smooth_terms.clone())
18        .collect::<Vec<_>>();
19    let planned_blocks = plan_joint_spatial_centers_for_term_blocks(data, &smooth_blocks)?;
20    let mut out = Vec::with_capacity(specs.len());
21    for (spec, planned_terms) in specs.iter().zip(planned_blocks.into_iter()) {
22        let mut planned_spec = spec.clone();
23        planned_spec.smooth_terms = planned_terms;
24        out.push(build_term_collection_design_inner(data, &planned_spec)?);
25    }
26    Ok(out)
27}
28
29pub fn build_term_collection_designs_and_freeze_joint(
30    data: ArrayView2<'_, f64>,
31    specs: &[TermCollectionSpec],
32) -> Result<(Vec<TermCollectionDesign>, Vec<TermCollectionSpec>), EstimationError> {
33    let designs = build_term_collection_designs_joint(data, specs)?;
34    let mut resolved_specs = Vec::with_capacity(specs.len());
35    for (spec, design) in specs.iter().zip(designs.iter()) {
36        resolved_specs.push(freeze_term_collection_from_design(spec, design)?);
37    }
38    Ok((designs, resolved_specs))
39}
40
41pub fn fit_term_collection_forspec(
42    data: ArrayView2<'_, f64>,
43    y: ArrayView1<'_, f64>,
44    weights: ArrayView1<'_, f64>,
45    offset: ArrayView1<'_, f64>,
46    spec: &TermCollectionSpec,
47    family: LikelihoodSpec,
48    options: &FitOptions,
49) -> Result<FittedTermCollection, EstimationError> {
50    fit_term_collection_forspecwith_heuristic_lambdas(
51        data, y, weights, offset, spec, None, family, options,
52    )
53}
54
55pub fn fit_term_collection_with_coefficient_groups(
56    data: ArrayView2<'_, f64>,
57    y: ArrayView1<'_, f64>,
58    weights: ArrayView1<'_, f64>,
59    offset: ArrayView1<'_, f64>,
60    spec: &TermCollectionSpec,
61    groups: &[CoefficientGroupSpec],
62    family: LikelihoodSpec,
63    options: &FitOptions,
64) -> Result<FittedTermCollection, EstimationError> {
65    if groups.is_empty() {
66        return fit_term_collection_forspec(data, y, weights, offset, spec, family, options);
67    }
68    let design = build_term_collection_design(data, spec)?;
69    let base_fit_opts = adaptive_fit_options_base(options, &design);
70    let realized = design
71        .realize_coefficient_groups(groups, &base_fit_opts.rho_prior)
72        .map_err(EstimationError::BasisError)?;
73    let mut grouped_options = base_fit_opts.clone();
74    grouped_options.rho_prior = realized.rho_prior;
75    let fitted = FittedTermCollection {
76        fit: gam_solve::estimate::fit_gam_with_penalty_specs(
77            design.design.clone(),
78            y,
79            weights,
80            offset,
81            realized.penalty_specs,
82            realized.nullspace_dims,
83            family.clone(),
84            &grouped_options,
85        )?,
86        design,
87        adaptive_diagnostics: None,
88    };
89    enforce_term_constraint_feasibility(&fitted.design, &fitted.fit)?;
90    Ok(fitted)
91}
92
93pub fn fit_term_collection_with_penalty_block_gamma_prior_callback<F>(
94    data: ArrayView2<'_, f64>,
95    y: ArrayView1<'_, f64>,
96    weights: ArrayView1<'_, f64>,
97    offset: ArrayView1<'_, f64>,
98    spec: &TermCollectionSpec,
99    callback: F,
100    family: LikelihoodSpec,
101    options: &FitOptions,
102) -> Result<FittedTermCollection, EstimationError>
103where
104    F: FnMut(&PenaltyBlockGammaPriorMetadata<'_>) -> Option<(f64, f64)>,
105{
106    let design = build_term_collection_design(data, spec)?;
107    let mut fit_opts = adaptive_fit_options_base(options, &design);
108    fit_opts.rho_prior = realize_penalty_block_gamma_priors(&design, callback)
109        .map_err(EstimationError::BasisError)?;
110    let fitted = FittedTermCollection {
111        fit: fit_gamwith_heuristic_lambdas(
112            design.design.clone(),
113            y,
114            weights,
115            offset,
116            &design.penalties,
117            None,
118            family.clone(),
119            &fit_opts,
120        )?,
121        design,
122        adaptive_diagnostics: None,
123    };
124    enforce_term_constraint_feasibility(&fitted.design, &fitted.fit)?;
125    Ok(fitted)
126}
127
128pub fn fit_term_collection_with_penalty_block_gamma_priors(
129    data: ArrayView2<'_, f64>,
130    y: ArrayView1<'_, f64>,
131    weights: ArrayView1<'_, f64>,
132    offset: ArrayView1<'_, f64>,
133    spec: &TermCollectionSpec,
134    priors: &[(String, f64, f64)],
135    family: LikelihoodSpec,
136    options: &FitOptions,
137) -> Result<FittedTermCollection, EstimationError> {
138    let design = build_term_collection_design(data, spec)?;
139    let mut fit_opts = adaptive_fit_options_base(options, &design);
140    fit_opts.rho_prior = realize_keyed_penalty_block_gamma_priors(&design, priors)
141        .map_err(EstimationError::BasisError)?;
142    let fitted = FittedTermCollection {
143        fit: fit_gamwith_heuristic_lambdas(
144            design.design.clone(),
145            y,
146            weights,
147            offset,
148            &design.penalties,
149            None,
150            family.clone(),
151            &fit_opts,
152        )?,
153        design,
154        adaptive_diagnostics: None,
155    };
156    enforce_term_constraint_feasibility(&fitted.design, &fitted.fit)?;
157    Ok(fitted)
158}
159
160pub fn fit_term_collection_with_coefficient_groups_and_penalty_block_gamma_priors(
161    data: ArrayView2<'_, f64>,
162    y: ArrayView1<'_, f64>,
163    weights: ArrayView1<'_, f64>,
164    offset: ArrayView1<'_, f64>,
165    spec: &TermCollectionSpec,
166    groups: &[CoefficientGroupSpec],
167    priors: &[(String, f64, f64)],
168    family: LikelihoodSpec,
169    options: &FitOptions,
170) -> Result<FittedTermCollection, EstimationError> {
171    if groups.is_empty() {
172        return fit_term_collection_with_penalty_block_gamma_priors(
173            data, y, weights, offset, spec, priors, family, options,
174        );
175    }
176    if priors.is_empty() {
177        return fit_term_collection_with_coefficient_groups(
178            data, y, weights, offset, spec, groups, family, options,
179        );
180    }
181
182    let design = build_term_collection_design(data, spec)?;
183    let base_fit_opts = adaptive_fit_options_base(options, &design);
184    let base_rho_prior = realize_keyed_penalty_block_gamma_priors(&design, priors)
185        .map_err(EstimationError::BasisError)?;
186    let realized = design
187        .realize_coefficient_groups(groups, &base_rho_prior)
188        .map_err(EstimationError::BasisError)?;
189    let mut grouped_options = base_fit_opts.clone();
190    grouped_options.rho_prior = realized.rho_prior;
191    let fitted = FittedTermCollection {
192        fit: gam_solve::estimate::fit_gam_with_penalty_specs(
193            design.design.clone(),
194            y,
195            weights,
196            offset,
197            realized.penalty_specs,
198            realized.nullspace_dims,
199            family.clone(),
200            &grouped_options,
201        )?,
202        design,
203        adaptive_diagnostics: None,
204    };
205    enforce_term_constraint_feasibility(&fitted.design, &fitted.fit)?;
206    Ok(fitted)
207}
208
209fn fit_term_collection_forspecwith_heuristic_lambdas(
210    data: ArrayView2<'_, f64>,
211    y: ArrayView1<'_, f64>,
212    weights: ArrayView1<'_, f64>,
213    offset: ArrayView1<'_, f64>,
214    spec: &TermCollectionSpec,
215    heuristic_lambdas: Option<&[f64]>,
216    family: LikelihoodSpec,
217    options: &FitOptions,
218) -> Result<FittedTermCollection, EstimationError> {
219    let base_design = build_term_collection_design(data, spec)?;
220    fit_term_collection_on_realized_design(
221        y,
222        weights,
223        offset,
224        spec,
225        &base_design,
226        heuristic_lambdas,
227        family,
228        options,
229    )
230}
231
232fn has_bounded_linear_terms(spec: &TermCollectionSpec) -> bool {
233    spec.linear_terms.iter().any(|term| {
234        matches!(
235            term.coefficient_geometry,
236            LinearCoefficientGeometry::Bounded { .. }
237        )
238    })
239}
240
241fn fit_term_collection_on_realized_design(
242    y: ArrayView1<'_, f64>,
243    weights: ArrayView1<'_, f64>,
244    offset: ArrayView1<'_, f64>,
245    spec: &TermCollectionSpec,
246    design: &TermCollectionDesign,
247    heuristic_lambdas: Option<&[f64]>,
248    family: LikelihoodSpec,
249    options: &FitOptions,
250) -> Result<FittedTermCollection, EstimationError> {
251    if has_bounded_linear_terms(spec) {
252        return fit_bounded_term_collection_with_design(
253            y,
254            weights,
255            offset,
256            spec,
257            design,
258            heuristic_lambdas,
259            family,
260            options,
261        );
262    }
263    let mut base_fit_opts = adaptive_fit_options_base(options, design);
264    // Lift the symmetric log-λ cap off the smoothing coordinates of
265    // well-determined Gaussian-identity B-spline / thin-plate / tensor smooths so
266    // REML can drive λ to the value the data wants — including λ → ∞ when a
267    // term's signal lives in its penalty null space (#1271 single-penalty tp/ps,
268    // #1266 double-penalty selection). Length-safe: only fires when the inner ρ
269    // aligns 1:1 with the penalty blocks (see `relax_smoothing_rho_prior`).
270    base_fit_opts.rho_prior = relax_smoothing_rho_prior(options, design);
271    let fitted = FittedTermCollection {
272        fit: fit_gamwith_heuristic_lambdas(
273            design.design.clone(),
274            y,
275            weights,
276            offset,
277            &design.penalties,
278            heuristic_lambdas,
279            family.clone(),
280            &base_fit_opts,
281        )?,
282        design: design.clone(),
283        adaptive_diagnostics: None,
284    };
285    enforce_term_constraint_feasibility(&fitted.design, &fitted.fit)?;
286
287    let adaptive_opts = options.adaptive_regularization.clone().unwrap_or_default();
288    if !adaptive_opts.enabled {
289        return Ok(fitted);
290    }
291    let runtime_caches = extract_spatial_operator_runtime_caches(spec, &fitted.design)?;
292    if runtime_caches.is_empty() {
293        return Ok(fitted);
294    }
295    // Spatial-adaptive overlay always runs when the operator caches are
296    // non-empty. Catastrophic-overfit protection lives in the operator-log-λ
297    // box bound (Fix B at the BFGS bounds construction), which caps maximum
298    // unpenalization regardless of n. Production fits at n≈300K must run the
299    // overlay; the previous n-gate (n < max(4·p_total, 200)) silently skipped
300    // it for any small-n test, contradicting that contract.
301    fit_term_collectionwith_exact_spatial_adaptive_regularization(
302        fitted,
303        y,
304        weights,
305        offset,
306        family,
307        options,
308        &runtime_caches,
309    )
310}
311
312#[derive(Clone)]
313struct SpatialOperatorRuntimeCache {
314    termname: String,
315    feature_cols: Vec<usize>,
316    coeff_global_range: Range<usize>,
317    mass_penalty_global_idx: usize,
318    tension_penalty_global_idx: usize,
319    stiffness_penalty_global_idx: usize,
320    d0: Array2<f64>,
321    d1: Array2<f64>,
322    d2: Array2<f64>,
323    collocation_points: Array2<f64>,
324    dimension: usize,
325}
326
327#[derive(Clone)]
328struct SpatialAdaptiveWeights {
329    inv_magweight: Array1<f64>,
330    invgradweight: Array1<f64>,
331    inv_lapweight: Array1<f64>,
332}
333
334#[derive(Clone)]
335struct CharbonnierScalarBlockState {
336    signal: Array1<f64>,
337    radius: Array1<f64>,
338    epsilon: f64,
339}
340
341impl CharbonnierScalarBlockState {
342    fn from_signal(signal: Array1<f64>, epsilon: f64) -> Self {
343        let eps = epsilon.max(1e-12);
344        let radius = signal.mapv(|t| (t * t + eps * eps).sqrt());
345        Self {
346            signal,
347            radius,
348            epsilon: eps,
349        }
350    }
351
352    fn absolute_signal(&self) -> Array1<f64> {
353        self.signal.mapv(f64::abs)
354    }
355
356    fn penalty_value(&self) -> f64 {
357        self.radius.iter().map(|r| r - self.epsilon).sum::<f64>()
358    }
359
360    fn betagradient_coeff(&self) -> Array1<f64> {
361        Array1::from_iter(
362            self.signal
363                .iter()
364                .zip(self.radius.iter())
365                .map(|(t, r)| t / r),
366        )
367    }
368
369    fn betahessian_diag(&self) -> Array1<f64> {
370        let eps2 = self.epsilon * self.epsilon;
371        self.radius.mapv(|r| eps2 / r.powi(3))
372    }
373
374    fn log_epsilon_gradient_terms(&self) -> Array1<f64> {
375        let epsilon = self.epsilon;
376        let eps2 = epsilon * epsilon;
377        self.radius.mapv(|r| eps2 / r - epsilon)
378    }
379
380    fn log_epsilon_betagradient_coeff(&self) -> Array1<f64> {
381        let eps2 = self.epsilon * self.epsilon;
382        Array1::from_iter(
383            self.signal
384                .iter()
385                .zip(self.radius.iter())
386                .map(|(t, r)| -eps2 * t / r.powi(3)),
387        )
388    }
389
390    fn log_epsilon_hessian_terms(&self) -> Array1<f64> {
391        let epsilon = self.epsilon;
392        let eps2 = epsilon * epsilon;
393        let eps4 = eps2 * eps2;
394        self.radius
395            .mapv(|r| 2.0 * eps2 / r - eps4 / r.powi(3) - epsilon)
396    }
397
398    fn surrogateweights_posterior_snr(
399        &self,
400        variance: &Array1<f64>,
401        weight_floor: f64,
402        weight_ceiling: f64,
403    ) -> (Array1<f64>, Array1<f64>) {
404        // Posterior-SNR (credible-magnitude) reweighting of the scalar MM
405        // majorizer.
406        //
407        // The magnitude-only surrogate weight uses the *point-estimate* radius
408        //
409        //   r_k^mag = sqrt( t_k^2 + eps^2 ),   t_k = (D0 beta_hat)_k,
410        //   w_k     = 1 / r_k^mag.
411        //
412        // The weight multiplies the local quadratic surrogate penalty
413        // w_k (D0 beta)^2, so a *small* w_k leaves the response un-penalized
414        // (treated as a genuine feature) and a *large* w_k pulls it toward zero
415        // (enforces flatness). The failure of the point-estimate radius is that
416        // a response t_k which is large only because it is poorly determined
417        // gets a tiny weight and is left un-penalized — the weight chases noise
418        // in low-information regions.
419        //
420        // Resolution via the posterior second moment under the working-Laplace
421        // posterior beta ~ N(beta_hat, Sigma_beta), Sigma_beta = H^{-1}: the
422        // variance of the response is
423        //
424        //   Var( (D0 beta)_k ) = (D0 Sigma_beta D0^T)_kk >= 0,
425        //
426        // and the *credible* (noise-floor-corrected) squared magnitude is
427        //
428        //   t_k^credible^2 = max( t_k^2 - Var(...)_k , 0 ),
429        //   r_k^snr        = sqrt( t_k^credible^2 + eps^2 ),
430        //   w_k            = 1 / r_k^snr.
431        //
432        // The principled fix evaluates the MM weight at the *credible* (noise-
433        // floor-corrected) squared magnitude rather than the raw point estimate.
434        // Under the working-Laplace posterior `beta ~ N(beta_hat, Sigma_beta)`,
435        // `Sigma_beta = H^{-1}`, the response `t_k = (D0 beta)_k` has posterior
436        // mean `t_hat_k` and variance `V_k = (D0 Sigma_beta D0^T)_kk >= 0`. The
437        // expected squared response is `E[t_k^2] = t_hat_k^2 + V_k`, so the part
438        // of `t_hat_k^2` that exceeds the noise floor `V_k` is the credibly real
439        // squared magnitude
440        //
441        //   t_k^credible^2 = max( t_hat_k^2 - V_k , 0 ),
442        //   r_k^snr        = sqrt( t_k^credible^2 + eps^2 ),   w_k = 1 / r_k^snr.
443        //
444        // This is the correct realization of the intent. Where the point
445        // estimate is a *credible* edge (t_hat^2 >> V) the credible magnitude is
446        // ~|t_hat| and the weight is essentially `1/|t_hat|` (left un-penalized,
447        // edge preserved). Where the large point-estimate magnitude is *noise*
448        // (t_hat^2 <~ V) the credible magnitude collapses to 0 and the weight
449        // rises to `1/eps` (extra smoothing, noise suppressed). The weight is
450        // monotone non-decreasing in `V`, and is bounded above by `1/eps` — the
451        // *same* ceiling the magnitude-only weight `1/sqrt(t^2 + eps^2)` already
452        // attains at `t = 0` (and clamped by `weight_ceiling`), so it is not an
453        // unbounded blow-up: it only moves the noise-dominated rows to the flat-
454        // response weight they would have had with a credible estimate of zero
455        // curvature. The earlier delta-method form `f + ½ f'' V` was non-monotone
456        // (`f''` flips sign at `2t^2 = eps^2`) and unbounded in `V`, which left
457        // noisy rows under-penalized and was the source of the SNR regression.
458        // With `V == 0` everywhere this degrades exactly to `surrogateweights`
459        // (`1/sqrt(t^2 + eps^2)`), so any covariance-unavailable path is
460        // unchanged.
461        let eps2 = self.epsilon * self.epsilon;
462        let weight = Array1::from_iter(self.signal.iter().zip(variance.iter()).map(|(&t, &v)| {
463            let credible2 = (t * t - v.max(0.0)).max(0.0);
464            let r = (credible2 + eps2).sqrt();
465            (1.0 / r).clamp(weight_floor, weight_ceiling)
466        }));
467        let invweight = weight.mapv(|u| 1.0 / u);
468        (weight, invweight)
469    }
470
471    fn directionalhessian_diag(&self, direction_signal: &Array1<f64>) -> Array1<f64> {
472        // Scalar-image directional third derivative:
473        //
474        // If t(beta) = A beta and
475        //   H(beta) = A^T diag( eps^2 / (t_k(beta)^2 + eps^2)^(3/2) ) A,
476        // then for q = A u,
477        //
478        //   D(H)[u]
479        //   = A^T diag( -3 eps^2 t_k q_k / (t_k^2 + eps^2)^(5/2) ) A.
480        //
481        // This is one of the exact P_{beta,beta,beta}[u] terms needed by the
482        // Laplace hypergradient
483        //
484        //   d/dtheta log det H = tr(H^{-1} Hdot_theta),
485        //   Hdot_theta = J_{beta,beta,theta} + D_beta(H)[beta_theta].
486        let eps2 = self.epsilon * self.epsilon;
487        Array1::from_iter(
488            self.signal
489                .iter()
490                .zip(direction_signal.iter())
491                .zip(self.radius.iter())
492                .map(|((t, q), r)| -3.0 * eps2 * t * q / r.powi(5)),
493        )
494    }
495
496    /// Exact scalar-image fourth derivative contracted along two coefficient
497    /// directions: with `t(β)=Aβ`, `H(β)=Aᵀ diag(ψ''(t_k)) A`,
498    /// `ψ''(t)=ε²/r³`, the second directional derivative of `H` along
499    /// `(u, v)` (signals `q1=A u`, `q2=A v`) is
500    /// `Aᵀ diag( ψ''''(t_k) q1_k q2_k ) A`, with
501    /// `ψ''''(t) = -3 ε² / r⁵ + 15 ε² t² / r⁷`.
502    fn second_directionalhessian_diag(
503        &self,
504        direction1_signal: &Array1<f64>,
505        direction2_signal: &Array1<f64>,
506    ) -> Array1<f64> {
507        let eps2 = self.epsilon * self.epsilon;
508        Array1::from_iter(
509            self.signal
510                .iter()
511                .zip(direction1_signal.iter())
512                .zip(direction2_signal.iter())
513                .zip(self.radius.iter())
514                .map(|(((t, q1), q2), r)| {
515                    let r2 = r * r;
516                    let psi4 = -3.0 * eps2 / r.powi(5) + 15.0 * eps2 * t * t / (r.powi(5) * r2);
517                    psi4 * q1 * q2
518                }),
519        )
520    }
521
522    fn log_epsilon_betahessian_diag(&self) -> Array1<f64> {
523        let eps2 = self.epsilon * self.epsilon;
524        let eps4 = eps2 * eps2;
525        Array1::from_iter(
526            self.signal
527                .iter()
528                .zip(self.radius.iter())
529                .map(|(_, r)| 2.0 * eps2 / r.powi(3) - 3.0 * eps4 / r.powi(5)),
530        )
531    }
532
533    fn log_epsilon_beta_mixed_second_coeff(&self) -> Array1<f64> {
534        let eps2 = self.epsilon * self.epsilon;
535        Array1::from_iter(
536            self.signal
537                .iter()
538                .zip(self.radius.iter())
539                .map(|(t, r)| eps2 * t * (eps2 - 2.0 * t * t) / r.powi(5)),
540        )
541    }
542
543    fn log_epsilon_betahessian_second_diag(&self) -> Array1<f64> {
544        let eps2 = self.epsilon * self.epsilon;
545        let eps4 = eps2 * eps2;
546        let eps6 = eps4 * eps2;
547        Array1::from_iter(
548            self.radius.iter().map(|r| {
549                4.0 * eps2 / r.powi(3) - 18.0 * eps4 / r.powi(5) + 15.0 * eps6 / r.powi(7)
550            }),
551        )
552    }
553
554    fn log_epsilon_betahessian_directional_diag(
555        &self,
556        direction_signal: &Array1<f64>,
557    ) -> Array1<f64> {
558        let eps2 = self.epsilon * self.epsilon;
559        let eps4 = eps2 * eps2;
560        Array1::from_iter(
561            self.signal
562                .iter()
563                .zip(direction_signal.iter())
564                .zip(self.radius.iter())
565                .map(|((t, q), r)| (-6.0 * eps2 * t / r.powi(5) + 15.0 * eps4 * t / r.powi(7)) * q),
566        )
567    }
568}
569
570#[derive(Clone)]
571struct CharbonnierGroupedBlockState {
572    norm: Array1<f64>,
573    radius: Array1<f64>,
574    signal_blocks: Array2<f64>,
575    epsilon: f64,
576}
577
578impl CharbonnierGroupedBlockState {
579    fn from_signal_blocks(signal_blocks: Array2<f64>, epsilon: f64) -> Self {
580        let eps = epsilon.max(1e-12);
581        let norm = Array1::from_iter(
582            signal_blocks
583                .rows()
584                .into_iter()
585                .map(|row| row.iter().map(|v| v * v).sum::<f64>().sqrt()),
586        );
587        let radius = norm.mapv(|g| (g * g + eps * eps).sqrt());
588        Self {
589            norm,
590            radius,
591            signal_blocks,
592            epsilon: eps,
593        }
594    }
595
596    fn penalty_value(&self) -> f64 {
597        self.radius.iter().map(|r| r - self.epsilon).sum::<f64>()
598    }
599
600    fn norm_signal(&self) -> Array1<f64> {
601        self.norm.clone()
602    }
603
604    fn betagradient_blocks(&self) -> Array2<f64> {
605        let mut out = self.signal_blocks.clone();
606        for (k, mut row) in out.rows_mut().into_iter().enumerate() {
607            let scale = 1.0 / self.radius[k];
608            row.mapv_inplace(|v| v * scale);
609        }
610        out
611    }
612
613    fn betahessian_blocks(&self) -> Vec<Array2<f64>> {
614        let mut out = Vec::with_capacity(self.signal_blocks.nrows());
615        for (k, row) in self.signal_blocks.rows().into_iter().enumerate() {
616            let dim = row.len();
617            let mut block = Array2::<f64>::eye(dim);
618            block.mapv_inplace(|v| v / self.radius[k]);
619            for i in 0..dim {
620                for j in 0..dim {
621                    block[[i, j]] -= row[i] * row[j] / self.radius[k].powi(3);
622                }
623            }
624            out.push(block);
625        }
626        out
627    }
628
629    fn log_epsilon_gradient_terms(&self) -> Array1<f64> {
630        let epsilon = self.epsilon;
631        let eps2 = epsilon * epsilon;
632        self.radius.mapv(|r| eps2 / r - epsilon)
633    }
634
635    fn log_epsilon_betagradient_blocks(&self) -> Array2<f64> {
636        let mut out = self.signal_blocks.clone();
637        let eps2 = self.epsilon * self.epsilon;
638        for (k, mut row) in out.rows_mut().into_iter().enumerate() {
639            let scale = -eps2 / self.radius[k].powi(3);
640            row.mapv_inplace(|v| v * scale);
641        }
642        out
643    }
644
645    fn log_epsilon_hessian_terms(&self) -> Array1<f64> {
646        let epsilon = self.epsilon;
647        let eps2 = epsilon * epsilon;
648        let eps4 = eps2 * eps2;
649        self.radius
650            .mapv(|r| 2.0 * eps2 / r - eps4 / r.powi(3) - epsilon)
651    }
652
653    fn surrogateweights_posterior_snr(
654        &self,
655        variance: &Array1<f64>,
656        weight_floor: f64,
657        weight_ceiling: f64,
658    ) -> (Array1<f64>, Array1<f64>) {
659        // Grouped posterior-SNR (credible-magnitude) reweighting.
660        //
661        // The magnitude-only grouped surrogate weight uses the point-estimate
662        // block norm
663        //
664        //   g_k     = ||v_k||_2,   v_k = G_k beta_hat,
665        //   r_k^mag = sqrt( g_k^2 + eps^2 ),
666        //   w_k     = 1 / r_k^mag.
667        //
668        // The posterior covariance of the *block* response v_k = G_k beta under
669        // beta ~ N(beta_hat, Sigma_beta), Sigma_beta = H^{-1}, has total trace
670        //
671        //   Cov(v_k)     = G_k Sigma_beta G_k^T   (a block_dim x block_dim block),
672        //   variance[k]  = tr(Cov(v_k)) = sum_axis ( G_k[axis] Sigma_beta G_k[axis]^T ),
673        //
674        // i.e. the variance aggregated over the axis-block in the same way
675        // `norm` aggregates ||v_k||^2. As for the scalar block, we deflate the
676        // squared block norm by this noise floor to obtain the credible squared
677        // magnitude and shrink poorly-determined responses toward zero:
678        //
679        //   g_k^credible^2 = max( g_k^2 - tr(Cov(v_k)) , 0 ),
680        //   r_k^snr        = sqrt( g_k^credible^2 + eps^2 ),   w_k = 1 / r_k^snr.
681        //
682        // A block whose norm is credibly large (g_k^2 >> tr Cov) keeps a small
683        // weight (real feature, left un-penalized); a block whose norm is
684        // dominated by posterior variance has its credible norm collapse to 0,
685        // raising the weight to `1/eps` (noise suppressed). The weight is
686        // monotone non-decreasing in `tr Cov` and bounded above by `1/eps` — the
687        // same ceiling the magnitude-only weight already attains at `g = 0`
688        // (and clamped by `weight_ceiling`), so it is not an unbounded blow-up.
689        //
690        // This evaluates the grouped MM weight `f(v) = (||v||^2 + eps^2)^{-1/2}`
691        // at the credible block norm rather than at the raw point estimate. The
692        // expected squared block norm under `v_k ~ N(v_hat_k, C_k)` is
693        // `E[||v_k||^2] = ||v_hat_k||^2 + tr(C_k)`, so the credibly-real squared
694        // norm is `max(g_k^2 - tr(C_k), 0)`, identical in form to the scalar
695        // path (`block_dim == 1` recovers it exactly). The earlier delta-method
696        // correction `½ Σ ∂²f · C` was non-monotone (its sign flips with the
697        // Hessian of `f`) and unbounded in `tr C`, which under-penalized noisy
698        // blocks and was the source of the SNR regression. With `tr C == 0` it
699        // recovers `1/sqrt(g^2 + eps^2)`.
700        let eps2 = self.epsilon * self.epsilon;
701        let weight = Array1::from_iter(self.norm.iter().zip(variance.iter()).map(|(&g, &v)| {
702            let credible2 = (g * g - v.max(0.0)).max(0.0);
703            let r = (credible2 + eps2).sqrt();
704            (1.0 / r).clamp(weight_floor, weight_ceiling)
705        }));
706        let invweight = weight.mapv(|u| 1.0 / u);
707        (weight, invweight)
708    }
709
710    fn directionalhessian_blocks(&self, direction_blocks: &Array2<f64>) -> Vec<Array2<f64>> {
711        // Exact grouped directional third derivative for the slope penalty.
712        //
713        // For each collocation block k:
714        //   v_k = G_k beta,
715        //   q_k = G_k u,
716        //   r_k = sqrt(||v_k||^2 + eps^2),
717        //
718        // the exact Hessian block for psi(g; eps) = sqrt(g^2 + eps^2) - eps is
719        //   B_k,
720        //   B_k = (1 / r_k) I - v_k v_k^T / r_k^3.
721        //
722        // Differentiating B_k along u gives
723        //   M_k(u)
724        //   = -(v_k^T q_k / r_k^3) I
725        //     - (q_k v_k^T + v_k q_k^T) / r_k^3
726        //     + 3 (v_k^T q_k) v_k v_k^T / r_k^5.
727        //
728        // This expression must be symmetric because it is the directional
729        // derivative of the symmetric matrix
730        //
731        //   B_k = (1 / r_k) I - v_k v_k^T / r_k^3.
732        //
733        // The full directional penalty Hessian map is then
734        //   D(H_g)[u] = lambda_g * sum_k G_k^T M_k(u) G_k.
735        let mut out = Vec::with_capacity(self.signal_blocks.nrows());
736        for (k, (v, q)) in self
737            .signal_blocks
738            .rows()
739            .into_iter()
740            .zip(direction_blocks.rows().into_iter())
741            .enumerate()
742        {
743            let dim = v.len();
744            let dot = v.iter().zip(q.iter()).map(|(a, b)| a * b).sum::<f64>();
745            let r3 = self.radius[k].powi(3);
746            let r5 = self.radius[k].powi(5);
747            let mut block = Array2::<f64>::eye(dim);
748            block.mapv_inplace(|x| -dot * x / r3);
749            for i in 0..dim {
750                for j in 0..dim {
751                    block[[i, j]] -= (q[i] * v[j] + v[i] * q[j]) / r3;
752                    block[[i, j]] += 3.0 * dot * v[i] * v[j] / r5;
753                }
754            }
755            out.push(block);
756        }
757        out
758    }
759
760    /// Exact grouped second directional derivative of the slope/curvature block
761    /// Hessian `B_k = (1/r_k) I − v_k v_kᵀ / r_k³` along two coefficient
762    /// directions, with per-block signal images `a_k = G_k u`, `b_k = G_k w`.
763    ///
764    /// `B_k`'s first directional derivative along `a` is
765    ///   `M_k(a) = −(v·a/r³) I − (a vᵀ + v aᵀ)/r³ + 3 (v·a) v vᵀ/r⁵`
766    /// (see `directionalhessian_blocks`). Differentiating `M_k(a)` once more
767    /// along `b` (i.e. `v ← v + t b`) gives the symmetric block
768    ///   `N_k(a,b) = (−a·b/r³ + 3 (v·a)(v·b)/r⁵) I`
769    ///            `  − (a bᵀ + b aᵀ)/r³`
770    ///            `  + 3 (v·b)(a vᵀ + v aᵀ)/r⁵`
771    ///            `  + 3 (a·b) v vᵀ/r⁵`
772    ///            `  + 3 (v·a)(b vᵀ + v bᵀ)/r⁵`
773    ///            `  − 15 (v·a)(v·b) v vᵀ/r⁷`,
774    /// so `D²_β H_g[u,w] = λ_g Σ_k G_kᵀ N_k(a_k,b_k) G_k`. `N_k` is symmetric in
775    /// `a ↔ b`, matching `D²H[u,w] = D²H[w,u]`.
776    fn second_directionalhessian_blocks(
777        &self,
778        direction1_blocks: &Array2<f64>,
779        direction2_blocks: &Array2<f64>,
780    ) -> Vec<Array2<f64>> {
781        let mut out = Vec::with_capacity(self.signal_blocks.nrows());
782        for ((k, v), (a, b)) in self.signal_blocks.rows().into_iter().enumerate().zip(
783            direction1_blocks
784                .rows()
785                .into_iter()
786                .zip(direction2_blocks.rows().into_iter()),
787        ) {
788            let dim = v.len();
789            let dot = |x: ndarray::ArrayView1<'_, f64>, y: ndarray::ArrayView1<'_, f64>| {
790                x.iter().zip(y.iter()).map(|(p, q)| p * q).sum::<f64>()
791            };
792            let sa = dot(v, a);
793            let sb = dot(v, b);
794            let ab = dot(a, b);
795            let r = self.radius[k];
796            let r3 = r.powi(3);
797            let r5 = r.powi(5);
798            let r7 = r5 * r * r;
799            let diag = -ab / r3 + 3.0 * sa * sb / r5;
800            let mut block = Array2::<f64>::eye(dim);
801            block.mapv_inplace(|x| diag * x);
802            for i in 0..dim {
803                for j in 0..dim {
804                    block[[i, j]] -= (a[i] * b[j] + b[i] * a[j]) / r3;
805                    block[[i, j]] += 3.0 * sb * (a[i] * v[j] + v[i] * a[j]) / r5;
806                    block[[i, j]] += 3.0 * ab * v[i] * v[j] / r5;
807                    block[[i, j]] += 3.0 * sa * (b[i] * v[j] + v[i] * b[j]) / r5;
808                    block[[i, j]] -= 15.0 * sa * sb * v[i] * v[j] / r7;
809                }
810            }
811            out.push(block);
812        }
813        out
814    }
815
816    fn log_epsilon_betahessian_blocks(&self) -> Vec<Array2<f64>> {
817        let mut out = Vec::with_capacity(self.signal_blocks.nrows());
818        for (k, row) in self.signal_blocks.rows().into_iter().enumerate() {
819            let dim = row.len();
820            let r3 = self.radius[k].powi(3);
821            let r5 = self.radius[k].powi(5);
822            let mut block = Array2::<f64>::eye(dim);
823            let eps2 = self.epsilon * self.epsilon;
824            block.mapv_inplace(|v| -eps2 * v / r3);
825            for i in 0..dim {
826                for j in 0..dim {
827                    block[[i, j]] += 3.0 * eps2 * row[i] * row[j] / r5;
828                }
829            }
830            out.push(block);
831        }
832        out
833    }
834
835    fn log_epsilon_beta_mixed_second_blocks(&self) -> Array2<f64> {
836        let mut out = self.signal_blocks.clone();
837        let eps2 = self.epsilon * self.epsilon;
838        for (k, mut row) in out.rows_mut().into_iter().enumerate() {
839            let norm2 = self.norm[k] * self.norm[k];
840            let scale = eps2 * (eps2 - 2.0 * norm2) / self.radius[k].powi(5);
841            row.mapv_inplace(|v| v * scale);
842        }
843        out
844    }
845
846    fn log_epsilon_betahessian_second_blocks(&self) -> Vec<Array2<f64>> {
847        let mut out = Vec::with_capacity(self.signal_blocks.nrows());
848        let eps2 = self.epsilon * self.epsilon;
849        for (k, row) in self.signal_blocks.rows().into_iter().enumerate() {
850            let dim = row.len();
851            let norm2 = self.norm[k] * self.norm[k];
852            let r5 = self.radius[k].powi(5);
853            let r7 = self.radius[k].powi(7);
854            let mut block = Array2::<f64>::eye(dim);
855            block.mapv_inplace(|v| eps2 * (eps2 - 2.0 * norm2) * v / r5);
856            for i in 0..dim {
857                for j in 0..dim {
858                    block[[i, j]] += 3.0 * eps2 * (2.0 * norm2 - 3.0 * eps2) * row[i] * row[j] / r7;
859                }
860            }
861            out.push(block);
862        }
863        out
864    }
865
866    fn log_epsilon_betahessian_directional_blocks(
867        &self,
868        direction_blocks: &Array2<f64>,
869    ) -> Vec<Array2<f64>> {
870        let mut out = Vec::with_capacity(self.signal_blocks.nrows());
871        let eps2 = self.epsilon * self.epsilon;
872        for (k, (v, q)) in self
873            .signal_blocks
874            .rows()
875            .into_iter()
876            .zip(direction_blocks.rows().into_iter())
877            .enumerate()
878        {
879            let dim = v.len();
880            let dot = v.iter().zip(q.iter()).map(|(a, b)| a * b).sum::<f64>();
881            let r5 = self.radius[k].powi(5);
882            let r7 = self.radius[k].powi(7);
883            let mut block = Array2::<f64>::eye(dim);
884            block.mapv_inplace(|x| 3.0 * eps2 * dot * x / r5);
885            for i in 0..dim {
886                for j in 0..dim {
887                    block[[i, j]] += 3.0 * eps2 * (q[i] * v[j] + v[i] * q[j]) / r5;
888                    block[[i, j]] -= 15.0 * eps2 * dot * v[i] * v[j] / r7;
889                }
890            }
891            out.push(block);
892        }
893        out
894    }
895}
896
897fn scalar_operatorgradient(operator: &Array2<f64>, coeff: &Array1<f64>) -> Array1<f64> {
898    operator.t().dot(coeff)
899}
900
901fn scalar_operatorhessian(operator: &Array2<f64>, diag: &Array1<f64>) -> Array2<f64> {
902    let mut weighted = operator.clone();
903    for (k, &w) in diag.iter().enumerate() {
904        weighted.row_mut(k).mapv_inplace(|v| v * w);
905    }
906    let gram = operator.t().dot(&weighted);
907    (&gram + &gram.t().to_owned()) * 0.5
908}
909
910fn grouped_operatorgradient(
911    d1: &Array2<f64>,
912    dimension: usize,
913    blocks: &Array2<f64>,
914) -> Result<Array1<f64>, EstimationError> {
915    if blocks.ncols() != dimension {
916        crate::bail_invalid_estim!(
917            "grouped gradient block dimension mismatch: got {}, expected {dimension}",
918            blocks.ncols()
919        );
920    }
921    if d1.nrows() != blocks.nrows() * dimension {
922        crate::bail_invalid_estim!(
923            "grouped gradient row mismatch: D1 has {} rows, blocks imply {}",
924            d1.nrows(),
925            blocks.nrows() * dimension
926        );
927    }
928    let mut out = Array1::<f64>::zeros(d1.ncols());
929    for k in 0..blocks.nrows() {
930        let gk = d1
931            .slice(s![k * dimension..(k + 1) * dimension, ..])
932            .to_owned();
933        out += &gk.t().dot(&blocks.row(k));
934    }
935    Ok(out)
936}
937
938fn grouped_operatorhessian(
939    d1: &Array2<f64>,
940    dimension: usize,
941    blocks: &[Array2<f64>],
942) -> Result<Array2<f64>, EstimationError> {
943    if d1.nrows() != blocks.len() * dimension {
944        crate::bail_invalid_estim!(
945            "grouped Hessian row mismatch: D1 has {} rows, blocks imply {}",
946            d1.nrows(),
947            blocks.len() * dimension
948        );
949    }
950    let p = d1.ncols();
951    let mut out = Array2::<f64>::zeros((p, p));
952    for (k, block) in blocks.iter().enumerate() {
953        if block.nrows() != dimension || block.ncols() != dimension {
954            crate::bail_invalid_estim!(
955                "grouped Hessian block {k} has shape {}x{}, expected {}x{}",
956                block.nrows(),
957                block.ncols(),
958                dimension,
959                dimension
960            );
961        }
962        let gk = d1
963            .slice(s![k * dimension..(k + 1) * dimension, ..])
964            .to_owned();
965        out += &gk.t().dot(&block.dot(&gk));
966    }
967    Ok((&out + &out.t().to_owned()) * 0.5)
968}
969
970#[derive(Clone)]
971struct SpatialPenaltyExactState {
972    magnitude: CharbonnierScalarBlockState,
973    gradient: CharbonnierGroupedBlockState,
974    curvature: CharbonnierGroupedBlockState,
975}
976
977fn collocationgradient_blocks(
978    gradrows: &Array1<f64>,
979    dimension: usize,
980) -> Result<Array2<f64>, EstimationError> {
981    if dimension == 0 || !gradrows.len().is_multiple_of(dimension) {
982        crate::bail_invalid_estim!(
983            "invalid collocation gradient layout: rows={}, dimension={dimension}",
984            gradrows.len()
985        );
986    }
987    let p = gradrows.len() / dimension;
988    let mut out = Array2::<f64>::zeros((p, dimension));
989    for k in 0..p {
990        for axis in 0..dimension {
991            out[[k, axis]] = gradrows[k * dimension + axis];
992        }
993    }
994    Ok(out)
995}
996
997fn collocationhessian_blocks(
998    hessianrows: &Array1<f64>,
999    dimension: usize,
1000) -> Result<Array2<f64>, EstimationError> {
1001    let block_dim = dimension.checked_mul(dimension).ok_or_else(|| {
1002        EstimationError::InvalidInput("invalid collocation Hessian dimension overflow".to_string())
1003    })?;
1004    if block_dim == 0 || !hessianrows.len().is_multiple_of(block_dim) {
1005        crate::bail_invalid_estim!(
1006            "invalid collocation Hessian layout: rows={}, dimension={dimension}",
1007            hessianrows.len()
1008        );
1009    }
1010    let p = hessianrows.len() / block_dim;
1011    let mut out = Array2::<f64>::zeros((p, block_dim));
1012    for k in 0..p {
1013        for idx in 0..block_dim {
1014            out[[k, idx]] = hessianrows[k * block_dim + idx];
1015        }
1016    }
1017    Ok(out)
1018}
1019
1020impl SpatialPenaltyExactState {
1021    fn from_beta_local(
1022        beta_local: ArrayView1<'_, f64>,
1023        cache: &SpatialOperatorRuntimeCache,
1024        epsilons: [f64; 3],
1025    ) -> Result<Self, EstimationError> {
1026        // Exact collocation-state extraction for the three Charbonnier penalty blocks.
1027        //
1028        // For one spatial smooth term with coefficient vector beta_local, the exact
1029        // operator-decomposition penalty is built from three collocation images:
1030        //
1031        //   magnitude:  f = D0 beta_local
1032        //   slope:      v_k = G_k beta_local
1033        //   curvature:  H_k = D2_k beta_local
1034        //
1035        // where the gradient operator is stored in row-stacked form:
1036        //
1037        //   D1 beta_local in R^(P * d),
1038        //   row layout = (point 0, axis 0..d-1), (point 1, axis 0..d-1), ...
1039        //   D2 beta_local in R^(P * d * d),
1040        //   row layout = (point, Hessian axis_a, Hessian axis_b).
1041        //
1042        // so we first reshape that stacked vector into the grouped block array
1043        //
1044        //   [v_0^T
1045        //    ...
1046        //    v_(P-1)^T]  in R^(P x d).
1047        //
1048        // The three exact Charbonnier block states then carry:
1049        //   - the raw operator signals,
1050        //   - their radii sqrt(signal^2 + eps^2) or sqrt(||v_k||^2 + eps^2),
1051        //   - and all exact derivatives derived from those radii.
1052        //
1053        // This is the canonical translation from coefficient-space beta to the
1054        // penalty-side mathematical objects used throughout the implementation.
1055        let gradientrows = cache.d1.dot(&beta_local);
1056        let hessianrows = cache.d2.dot(&beta_local);
1057        Ok(Self {
1058            magnitude: CharbonnierScalarBlockState::from_signal(
1059                cache.d0.dot(&beta_local),
1060                epsilons[0],
1061            ),
1062            gradient: CharbonnierGroupedBlockState::from_signal_blocks(
1063                collocationgradient_blocks(&gradientrows, cache.dimension)?,
1064                epsilons[1],
1065            ),
1066            curvature: CharbonnierGroupedBlockState::from_signal_blocks(
1067                collocationhessian_blocks(&hessianrows, cache.dimension)?,
1068                epsilons[2],
1069            ),
1070        })
1071    }
1072
1073    fn absolute_collocation_magnitudes(&self) -> (Array1<f64>, Array1<f64>, Array1<f64>) {
1074        (
1075            self.magnitude.absolute_signal(),
1076            self.gradient.norm_signal(),
1077            self.curvature.norm_signal(),
1078        )
1079    }
1080}
1081
1082fn robust_epsilon_from_samples(values: &[f64], min_epsilon_cfg: f64) -> f64 {
1083    if values.is_empty() {
1084        return min_epsilon_cfg.max(1e-12);
1085    }
1086    let mut clean = values
1087        .iter()
1088        .copied()
1089        .filter(|v| v.is_finite() && *v >= 0.0)
1090        .collect::<Vec<_>>();
1091    if clean.is_empty() {
1092        return min_epsilon_cfg.max(1e-12);
1093    }
1094    clean.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
1095
1096    let n = clean.len();
1097    let median = quantile_from_sorted(&clean, 0.5);
1098    let q75 = quantile_from_sorted(&clean, 0.75);
1099    let q95 = quantile_from_sorted(&clean, 0.95);
1100
1101    let mut abs_dev = clean
1102        .iter()
1103        .map(|v| (v - median).abs())
1104        .filter(|v| v.is_finite())
1105        .collect::<Vec<_>>();
1106    abs_dev.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
1107    let mad = 1.4826 * quantile_from_sorted(&abs_dev, 0.5);
1108
1109    // Charbonnier/MM requires eps bounded away from zero:
1110    //   u(t0) = 1 / (2*sqrt(t0^2 + eps^2)) ~ 1/(2*eps) near t0=0.
1111    // Use robust pilot scale:
1112    //   s = max(median(z), 1.4826*MAD(z), Q75(z)).
1113    // If s is tiny (<= delta), fallback to:
1114    //   s <- max(Q95(z), RMS(z)).
1115    // If still tiny, fallback to absolute floor s_min.
1116    // Then eps = kappa * s.
1117    // Primary robust scale: s = max(median, 1.4826*MAD, Q75).
1118    let mut scale = median.max(mad).max(q75);
1119
1120    // Safety threshold delta and absolute floor s_min.
1121    let delta = (f64::EPSILON.sqrt() * q95.max(1.0))
1122        .max(min_epsilon_cfg)
1123        .max(1e-12);
1124    let s_min = min_epsilon_cfg.max(1e-12);
1125
1126    // If robust scale is tiny, use high-quantile / RMS fallback.
1127    if scale <= delta {
1128        let rms = (clean.iter().map(|v| v * v).sum::<f64>() / n as f64).sqrt();
1129        scale = q95.max(rms);
1130    }
1131    if scale <= delta {
1132        scale = s_min;
1133    }
1134
1135    // Start near the observed operator scale so the optimizer begins in a
1136    // neutral regime where both quadratic and linear behavior are reachable.
1137    let kappa = 1.0_f64;
1138    (kappa * scale).max(s_min)
1139}
1140
1141fn extract_spatial_operator_runtime_caches(
1142    spec: &TermCollectionSpec,
1143    design: &TermCollectionDesign,
1144) -> Result<Vec<SpatialOperatorRuntimeCache>, EstimationError> {
1145    let smooth_start = design
1146        .design
1147        .ncols()
1148        .saturating_sub(design.smooth.total_smooth_cols());
1149    let mut out = Vec::<SpatialOperatorRuntimeCache>::new();
1150    for (term_idx, (termspec, term_fit)) in spec
1151        .smooth_terms
1152        .iter()
1153        .zip(design.smooth.terms.iter())
1154        .enumerate()
1155    {
1156        let Some(global_base_idx) = smooth_term_penalty_index(spec, design, term_idx) else {
1157            continue;
1158        };
1159        let mut active_local_idx = 0usize;
1160        let mut mass_local_idx = None;
1161        let mut tension_local_idx = None;
1162        let mut stiffness_local_idx = None;
1163        let mut mass_norm = None;
1164        let mut tension_norm = None;
1165        let mut stiffness_norm = None;
1166        for info in &term_fit.penaltyinfo_local {
1167            if !info.active {
1168                continue;
1169            }
1170            match info.source {
1171                PenaltySource::OperatorMass => {
1172                    mass_local_idx = Some(active_local_idx);
1173                    mass_norm = Some(info.normalization_scale);
1174                }
1175                PenaltySource::OperatorTension => {
1176                    tension_local_idx = Some(active_local_idx);
1177                    tension_norm = Some(info.normalization_scale);
1178                }
1179                PenaltySource::OperatorStiffness => {
1180                    stiffness_local_idx = Some(active_local_idx);
1181                    stiffness_norm = Some(info.normalization_scale);
1182                }
1183                _ => {}
1184            }
1185            active_local_idx += 1;
1186        }
1187        // The Charbonnier adaptive overlay rebuilds the {mass, tension,
1188        // stiffness} D-operator triplet from explicit collocation derivatives
1189        // and reweights all three channels in tandem; the stiffness slot in
1190        // particular is the D2 second-derivative operator. A term that does
1191        // NOT ship an explicit Stiffness penalty (pure Duchon's RKHS-Primary-
1192        // curvature layout — `DuchonOperatorPenaltySpec::default()`) has no
1193        // matching shipped penalty for the Charbonnier D2 surrogate to reweight,
1194        // so applying the overlay would smuggle a fresh D2 collocation
1195        // operator into a basis whose curvature is the RKHS Primary Gram (a
1196        // different mathematical object). Without an explicit Stiffness
1197        // channel the term must be skipped — the runtime cache for the
1198        // adaptive overlay simply doesn't apply.
1199        let (
1200            Some(mass_local),
1201            Some(tension_local),
1202            Some(stiffness_local),
1203            Some(mass_scale),
1204            Some(tension_scale),
1205            Some(stiffness_scale),
1206        ) = (
1207            mass_local_idx,
1208            tension_local_idx,
1209            stiffness_local_idx,
1210            mass_norm,
1211            tension_norm,
1212            stiffness_norm,
1213        )
1214        else {
1215            continue;
1216        };
1217        let mass_global_idx = global_base_idx + mass_local;
1218        let tension_global_idx = global_base_idx + tension_local;
1219        let stiffness_global_idx = global_base_idx + stiffness_local;
1220
1221        let (feature_cols, mut d0, mut d1, mut d2, collocation_points, dim, center_mass_rows) =
1222            match (&termspec.basis, &term_fit.metadata) {
1223                (
1224                    SmoothBasisSpec::Matern { feature_cols, .. },
1225                    BasisMetadata::Matern {
1226                        centers,
1227                        length_scale,
1228                        nu,
1229                        include_intercept,
1230                        identifiability_transform,
1231                        aniso_log_scales,
1232                        input_scales,
1233                        ..
1234                    },
1235                ) => {
1236                    // Match the σ_geom-compensated effective length scale the
1237                    // design (and shipped penalties) use against the standardized
1238                    // centers; the raw metadata length_scale lives in original
1239                    // coordinates and would put this overlay on a different kernel
1240                    // range than the penalties it scales (#706).
1241                    let collocation_length_scale = match input_scales.as_deref() {
1242                        Some(scales) => {
1243                            compensate_length_scale_for_standardization(*length_scale, scales)
1244                        }
1245                        None => *length_scale,
1246                    };
1247                    let ops = build_matern_collocation_operator_matrices(
1248                        centers.view(),
1249                        None,
1250                        collocation_length_scale,
1251                        *nu,
1252                        *include_intercept,
1253                        identifiability_transform.as_ref().map(|z| z.view()),
1254                        aniso_log_scales.as_deref(),
1255                    )?;
1256                    (
1257                        feature_cols.clone(),
1258                        ops.d0,
1259                        ops.d1,
1260                        ops.d2,
1261                        ops.collocation_points,
1262                        centers.ncols(),
1263                        false,
1264                    )
1265                }
1266                (
1267                    SmoothBasisSpec::Duchon { feature_cols, .. },
1268                    BasisMetadata::Duchon {
1269                        centers,
1270                        length_scale,
1271                        power,
1272                        nullspace_order,
1273                        identifiability_transform,
1274                        input_scales,
1275                        aniso_log_scales,
1276                        operator_collocation_points: Some(collocation_points),
1277                        ..
1278                    },
1279                ) => {
1280                    let collocation_length_scale = match (length_scale, input_scales.as_deref()) {
1281                        (Some(ls), Some(scales)) => {
1282                            Some(compensate_length_scale_for_standardization(*ls, scales))
1283                        }
1284                        (Some(ls), None) => Some(*ls),
1285                        (None, _) => None,
1286                    };
1287                    let ops =
1288                        gam_terms::basis::build_duchon_collocation_operator_matriceswithworkspace(
1289                            centers.view(),
1290                            collocation_points.view(),
1291                            None,
1292                            collocation_length_scale,
1293                            *power,
1294                            *nullspace_order,
1295                            aniso_log_scales.as_deref(),
1296                            identifiability_transform.as_ref().map(|z| z.view()),
1297                            2,
1298                            &mut BasisWorkspace::default(),
1299                        )?;
1300                    (
1301                        feature_cols.clone(),
1302                        ops.d0,
1303                        ops.d1,
1304                        ops.d2,
1305                        ops.collocation_points,
1306                        centers.ncols(),
1307                        true,
1308                    )
1309                }
1310                _ => continue,
1311            };
1312        if center_mass_rows && d0.nrows() > 0 && d0.ncols() > 0 {
1313            let means = d0.sum_axis(Axis(0)).mapv(|v| v / d0.nrows() as f64);
1314            for mut row in d0.rows_mut() {
1315                row -= &means;
1316            }
1317        }
1318
1319        // Runtime operator caches must live on the same normalized penalty scale as the
1320        // shipped design penalties. The basis builders normalize S0=D0'D0, S1=D1'D1, and
1321        // S2=D2'D2 before exposing them as smoothing blocks, recording the corresponding
1322        // Frobenius norms in penaltyinfo_local.normalization_scale. If the exact adaptive
1323        // path uses raw collocation operators here, then its Charbonnier penalties live on a
1324        // different geometry from the ordinary Matérn/Duchon penalties:
1325        //
1326        //   raw quadratic limit:        beta' (D'D) beta
1327        //   shipped design penalty:     beta' (D'D / c) beta
1328        //
1329        // The correct operator-level normalization is therefore
1330        //
1331        //   D_norm = D / sqrt(c),
1332        //
1333        // so that D_norm' D_norm = (D'D)/c matches the design penalty exactly. Without this,
1334        // adaptive lambdas compensate for hidden operator-scale mismatches and are no longer
1335        // comparable to the baseline smoothing parameters.
1336        let mass_scale = mass_scale.max(1e-12).sqrt();
1337        let tension_scale = tension_scale.max(1e-12).sqrt();
1338        let stiffness_scale = stiffness_scale.max(1e-12).sqrt();
1339        d0.mapv_inplace(|v| v / mass_scale);
1340        d1.mapv_inplace(|v| v / tension_scale);
1341        d2.mapv_inplace(|v| v / stiffness_scale);
1342
1343        let coeff_global_range =
1344            (smooth_start + term_fit.coeff_range.start)..(smooth_start + term_fit.coeff_range.end);
1345        if d0.ncols() != coeff_global_range.len()
1346            || d1.ncols() != coeff_global_range.len()
1347            || d2.ncols() != coeff_global_range.len()
1348        {
1349            crate::bail_invalid_estim!(
1350                "spatial operator dimension mismatch for term '{}': D0 cols={}, D1 cols={}, D2 cols={}, coeffs={}",
1351                term_fit.name,
1352                d0.ncols(),
1353                d1.ncols(),
1354                d2.ncols(),
1355                coeff_global_range.len()
1356            );
1357        }
1358        out.push(SpatialOperatorRuntimeCache {
1359            termname: term_fit.name.clone(),
1360            feature_cols,
1361            coeff_global_range,
1362            mass_penalty_global_idx: mass_global_idx,
1363            tension_penalty_global_idx: tension_global_idx,
1364            stiffness_penalty_global_idx: stiffness_global_idx,
1365            d0,
1366            d1,
1367            d2,
1368            collocation_points,
1369            dimension: dim,
1370        });
1371    }
1372    Ok(out)
1373}
1374
1375/// Posterior variance of a scalar collocation operator response under the
1376/// working-Laplace posterior `beta ~ N(beta_hat, Sigma_local)`.
1377///
1378/// For operator row `D_k` (one row of `D0`) acting on the term-local coefficient
1379/// block, `Var((D beta)_k) = D_k Sigma_local D_k^T = (D Sigma_local D^T)_kk`.
1380/// We compute it without forming `D Sigma D^T` densely: for each row we evaluate
1381/// `s_k = Sigma_local D_k^T` (one matrix-vector product) and then `D_k . s_k`.
1382/// `Sigma_local` is the sub-block of the global conditional covariance
1383/// `Sigma_beta = H^{-1}` indexed by the term's `coeff_global_range`, i.e. the
1384/// covariance proxy is the already-materialized inner working-Laplace inverse;
1385/// no second factorization is formed.
1386fn scalar_operator_response_variance(
1387    operator: &Array2<f64>,
1388    cov_local: &Array2<f64>,
1389) -> Array1<f64> {
1390    Array1::from_iter(operator.rows().into_iter().map(|row| {
1391        let s = cov_local.dot(&row);
1392        row.dot(&s).max(0.0)
1393    }))
1394}
1395
1396/// Posterior second-moment variance aggregated over each grouped collocation
1397/// block (gradient/curvature). The grouped operator is stored row-stacked with
1398/// `block_dim` rows per collocation point (`d` axes for the gradient, `d*d` for
1399/// the Hessian). For block `k`,
1400///
1401///   v_k = G_k beta,   Cov(v_k) = G_k Sigma_local G_k^T   (block_dim x block_dim),
1402///   variance_k = tr(Cov(v_k)) = sum_axis ( G_k[axis] Sigma_local G_k[axis]^T ),
1403///
1404/// which matches how `CharbonnierGroupedBlockState::norm` aggregates
1405/// `||v_k||^2 = sum_axis (G_k[axis] beta)^2` across the axis-block.
1406fn grouped_operator_response_variance(
1407    operator: &Array2<f64>,
1408    block_dim: usize,
1409    cov_local: &Array2<f64>,
1410) -> Result<Array1<f64>, EstimationError> {
1411    if block_dim == 0 || !operator.nrows().is_multiple_of(block_dim) {
1412        crate::bail_invalid_estim!(
1413            "grouped variance row layout invalid: rows={}, block_dim={block_dim}",
1414            operator.nrows()
1415        );
1416    }
1417    let p = operator.nrows() / block_dim;
1418    let mut out = Array1::<f64>::zeros(p);
1419    for k in 0..p {
1420        let mut acc = 0.0;
1421        for axis in 0..block_dim {
1422            let row = operator.row(k * block_dim + axis);
1423            let s = cov_local.dot(&row);
1424            acc += row.dot(&s);
1425        }
1426        out[k] = acc.max(0.0);
1427    }
1428    Ok(out)
1429}
1430
1431fn compute_spatial_adaptiveweights_for_beta(
1432    beta: &Array1<f64>,
1433    caches: &[SpatialOperatorRuntimeCache],
1434    epsilon_0: f64,
1435    epsilon_g: f64,
1436    epsilon_c: f64,
1437    weight_floor: f64,
1438    weight_ceiling: f64,
1439    beta_covariance: Option<&Array2<f64>>,
1440) -> Result<Vec<SpatialAdaptiveWeights>, EstimationError> {
1441    // Charbonnier / pseudo-Huber MM derivation (per collocation scalar t):
1442    //   psi(t; eps) = sqrt(t^2 + eps^2) - eps
1443    // and for reference t0 the tangent majorizer in t^2 gives:
1444    //   psi(t) <= 0.5 * w(t0) * t^2 + const(t0),
1445    //   w(t0) = 1 / sqrt(t0^2 + eps^2).
1446    //
1447    // We apply this to:
1448    //   t = f_k = |f(z_k)|             (magnitude),
1449    //   t = g_k = ||nabla f(z_k)||_2   (gradient magnitude),
1450    //   t = c_k = ||D²f(z_k)||_F       (full Hessian curvature),
1451    // both computed from beta^(t-1).
1452    //
1453    // These w values define the quadratic surrogate penalties:
1454    //   K0 = D0_con^T W_0 D0_con,  W_0 = diag(w_0)
1455    //   K1 = D1_con^T W_g D1_con,  W_g = diag(w_g) \otimes I_d  (k,axis order)
1456    //   K2 = D2_con^T W_c D2_con,  W_c = diag(w_c) \otimes I_(d*d).
1457    //
1458    // We clamp w directly, then derive inv_w=1/w for diagnostics and row scaling.
1459    //
1460    // Posterior-SNR reweighting (magic by default): when the inner working-Laplace
1461    // conditional covariance `Sigma_beta = H^{-1}` is available we replace the
1462    // squared point-estimate radius `t_k^2 + eps^2` by the credible (noise-floor-
1463    // corrected) second moment `max(t_k^2 - Var((D beta)_k), 0) + eps^2`, with
1464    // `Var = (D Sigma_beta D^T)_kk`. This stops the weight from leaving derivatives
1465    // un-penalized just because they are large but poorly determined: such
1466    // responses are shrunk toward zero (large weight, strong smoothing), while
1467    // credibly large derivatives (real edges) keep their small weight. `Sigma_beta`
1468    // here is the already-formed inner Hessian inverse from the final exact-family
1469    // solve — no second factorization is built; we only reuse the materialized
1470    // covariance. When the covariance is unavailable (`None`) the variance is zero
1471    // and this degrades *exactly* to the old magnitude-only radius.
1472    caches
1473        .iter()
1474        .map(|cache| {
1475            let beta_local = beta.slice(s![cache.coeff_global_range.clone()]);
1476            let exact = SpatialPenaltyExactState::from_beta_local(
1477                beta_local,
1478                cache,
1479                [epsilon_0, epsilon_g, epsilon_c],
1480            )?;
1481            let cov_local = beta_covariance.map(|cov| {
1482                cov.slice(s![
1483                    cache.coeff_global_range.clone(),
1484                    cache.coeff_global_range.clone()
1485                ])
1486                .to_owned()
1487            });
1488            let dim = cache.dimension;
1489            let (var_0, var_g, var_c) = match cov_local.as_ref() {
1490                Some(cov) => (
1491                    scalar_operator_response_variance(&cache.d0, cov),
1492                    grouped_operator_response_variance(&cache.d1, dim, cov)?,
1493                    grouped_operator_response_variance(&cache.d2, dim * dim, cov)?,
1494                ),
1495                None => (
1496                    Array1::<f64>::zeros(exact.magnitude.signal.len()),
1497                    Array1::<f64>::zeros(exact.gradient.norm.len()),
1498                    Array1::<f64>::zeros(exact.curvature.norm.len()),
1499                ),
1500            };
1501            let (_, inv_0) = exact.magnitude.surrogateweights_posterior_snr(
1502                &var_0,
1503                weight_floor,
1504                weight_ceiling,
1505            );
1506            let (_, inv_g) =
1507                exact
1508                    .gradient
1509                    .surrogateweights_posterior_snr(&var_g, weight_floor, weight_ceiling);
1510            let (_, inv_c) = exact.curvature.surrogateweights_posterior_snr(
1511                &var_c,
1512                weight_floor,
1513                weight_ceiling,
1514            );
1515            Ok(SpatialAdaptiveWeights {
1516                inv_magweight: inv_0,
1517                invgradweight: inv_g,
1518                inv_lapweight: inv_c,
1519            })
1520        })
1521        .collect()
1522}
1523
1524fn compute_initial_epsilons(
1525    beta: &Array1<f64>,
1526    caches: &[SpatialOperatorRuntimeCache],
1527    min_epsilon: f64,
1528) -> Result<(f64, f64, f64), EstimationError> {
1529    let mut fvals = Vec::<f64>::new();
1530    let mut gvals = Vec::<f64>::new();
1531    let mut cvals = Vec::<f64>::new();
1532    for cache in caches {
1533        let beta_local = beta.slice(s![cache.coeff_global_range.clone()]);
1534        let exact = SpatialPenaltyExactState::from_beta_local(
1535            beta_local,
1536            cache,
1537            [min_epsilon, min_epsilon, min_epsilon],
1538        )?;
1539        let (f, g, c) = exact.absolute_collocation_magnitudes();
1540        fvals.extend(f.iter().copied());
1541        gvals.extend(g.iter().copied());
1542        cvals.extend(c.iter().copied());
1543    }
1544    // Robust epsilon initialization from pilot magnitudes:
1545    //   s = max(median(z), 1.4826*MAD(z), Q75(z)),
1546    //   if s is tiny then fallback to max(Q95(z), RMS(z)),
1547    //   if still tiny then use absolute floor min_epsilon.
1548    // Epsilon is then kappa * s.
1549    let eps_0 = robust_epsilon_from_samples(&fvals, min_epsilon);
1550    let eps_g = robust_epsilon_from_samples(&gvals, min_epsilon);
1551    let eps_c = robust_epsilon_from_samples(&cvals, min_epsilon);
1552    Ok((eps_0, eps_g, eps_c))
1553}
1554
1555fn exact_spatial_adaptive_penalty_index_set(
1556    caches: &[SpatialOperatorRuntimeCache],
1557) -> BTreeSet<usize> {
1558    let mut out = BTreeSet::new();
1559    for cache in caches {
1560        out.insert(cache.mass_penalty_global_idx);
1561        out.insert(cache.tension_penalty_global_idx);
1562        out.insert(cache.stiffness_penalty_global_idx);
1563    }
1564    out
1565}
1566
1567fn build_spatial_adaptive_hyperspecs(cache_count: usize) -> Vec<SpatialAdaptiveHyperSpec> {
1568    let mut out = Vec::with_capacity(cache_count * 3 + 3);
1569    for cache_index in 0..cache_count {
1570        out.push(SpatialAdaptiveHyperSpec {
1571            cache_index,
1572            kind: SpatialAdaptiveHyperKind::LogLambdaMagnitude,
1573        });
1574        out.push(SpatialAdaptiveHyperSpec {
1575            cache_index,
1576            kind: SpatialAdaptiveHyperKind::LogLambdaGradient,
1577        });
1578        out.push(SpatialAdaptiveHyperSpec {
1579            cache_index,
1580            kind: SpatialAdaptiveHyperKind::LogLambdaCurvature,
1581        });
1582    }
1583    out.push(SpatialAdaptiveHyperSpec {
1584        cache_index: 0,
1585        kind: SpatialAdaptiveHyperKind::LogEpsilonMagnitude,
1586    });
1587    out.push(SpatialAdaptiveHyperSpec {
1588        cache_index: 0,
1589        kind: SpatialAdaptiveHyperKind::LogEpsilonGradient,
1590    });
1591    out.push(SpatialAdaptiveHyperSpec {
1592        cache_index: 0,
1593        kind: SpatialAdaptiveHyperKind::LogEpsilonCurvature,
1594    });
1595    out
1596}
1597
1598fn penalty_matrixwith_local_block(
1599    total_dim: usize,
1600    coeff_range: Range<usize>,
1601    local: &Array2<f64>,
1602) -> Array2<f64> {
1603    let mut out = Array2::<f64>::zeros((total_dim, total_dim));
1604    out.slice_mut(s![coeff_range.clone(), coeff_range])
1605        .assign(local);
1606    out
1607}
1608
1609fn fit_term_collectionwith_exact_spatial_adaptive_regularization(
1610    baseline: FittedTermCollection,
1611    y: ArrayView1<'_, f64>,
1612    weights: ArrayView1<'_, f64>,
1613    offset: ArrayView1<'_, f64>,
1614    family: LikelihoodSpec,
1615    options: &FitOptions,
1616    runtime_caches: &[SpatialOperatorRuntimeCache],
1617) -> Result<FittedTermCollection, EstimationError> {
1618    // Exact adaptive-regularization hyperfit.
1619    //
1620    // This replaces the old MM-plus-approximate hyperfit with the
1621    // exact pseudo-Laplace objective agreed in the math notes:
1622    //
1623    //   L_tilde(theta)
1624    //   = J(beta_hat(theta); theta) + 0.5 log det H(beta_hat(theta), theta),
1625    //
1626    // where:
1627    //   - beta_hat(theta) is the exact inner mode of the true nonquadratic
1628    //     Charbonnier-penalized objective,
1629    //   - theta contains:
1630    //       * retained quadratic log-lambdas for non-adaptive penalties,
1631    //       * one log-lambda per adaptive operator block,
1632    //       * three global log-epsilons shared by every adaptive spatial term,
1633    //   - H is the exact beta-Hessian of the true objective at the mode.
1634    //
1635    // Implementation structure:
1636    //   1. keep ordinary quadratic penalties that are unrelated to adaptive
1637    //      spatial terms in the standard outer-rho path;
1638    //   2. move the adaptive Charbonnier penalties into a one-block exact-Newton
1639    //      custom family so the inner solve uses the real model rather than an
1640    //      MM surrogate;
1641    //   3. expose exact psi-gradients for adaptive log-lambda / log-epsilon
1642    //      coordinates through the custom-family pseudo-Laplace hook;
1643    //   4. refit once at the optimized hyperparameters with all penalties frozen
1644    //      inside the exact family, so covariance and final diagnostics are
1645    //      computed on the same exact surface.
1646    let adaptive_opts = options.adaptive_regularization.clone().unwrap_or_default();
1647    let adaptive_penalty_indices = exact_spatial_adaptive_penalty_index_set(runtime_caches);
1648    let p_total = baseline.design.design.ncols();
1649    struct RetainedPenaltySetup {
1650        global_idx: usize,
1651        global_penalty: Array2<f64>,
1652        nullspace_dim: usize,
1653        log_lambda: f64,
1654        col_range: Range<usize>,
1655        hessian_piece: Array2<f64>,
1656    }
1657    use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};
1658    let retained_setups = baseline
1659        .design
1660        .penalties
1661        .par_iter()
1662        .enumerate()
1663        .map(|(idx, bp)| {
1664            if adaptive_penalty_indices.contains(&idx) {
1665                return None;
1666            }
1667            let lambda = baseline.fit.lambdas[idx];
1668            Some(RetainedPenaltySetup {
1669                global_idx: idx,
1670                global_penalty: bp.to_global(p_total),
1671                nullspace_dim: baseline
1672                    .design
1673                    .nullspace_dims
1674                    .get(idx)
1675                    .copied()
1676                    .unwrap_or(0),
1677                log_lambda: lambda.max(1e-12).ln(),
1678                col_range: bp.col_range.clone(),
1679                hessian_piece: bp.local.mapv(|v| lambda * v),
1680            })
1681        })
1682        .collect::<Vec<_>>();
1683    let retained_count = retained_setups
1684        .iter()
1685        .filter(|setup| setup.is_some())
1686        .count();
1687    let mut retained_penalties = Vec::<Array2<f64>>::with_capacity(retained_count);
1688    let mut retained_nullspace_dims = Vec::<usize>::with_capacity(retained_count);
1689    let mut retained_log_lambdas = Vec::<f64>::with_capacity(retained_count);
1690    let mut retained_global_indices = Vec::<usize>::with_capacity(retained_count);
1691    let mut fixed_quadratichessian = Array2::<f64>::zeros((p_total, p_total));
1692    for setup in retained_setups.into_iter().flatten() {
1693        retained_penalties.push(setup.global_penalty);
1694        retained_nullspace_dims.push(setup.nullspace_dim);
1695        retained_log_lambdas.push(setup.log_lambda);
1696        retained_global_indices.push(setup.global_idx);
1697        fixed_quadratichessian
1698            .slice_mut(s![setup.col_range.clone(), setup.col_range])
1699            .scaled_add(1.0, &setup.hessian_piece);
1700    }
1701
1702    let (eps_0_init, eps_g_init, eps_c_init) = compute_initial_epsilons(
1703        &baseline.fit.beta,
1704        runtime_caches,
1705        adaptive_opts.min_epsilon,
1706    )?;
1707    let mut initial_theta =
1708        Array1::<f64>::zeros(retained_penalties.len() + runtime_caches.len() * 3 + 3);
1709    for (idx, value) in retained_log_lambdas.iter().enumerate() {
1710        initial_theta[idx] = *value;
1711    }
1712    let adaptive_log_lambda_components = runtime_caches
1713        .par_iter()
1714        .map(|cache| {
1715            [
1716                baseline.fit.lambdas[cache.mass_penalty_global_idx]
1717                    .max(1e-12)
1718                    .ln(),
1719                baseline.fit.lambdas[cache.tension_penalty_global_idx]
1720                    .max(1e-12)
1721                    .ln(),
1722                baseline.fit.lambdas[cache.stiffness_penalty_global_idx]
1723                    .max(1e-12)
1724                    .ln(),
1725            ]
1726        })
1727        .collect::<Vec<_>>();
1728    let mut at = retained_penalties.len();
1729    for logs in &adaptive_log_lambda_components {
1730        initial_theta[at] = logs[0];
1731        initial_theta[at + 1] = logs[1];
1732        initial_theta[at + 2] = logs[2];
1733        at += 3;
1734    }
1735    initial_theta[at] = eps_0_init.max(adaptive_opts.min_epsilon).ln();
1736    initial_theta[at + 1] = eps_g_init.max(adaptive_opts.min_epsilon).ln();
1737    initial_theta[at + 2] = eps_c_init.max(adaptive_opts.min_epsilon).ln();
1738
1739    let hyperspecs = build_spatial_adaptive_hyperspecs(runtime_caches.len());
1740    let zero_psi_op: std::sync::Arc<dyn gam_custom_family::CustomFamilyPsiDerivativeOperator> =
1741        std::sync::Arc::new(gam_custom_family::ZeroPsiDerivativeOperator::new(
1742            baseline.design.design.nrows(),
1743            baseline.design.design.ncols(),
1744        ));
1745    let derivative_blocks = vec![
1746        hyperspecs
1747            .par_iter()
1748            .map(|_| CustomFamilyBlockPsiDerivative {
1749                penalty_index: None,
1750                x_psi: Array2::<f64>::zeros((0, 0)),
1751                s_psi: Array2::<f64>::zeros((0, 0)),
1752                s_psi_components: None,
1753                s_psi_penalty_components: None,
1754                x_psi_psi: None,
1755                s_psi_psi: None,
1756                s_psi_psi_components: None,
1757                s_psi_psi_penalty_components: None,
1758                implicit_operator: Some(std::sync::Arc::clone(&zero_psi_op)),
1759                implicit_axis: 0,
1760                implicit_group_id: None,
1761            })
1762            .collect::<Vec<_>>(),
1763    ];
1764
1765    let mixture_link_state = options
1766        .mixture_link
1767        .clone()
1768        .as_ref()
1769        .map(state_fromspec)
1770        .transpose()
1771        .map_err(EstimationError::InvalidInput)?;
1772    let sas_link_state = options
1773        .sas_link
1774        .map(|spec| {
1775            if family.is_binomial_beta_logistic() {
1776                state_from_beta_logisticspec(spec)
1777            } else {
1778                state_from_sasspec(spec)
1779            }
1780        })
1781        .transpose()
1782        .map_err(EstimationError::InvalidInput)?;
1783    let latent_cloglog_state = options.latent_cloglog;
1784    let shared_y = Arc::new(y.to_owned());
1785    let sharedweights = Arc::new(weights.to_owned());
1786    let shared_design = baseline
1787        .design
1788        .design
1789        .try_to_dense_arc("spatial adaptive exact hyperfit design")
1790        .map_err(EstimationError::InvalidInput)?;
1791    let shared_offset = Arc::new(offset.to_owned());
1792    let shared_runtime_caches = Arc::new(runtime_caches.to_vec());
1793    let shared_hyperspecs = Arc::new(hyperspecs.clone());
1794    let zero_quadratic = Arc::new(Array2::<f64>::zeros((
1795        baseline.design.design.ncols(),
1796        baseline.design.design.ncols(),
1797    )));
1798    let base_family = SpatialAdaptiveExactFamily {
1799        family: family.clone(),
1800        latent_cloglog_state,
1801        mixture_link_state: mixture_link_state.clone(),
1802        sas_link_state,
1803        y: shared_y.clone(),
1804        weights: sharedweights.clone(),
1805        design: shared_design.clone(),
1806        offset: shared_offset.clone(),
1807        linear_constraints: baseline.design.linear_constraints.clone(),
1808        runtime_caches: shared_runtime_caches.clone(),
1809        adaptive_params: Vec::new(),
1810        fixed_quadratichessian: zero_quadratic.clone(),
1811        hyperspecs: shared_hyperspecs.clone(),
1812        exact_eval_cache: Arc::new(Mutex::new(None)),
1813    };
1814
1815    let rho_dim = retained_penalties.len();
1816    let operator_slots_end = rho_dim + runtime_caches.len() * 3;
1817    // Every slot's box is `initial_theta[idx] ± WINDOW` clamped into a
1818    // per-slot [floor, cap]. Retained-λ previously used a scale-blind
1819    // ±30 absolute interval, which on small-n / weakly-identified Duchon
1820    // fits let those lambdas wander to the exp(-30) floor and produce
1821    // near-interpolant solutions. Anchoring on baseline log-λ inherits the
1822    // baseline REML's scale calibration so the overlay can only refine
1823    // within an exp(±6) ≈ 400× band of the well-posed baseline regime,
1824    // matching the discipline already applied to operator and epsilon
1825    // slots.
1826    const UNIFIED_LOG_WINDOW: f64 = 6.0;
1827    const RETAINED_LAMBDA_LOG_LOWER_FLOOR: f64 = -30.0;
1828    const RETAINED_LAMBDA_LOG_UPPER_CAP: f64 = 30.0;
1829    const OPERATOR_LAMBDA_LOG_LOWER_FLOOR: f64 = -10.0;
1830    const OPERATOR_LAMBDA_LOG_UPPER_CAP: f64 = 30.0;
1831    let epsilon_floor_log = adaptive_opts.min_epsilon.max(1e-12).ln();
1832    let anchored_bound = |idx: usize, sign: f64| -> f64 {
1833        let raw = initial_theta[idx] + sign * UNIFIED_LOG_WINDOW;
1834        if idx < rho_dim {
1835            raw.clamp(
1836                RETAINED_LAMBDA_LOG_LOWER_FLOOR,
1837                RETAINED_LAMBDA_LOG_UPPER_CAP,
1838            )
1839        } else if idx < operator_slots_end {
1840            raw.clamp(
1841                OPERATOR_LAMBDA_LOG_LOWER_FLOOR,
1842                OPERATOR_LAMBDA_LOG_UPPER_CAP,
1843            )
1844        } else {
1845            raw.max(epsilon_floor_log)
1846        }
1847    };
1848    let eps_lower =
1849        Array1::from_iter((0..initial_theta.len()).map(|idx| anchored_bound(idx, -1.0)));
1850    let eps_upper = Array1::from_iter((0..initial_theta.len()).map(|idx| anchored_bound(idx, 1.0)));
1851    let blockspec = ParameterBlockSpec {
1852        name: "eta".to_string(),
1853        design: baseline.design.design.clone(),
1854        offset: offset.to_owned(),
1855        penalties: retained_penalties
1856            .iter()
1857            .cloned()
1858            .map(PenaltyMatrix::Dense)
1859            .collect(),
1860        nullspace_dims: retained_nullspace_dims.clone(),
1861        initial_log_lambdas: Array1::from_vec(retained_log_lambdas.clone()),
1862        initial_beta: Some(baseline.fit.beta.clone()),
1863        gauge_priority: 100,
1864        jacobian_callback: None,
1865        stacked_design: None,
1866        stacked_offset: None,
1867    };
1868    let screening_cap = Arc::new(AtomicUsize::new(0));
1869    let outer_opts = BlockwiseFitOptions {
1870        inner_max_cycles: options.max_iter,
1871        inner_tol: options.tol,
1872        outer_max_iter: options.max_iter,
1873        outer_tol: options.tol,
1874        compute_covariance: false,
1875        screening_max_inner_iterations: Some(Arc::clone(&screening_cap)),
1876        ..BlockwiseFitOptions::default()
1877    };
1878
1879    use gam_solve::rho_optimizer::OuterProblem;
1880    use gam_problem::{DeclaredHessianForm, Derivative, HessianResult, OuterEval};
1881
1882    struct SpatialAdaptiveOuterState {
1883        warm_cache: Option<CustomFamilyWarmStart>,
1884        last_eval: Option<(
1885            Array1<f64>,
1886            f64,
1887            Array1<f64>,
1888            HessianResult,
1889            CustomFamilyWarmStart,
1890        )>,
1891    }
1892
1893    let n_theta = initial_theta.len();
1894
1895    // Clamp theta to the asymmetric epsilon bounds that run_outer's symmetric
1896    // rho_bound cannot express directly.
1897    let theta_bounds = Some((eps_lower.clone(), eps_upper.clone()));
1898    let clamp_theta = {
1899        let lo = eps_lower;
1900        let hi = eps_upper;
1901        move |theta: &Array1<f64>| -> Array1<f64> {
1902            let mut clamped = theta.clone();
1903            for i in 0..clamped.len() {
1904                clamped[i] = clamped[i].clamp(lo[i], hi[i]);
1905            }
1906            clamped
1907        }
1908    };
1909
1910    let decode_theta = |theta: &Array1<f64>| -> (Array1<f64>, Vec<SpatialAdaptiveTermHyperParams>) {
1911        let rho = theta.slice(s![..rho_dim]).to_owned();
1912        let adaptive_lambda_start = rho_dim;
1913        let adaptive_lambda_end = adaptive_lambda_start + runtime_caches.len() * 3;
1914        let eps = [
1915            theta[adaptive_lambda_end].exp(),
1916            theta[adaptive_lambda_end + 1].exp(),
1917            theta[adaptive_lambda_end + 2].exp(),
1918        ];
1919        let adaptive_params = runtime_caches
1920            .iter()
1921            .enumerate()
1922            .map(|(cache_idx, _)| SpatialAdaptiveTermHyperParams {
1923                lambda: [
1924                    theta[adaptive_lambda_start + cache_idx * 3].exp(),
1925                    theta[adaptive_lambda_start + cache_idx * 3 + 1].exp(),
1926                    theta[adaptive_lambda_start + cache_idx * 3 + 2].exp(),
1927                ],
1928                epsilon: eps,
1929            })
1930            .collect::<Vec<_>>();
1931        (rho, adaptive_params)
1932    };
1933    let analytic_outer_hessian_available =
1934        gam_custom_family::joint_exact_analytic_outer_hessian_available()
1935            && base_family
1936                .exact_outer_derivative_order(std::slice::from_ref(&blockspec), &outer_opts)
1937                .has_hessian()
1938            && gam_custom_family::exact_newton_outer_geometry_supports_second_order_solver(
1939                &base_family,
1940            );
1941    let outer_max_iter = gam_custom_family::cost_gated_first_order_max_iter(
1942        options.max_iter,
1943        base_family.coefficient_gradient_cost(std::slice::from_ref(&blockspec)),
1944        analytic_outer_hessian_available,
1945    );
1946    if outer_max_iter < options.max_iter {
1947        log::info!(
1948            "[OUTER] exact spatial adaptive regularization: first-order work gate reduced outer_max_iter {} -> {}",
1949            options.max_iter,
1950            outer_max_iter,
1951        );
1952    }
1953    // Keep the exact outer Hessian whenever the adaptive family can provide it.
1954    // The Charbonnier pseudo-Laplace surface mixes ordinary log-lambda
1955    // coordinates with adaptive λ/ε coordinates; exact curvature is the best
1956    // route when available. If a family cannot provide exact curvature, this
1957    // builder declares only the true first-order capability.
1958    let problem = OuterProblem::new(n_theta)
1959        .with_gradient(Derivative::Analytic)
1960        .with_hessian(if analytic_outer_hessian_available {
1961            DeclaredHessianForm::Either
1962        } else {
1963            DeclaredHessianForm::Unavailable
1964        })
1965        .with_fallback_policy(gam_solve::rho_optimizer::FallbackPolicy::Disabled)
1966        .with_psi_dim(n_theta.saturating_sub(rho_dim))
1967        .with_tolerance(options.tol)
1968        .with_max_iter(outer_max_iter)
1969        .with_seed_config(gam_problem::SeedConfig::default())
1970        .with_screening_cap(Arc::clone(&screening_cap))
1971        .with_initial_rho(initial_theta.clone());
1972    let problem = if let Some((lo, hi)) = theta_bounds {
1973        problem.with_bounds(lo, hi)
1974    } else {
1975        problem
1976    };
1977
1978    let eval_outer = |st: &mut SpatialAdaptiveOuterState,
1979                      theta: &Array1<f64>,
1980                      order: gam_solve::rho_optimizer::OuterEvalOrder|
1981     -> Result<OuterEval, EstimationError> {
1982        let theta = clamp_theta(theta);
1983
1984        if let Some((cached_theta, cached_cost, cached_grad, cached_hess, cached_warm)) =
1985            &st.last_eval
1986            && cached_theta.len() == theta.len()
1987            && cached_theta
1988                .iter()
1989                .zip(theta.iter())
1990                .all(|(&a, &b)| (a - b).abs() <= 1e-12)
1991            && (!matches!(
1992                order,
1993                gam_solve::rho_optimizer::OuterEvalOrder::ValueGradientHessian
1994            ) || analytic_outer_hessian_available)
1995        {
1996            st.warm_cache = Some(cached_warm.clone());
1997            return Ok(OuterEval {
1998                cost: *cached_cost,
1999                gradient: cached_grad.clone(),
2000                hessian: if matches!(
2001                    order,
2002                    gam_solve::rho_optimizer::OuterEvalOrder::ValueGradientHessian
2003                ) && analytic_outer_hessian_available
2004                {
2005                    cached_hess.clone()
2006                } else {
2007                    HessianResult::Unavailable
2008                },
2009                inner_beta_hint: None,
2010            });
2011        }
2012
2013        let (rho, adaptive_params) = decode_theta(&theta);
2014        let family_eval = base_family.with_adaptive_params(adaptive_params, zero_quadratic.clone());
2015        let need_hessian = matches!(
2016            order,
2017            gam_solve::rho_optimizer::OuterEvalOrder::ValueGradientHessian
2018        ) && analytic_outer_hessian_available;
2019        let result = evaluate_custom_family_joint_hyper(
2020            &family_eval,
2021            std::slice::from_ref(&blockspec),
2022            &outer_opts,
2023            &rho,
2024            &derivative_blocks,
2025            st.warm_cache.as_ref(),
2026            if need_hessian {
2027                gam_solve::estimate::reml::reml_outer_engine::EvalMode::ValueGradientHessian
2028            } else {
2029                gam_solve::estimate::reml::reml_outer_engine::EvalMode::ValueAndGradient
2030            },
2031        )
2032        .map_err(|e| {
2033            EstimationError::RemlOptimizationFailed(format!("spatial adaptive eval failed: {e}"))
2034        })?;
2035        if !result.inner_converged {
2036            st.warm_cache = Some(result.warm_start.clone());
2037            return Err(EstimationError::RemlOptimizationFailed(
2038                "exact spatial adaptive inner solve did not converge".to_string(),
2039            ));
2040        }
2041        if !result.objective.is_finite() || result.gradient.iter().any(|v| !v.is_finite()) {
2042            return Err(EstimationError::RemlOptimizationFailed(
2043                "exact spatial adaptive objective returned non-finite values".to_string(),
2044            ));
2045        }
2046        let hessian_result = if need_hessian {
2047            if !result.outer_hessian.is_analytic() {
2048                return Err(EstimationError::RemlOptimizationFailed(
2049                    "exact spatial adaptive objective did not return an exact outer Hessian"
2050                        .to_string(),
2051                ));
2052            }
2053            match result.outer_hessian.dim() {
2054                Some(dim) if dim == theta.len() => {}
2055                Some(dim) => {
2056                    return Err(EstimationError::RemlOptimizationFailed(format!(
2057                        "exact spatial adaptive outer Hessian dimension mismatch: got {dim}, expected {}",
2058                        theta.len(),
2059                    )));
2060                }
2061                None => {
2062                    return Err(EstimationError::RemlOptimizationFailed(
2063                        "exact spatial adaptive objective did not report an outer Hessian dimension"
2064                            .to_string(),
2065                    ));
2066                }
2067            }
2068            st.last_eval = Some((
2069                theta.clone(),
2070                result.objective,
2071                result.gradient.clone(),
2072                result.outer_hessian.clone(),
2073                result.warm_start.clone(),
2074            ));
2075            result.outer_hessian
2076        } else {
2077            HessianResult::Unavailable
2078        };
2079        st.warm_cache = Some(result.warm_start);
2080        Ok(OuterEval {
2081            cost: result.objective,
2082            gradient: result.gradient,
2083            hessian: hessian_result,
2084            inner_beta_hint: None,
2085        })
2086    };
2087
2088    let mut obj = problem.build_objective_with_screening_proxy(
2089        SpatialAdaptiveOuterState {
2090            warm_cache: None,
2091            last_eval: None,
2092        },
2093        |st: &mut SpatialAdaptiveOuterState, theta: &Array1<f64>| {
2094            let theta = clamp_theta(theta);
2095            let (rho, adaptive_params) = decode_theta(&theta);
2096            let family_eval =
2097                base_family.with_adaptive_params(adaptive_params, zero_quadratic.clone());
2098            let result = evaluate_custom_family_joint_hyper(
2099                &family_eval,
2100                std::slice::from_ref(&blockspec),
2101                &outer_opts,
2102                &rho,
2103                &derivative_blocks,
2104                st.warm_cache.as_ref(),
2105                gam_solve::estimate::reml::reml_outer_engine::EvalMode::ValueOnly,
2106            )
2107            .map_err(|e| {
2108                EstimationError::RemlOptimizationFailed(format!(
2109                    "spatial adaptive cost eval failed: {e}"
2110                ))
2111            })?;
2112            if !result.inner_converged {
2113                st.warm_cache = Some(result.warm_start);
2114                return Err(EstimationError::RemlOptimizationFailed(
2115                    "exact spatial adaptive cost inner solve did not converge".to_string(),
2116                ));
2117            }
2118            st.warm_cache = Some(result.warm_start);
2119            Ok(result.objective)
2120        },
2121        |st: &mut SpatialAdaptiveOuterState, theta: &Array1<f64>| {
2122            eval_outer(
2123                st,
2124                theta,
2125                if analytic_outer_hessian_available {
2126                    gam_solve::rho_optimizer::OuterEvalOrder::ValueGradientHessian
2127                } else {
2128                    gam_solve::rho_optimizer::OuterEvalOrder::ValueAndGradient
2129                },
2130            )
2131        },
2132        |st: &mut SpatialAdaptiveOuterState,
2133         theta: &Array1<f64>,
2134         order: gam_solve::rho_optimizer::OuterEvalOrder| {
2135            eval_outer(st, theta, order)
2136        },
2137        Some(|st: &mut SpatialAdaptiveOuterState| {
2138            st.warm_cache = None;
2139            st.last_eval = None;
2140        }),
2141        Some(|st: &mut SpatialAdaptiveOuterState, theta: &Array1<f64>| {
2142            let theta = clamp_theta(theta);
2143            let (rho, adaptive_params) = decode_theta(&theta);
2144            let family_eval =
2145                base_family.with_adaptive_params(adaptive_params, zero_quadratic.clone());
2146            let result = evaluate_custom_family_joint_hyper_efs(
2147                &family_eval,
2148                std::slice::from_ref(&blockspec),
2149                &outer_opts,
2150                &rho,
2151                &derivative_blocks,
2152                st.warm_cache.as_ref(),
2153            )
2154            .map_err(|e| {
2155                EstimationError::RemlOptimizationFailed(format!(
2156                    "spatial adaptive EFS eval failed: {e}"
2157                ))
2158            })?;
2159            if !result.inner_converged {
2160                st.warm_cache = Some(result.warm_start);
2161                return Err(EstimationError::RemlOptimizationFailed(
2162                    "exact spatial adaptive EFS inner solve did not converge".to_string(),
2163                ));
2164            }
2165            st.warm_cache = Some(result.warm_start);
2166            Ok(result.efs_eval)
2167        }),
2168        // Seed-screening ranking proxy (#969). The regular cost closure
2169        // above hard-errors on a non-converged inner solve — correct for
2170        // line-search costs, but under the screening cap
2171        // (`screening_max_inner_iterations`, wired into `outer_opts`) the
2172        // inner solve is truncated BY DESIGN, so screening through that
2173        // closure rejects every seed and re-creates the all-seeds-rejected
2174        // front-door failure genus. Screening only RANKS candidates: the
2175        // penalized objective of the capped solve is a meaningful ranking
2176        // signal even unconverged (the same contract as the custom-family
2177        // labeled proxy), so accept it and let the cascade pick the best
2178        // seed; the selected seed is then fit with the full budget.
2179        |st: &mut SpatialAdaptiveOuterState, theta: &Array1<f64>| {
2180            let theta = clamp_theta(theta);
2181            let (rho, adaptive_params) = decode_theta(&theta);
2182            let family_eval =
2183                base_family.with_adaptive_params(adaptive_params, zero_quadratic.clone());
2184            let result = evaluate_custom_family_joint_hyper(
2185                &family_eval,
2186                std::slice::from_ref(&blockspec),
2187                &outer_opts,
2188                &rho,
2189                &derivative_blocks,
2190                st.warm_cache.as_ref(),
2191                gam_solve::estimate::reml::reml_outer_engine::EvalMode::ValueOnly,
2192            )
2193            .map_err(|e| {
2194                EstimationError::RemlOptimizationFailed(format!(
2195                    "spatial adaptive screening eval failed: {e}"
2196                ))
2197            })?;
2198            st.warm_cache = Some(result.warm_start);
2199            Ok(result.objective)
2200        },
2201    );
2202
2203    let outer_result = problem
2204        .run(&mut obj, "exact spatial adaptive regularization")
2205        .map_err(|e| {
2206            EstimationError::InvalidInput(format!(
2207                "exact spatial adaptive outer optimization failed: {e}"
2208            ))
2209        })?;
2210    if !outer_result.converged {
2211        // The strict absolute-floor gradient criterion (`‖g‖_proj ≤ options.tol`)
2212        // is too tight near the box-constrained boundary of the adaptive
2213        // Charbonnier pseudo-Laplace objective: as the optimizer pushes ε → ∞
2214        // (overlay-disabled corner), λ → λ_min, the Hessian's nearly-null
2215        // direction lets Cauchy/Newton accept ~e-3-magnitude probe steps that
2216        // give cost changes well below 6-digit precision, and the projected
2217        // gradient floors at numerical-noise-scale (≈ 5e-6 for n≈500, cost≈
2218        // 3e2 fits in double precision) rather than at 0. Accept the iterate
2219        // when the mgcv-style relative-to-cost criterion ‖g‖_proj ≤ τ·(1+|f|)
2220        // is satisfied — that is the textbook REML convergence rule and is
2221        // exactly what `opt::GradientTolerance::relative_to_cost(τ)` would
2222        // have enforced if this OuterProblem path had wired it through. The
2223        // strict absolute floor is retained as the primary check; the
2224        // rel-to-cost form only kicks in once the absolute one has timed out
2225        // at `max_iter`, so unconverged divergent runs (which have large |g|)
2226        // still surface as errors.
2227        let rel_to_cost_threshold = options.tol * (1.0_f64 + outer_result.final_value.abs());
2228        // Rel-to-cost acceptance requires an actual gradient measurement;
2229        // `None` (cache-hit short-circuit, gradient-free path) cannot satisfy
2230        // the mgcv-style criterion regardless of magnitude.
2231        if let Some(final_grad) = outer_result
2232            .final_grad_norm
2233            .filter(|v| v.is_finite() && *v <= rel_to_cost_threshold)
2234        {
2235            log::info!(
2236                "[spatial-adaptive] outer optimization hit max_iter={} but \
2237                 projected gradient norm {:.3e} ≤ τ·(1+|f|) = {:.3e} \
2238                 (τ={:.3e}, |f|={:.3e}); accepting iterate under the mgcv-style \
2239                 relative-to-cost REML convergence criterion.",
2240                outer_result.iterations,
2241                final_grad,
2242                rel_to_cost_threshold,
2243                options.tol,
2244                outer_result.final_value.abs(),
2245            );
2246        } else {
2247            crate::bail_invalid_estim!(
2248                "exact spatial adaptive outer optimization did not converge after {} iterations (final_objective={:.6e}, final_grad_norm={})",
2249                outer_result.iterations,
2250                outer_result.final_value,
2251                outer_result.final_grad_norm_report(),
2252            );
2253        }
2254    }
2255    let outer_iterations = outer_result.iterations;
2256    // `None` = no gradient measurement (cache-hit / gradient-free); the
2257    // authoritative convergence signal is `outer_converged`.
2258    let outer_grad_norm: Option<f64> = outer_result.final_grad_norm;
2259    let theta_star = outer_result.rho;
2260    let rho_star = theta_star.slice(s![..rho_dim]).to_owned();
2261    let adaptive_lambda_start = rho_dim;
2262    let adaptive_lambda_end = adaptive_lambda_start + runtime_caches.len() * 3;
2263    let eps_star = [
2264        theta_star[adaptive_lambda_end].exp(),
2265        theta_star[adaptive_lambda_end + 1].exp(),
2266        theta_star[adaptive_lambda_end + 2].exp(),
2267    ];
2268    let adaptive_params = runtime_caches
2269        .iter()
2270        .enumerate()
2271        .map(|(cache_idx, _)| SpatialAdaptiveTermHyperParams {
2272            lambda: [
2273                theta_star[adaptive_lambda_start + cache_idx * 3].exp(),
2274                theta_star[adaptive_lambda_start + cache_idx * 3 + 1].exp(),
2275                theta_star[adaptive_lambda_start + cache_idx * 3 + 2].exp(),
2276            ],
2277            epsilon: eps_star,
2278        })
2279        .collect::<Vec<_>>();
2280    let mut fixed_total = Array2::<f64>::zeros((
2281        baseline.design.design.ncols(),
2282        baseline.design.design.ncols(),
2283    ));
2284    for (idx, penalty) in retained_penalties.iter().enumerate() {
2285        fixed_total.scaled_add(rho_star[idx].exp(), penalty);
2286    }
2287    let final_family =
2288        base_family.with_adaptive_params(adaptive_params.clone(), Arc::new(fixed_total.clone()));
2289    let final_blockspec = ParameterBlockSpec {
2290        name: "eta".to_string(),
2291        design: baseline.design.design.clone(),
2292        offset: offset.to_owned(),
2293        penalties: vec![],
2294        nullspace_dims: vec![],
2295        initial_log_lambdas: Array1::zeros(0),
2296        initial_beta: Some(baseline.fit.beta.clone()),
2297        gauge_priority: 100,
2298        jacobian_callback: None,
2299        stacked_design: None,
2300        stacked_offset: None,
2301    };
2302    let final_fit = fit_custom_family(
2303        &final_family,
2304        &[final_blockspec],
2305        &BlockwiseFitOptions {
2306            inner_max_cycles: options.max_iter,
2307            inner_tol: options.tol,
2308            outer_max_iter: 1,
2309            outer_tol: options.tol,
2310            compute_covariance: true,
2311            ..BlockwiseFitOptions::default()
2312        },
2313    )
2314    .map_err(EstimationError::CustomFamily)?;
2315    let beta = final_fit.block_states[0].beta.clone();
2316    let final_eval = final_family
2317        .exact_evaluation(&beta)
2318        .map_err(EstimationError::InvalidInput)?;
2319    let penalized_hessian = final_eval
2320        .totalobjectivehessian(&final_family.design)
2321        .map_err(EstimationError::InvalidInput)?;
2322    let beta_covariance = final_fit.covariance_conditional.clone();
2323    let beta_standard_errors = beta_covariance
2324        .as_ref()
2325        .map(|cov| Array1::from_iter((0..cov.nrows()).map(|i| cov[[i, i]].max(0.0).sqrt())));
2326
2327    let mut full_lambdas = baseline.fit.lambdas.clone();
2328    for (idx, &global_idx) in retained_global_indices.iter().enumerate() {
2329        full_lambdas[global_idx] = rho_star[idx].exp();
2330    }
2331    for (cache_idx, cache) in runtime_caches.iter().enumerate() {
2332        full_lambdas[cache.mass_penalty_global_idx] = adaptive_params[cache_idx].lambda[0];
2333        full_lambdas[cache.tension_penalty_global_idx] = adaptive_params[cache_idx].lambda[1];
2334        full_lambdas[cache.stiffness_penalty_global_idx] = adaptive_params[cache_idx].lambda[2];
2335    }
2336
2337    let deviance = if family.is_gaussian_identity() {
2338        y.iter()
2339            .zip(final_eval.obs.mu.iter())
2340            .zip(weights.iter())
2341            .map(|((&yy, &mu), &w)| w.max(0.0) * (yy - mu) * (yy - mu))
2342            .sum()
2343    } else {
2344        -2.0 * final_eval.obs.log_likelihood
2345    };
2346    let mut local_penalty_blocks =
2347        Vec::<PenaltySpec>::with_capacity(baseline.design.penalties.len());
2348    for (global_idx, bp) in baseline.design.penalties.iter().enumerate() {
2349        if adaptive_penalty_indices.contains(&global_idx) {
2350            let cache = runtime_caches
2351                .iter()
2352                .find(|cache| {
2353                    cache.mass_penalty_global_idx == global_idx
2354                        || cache.tension_penalty_global_idx == global_idx
2355                        || cache.stiffness_penalty_global_idx == global_idx
2356                })
2357                .ok_or_else(|| {
2358                    EstimationError::InvalidInput(format!(
2359                        "missing runtime cache for adaptive penalty index {global_idx}"
2360                    ))
2361                })?;
2362            let cache_idx = runtime_caches
2363                .iter()
2364                .position(|c| {
2365                    c.mass_penalty_global_idx == global_idx
2366                        || c.tension_penalty_global_idx == global_idx
2367                        || c.stiffness_penalty_global_idx == global_idx
2368                })
2369                .ok_or_else(|| {
2370                    EstimationError::InvalidInput(format!(
2371                        "missing adaptive cache position for penalty index {global_idx}"
2372                    ))
2373                })?;
2374            let state = &final_eval.adaptive_states[cache_idx];
2375            let local = if cache.mass_penalty_global_idx == global_idx {
2376                scalar_operatorhessian(&cache.d0, &state.magnitude.betahessian_diag())
2377                    .mapv(|v| adaptive_params[cache_idx].lambda[0] * v)
2378            } else if cache.tension_penalty_global_idx == global_idx {
2379                grouped_operatorhessian(
2380                    &cache.d1,
2381                    cache.dimension,
2382                    &state.gradient.betahessian_blocks(),
2383                )?
2384                .mapv(|v| adaptive_params[cache_idx].lambda[1] * v)
2385            } else {
2386                grouped_operatorhessian(
2387                    &cache.d2,
2388                    cache.dimension * cache.dimension,
2389                    &state.curvature.betahessian_blocks(),
2390                )?
2391                .mapv(|v| adaptive_params[cache_idx].lambda[2] * v)
2392            };
2393            // Wrap the pre-scaled global penalty matrix as PenaltySpec::Dense.
2394            local_penalty_blocks.push(PenaltySpec::Dense(penalty_matrixwith_local_block(
2395                baseline.design.design.ncols(),
2396                cache.coeff_global_range.clone(),
2397                &local,
2398            )));
2399        } else {
2400            local_penalty_blocks.push(PenaltySpec::Dense(
2401                bp.to_global(p_total).mapv(|v| v * full_lambdas[global_idx]),
2402            ));
2403        }
2404    }
2405    let (edf_by_block, penalty_block_trace, edf_total) = if let Some(cov) = beta_covariance.as_ref()
2406    {
2407        exact_bounded_edf(
2408            &local_penalty_blocks,
2409            &Array1::from_elem(local_penalty_blocks.len(), 1.0),
2410            cov,
2411        )?
2412    } else {
2413        (
2414            vec![0.0; local_penalty_blocks.len()],
2415            vec![0.0; local_penalty_blocks.len()],
2416            0.0,
2417        )
2418    };
2419    let stable_penalty_term =
2420        2.0 * final_eval.adaptive_penalty_value + beta.dot(&fixed_total.dot(&beta));
2421    let standard_deviation = if family.is_gaussian_identity() {
2422        let denom = (y.len() as f64 - edf_total).max(1.0);
2423        (deviance / denom).sqrt()
2424    } else {
2425        1.0
2426    };
2427    let maps = compute_spatial_adaptiveweights_for_beta(
2428        &beta,
2429        runtime_caches,
2430        eps_star[0],
2431        eps_star[1],
2432        eps_star[2],
2433        adaptive_opts.weight_floor,
2434        adaptive_opts.weight_ceiling,
2435        // Working-Laplace conditional covariance Sigma_beta = H^{-1} from the
2436        // final exact-family solve, reused here as the posterior-SNR variance
2437        // source (no second factorization is formed).
2438        beta_covariance.as_ref(),
2439    )?
2440    .into_iter()
2441    .zip(runtime_caches.iter())
2442    .map(|(w, cache)| AdaptiveSpatialMap {
2443        termname: cache.termname.clone(),
2444        feature_cols: cache.feature_cols.clone(),
2445        collocation_points: cache.collocation_points.clone(),
2446        inv_magweight: w.inv_magweight,
2447        invgradweight: w.invgradweight,
2448        inv_lapweight: w.inv_lapweight,
2449    })
2450    .collect::<Vec<_>>();
2451    let fitted_link = if family.is_latent_cloglog() {
2452        FittedLinkState::LatentCLogLog {
2453            state: latent_cloglog_state
2454                .expect("BinomialLatentCLogLog requires an explicit latent-cloglog state"),
2455        }
2456    } else if family.is_binomial_mixture() {
2457        mixture_link_state
2458            .clone()
2459            .map(|state| FittedLinkState::Mixture {
2460                state,
2461                covariance: None,
2462            })
2463            .unwrap_or(FittedLinkState::Standard(None))
2464    } else if family.is_binomial_sas() {
2465        sas_link_state
2466            .map(|state| FittedLinkState::Sas {
2467                state,
2468                covariance: None,
2469            })
2470            .unwrap_or(FittedLinkState::Standard(None))
2471    } else if family.is_binomial_beta_logistic() {
2472        sas_link_state
2473            .map(|state| FittedLinkState::BetaLogistic {
2474                state,
2475                covariance: None,
2476            })
2477            .unwrap_or(FittedLinkState::Standard(None))
2478    } else {
2479        FittedLinkState::Standard(None)
2480    };
2481    let max_abs_eta = final_eval
2482        .obs
2483        .eta
2484        .iter()
2485        .fold(0.0_f64, |acc, &v| acc.max(v.abs()));
2486    let fitted = FittedTermCollection {
2487        fit: {
2488            let log_lambdas = full_lambdas.mapv(|v| v.max(1e-300).ln());
2489            let inf = FitInference {
2490                edf_by_block,
2491                penalty_block_trace,
2492                edf_total,
2493                smoothing_correction: None,
2494                // Boundary adapter: wrap the raw `Array2<f64>` Hessian as
2495                // `UnscaledPrecision` for the newtype storage.
2496                penalized_hessian: penalized_hessian.clone().into(),
2497                working_weights: final_eval.obs.fisherweight.clone(),
2498                working_response: {
2499                    let mut out = final_eval.obs.eta.clone();
2500                    for i in 0..out.len() {
2501                        let wi = final_eval.obs.fisherweight[i].max(1e-12);
2502                        out[i] += final_eval.obs.score[i] / wi;
2503                    }
2504                    out
2505                },
2506                reparam_qs: None,
2507                dispersion: gam_solve::estimate::Dispersion::Known(1.0),
2508                beta_covariance: beta_covariance
2509                    .clone()
2510                    .map(gam_problem::dispersion_cov::PhiScaledCovariance::from),
2511                beta_standard_errors,
2512                beta_covariance_corrected: None,
2513                beta_standard_errors_corrected: None,
2514                beta_covariance_frequentist: None,
2515                coefficient_influence: None,
2516                weighted_gram: None,
2517                bias_correction_beta: None,
2518            };
2519            let geometry = Some(gam_solve::estimate::FitGeometry {
2520                penalized_hessian: penalized_hessian.into(),
2521                working_weights: inf.working_weights.clone(),
2522                working_response: inf.working_response.clone(),
2523            });
2524            let covariance_conditional = beta_covariance;
2525            let pirls_status_val = if final_fit.outer_converged {
2526                gam_solve::pirls::PirlsStatus::Converged
2527            } else {
2528                gam_solve::pirls::PirlsStatus::StalledAtValidMinimum
2529            };
2530            UnifiedFitResult::try_from_parts(UnifiedFitResultParts {
2531                blocks: vec![gam_solve::estimate::FittedBlock {
2532                    beta: beta.clone(),
2533                    role: gam_problem::BlockRole::Mean,
2534                    edf: edf_total,
2535                    lambdas: full_lambdas.clone(),
2536                }],
2537                log_lambdas,
2538                lambdas: full_lambdas,
2539                likelihood_scale: family.default_scale_metadata(),
2540                likelihood_family: Some(family),
2541                log_likelihood_normalization:
2542                    gam_spec::LogLikelihoodNormalization::UserProvided,
2543                log_likelihood: final_eval.obs.log_likelihood,
2544                deviance,
2545                reml_score: final_fit.penalized_objective,
2546                stable_penalty_term,
2547                penalized_objective: final_fit.penalized_objective,
2548                used_device: false,
2549                outer_iterations,
2550                outer_converged: final_fit.outer_converged,
2551                outer_gradient_norm: outer_grad_norm,
2552                standard_deviation,
2553                covariance_conditional,
2554                covariance_corrected: None,
2555                inference: Some(inf),
2556                fitted_link,
2557                geometry,
2558                block_states: Vec::new(),
2559                pirls_status: pirls_status_val,
2560                max_abs_eta,
2561                constraint_kkt: None,
2562                artifacts: gam_solve::estimate::FitArtifacts {
2563                    pirls: None,
2564                    ..Default::default()
2565                },
2566                inner_cycles: 0,
2567            })?
2568        },
2569        design: baseline.design,
2570        adaptive_diagnostics: Some(AdaptiveRegularizationDiagnostics {
2571            epsilon_0: eps_star[0],
2572            epsilon_g: eps_star[1],
2573            epsilon_c: eps_star[2],
2574            epsilon_outer_iterations: outer_iterations,
2575            mm_iterations: 0,
2576            converged: final_fit.outer_converged,
2577            maps,
2578        }),
2579    };
2580    enforce_term_constraint_feasibility(&fitted.design, &fitted.fit)?;
2581    Ok(fitted)
2582}
2583
2584/// Relax the per-coordinate ρ-prior for terms running in Marra–Wood
2585/// double-penalty selection mode (#1266).
2586///
2587/// The default ρ-prior is a `Normal { mean: 0, sd: 3 }` cap on each log-λ — a
2588/// stabiliser that keeps ordinary smoothing parameters from drifting to
2589/// degenerate extremes (gam#893/#1196). For a smooth carrying a
2590/// `DoublePenaltyNullspace` block (`double_penalty = True`, the default `s(...)`
2591/// — analogous to mgcv `select = TRUE`) that cap is actively wrong: the whole
2592/// purpose of the second penalty is to let REML drive an *unsupported* term to
2593/// `EDF → 0`, which needs both the wiggliness and null-space log-λ to grow
2594/// large. The `ρ²/(2·9)` cap pulls them back toward 0, so REML settles at a
2595/// point that leaves the term under-shrunk — the smooth's EDF comes out ABOVE
2596/// the single-penalty (`double_penalty = False`) EDF instead of at or below it,
2597/// the exact contract violation in #1266. mgcv's `select = TRUE` applies no
2598/// such cap to the selection coordinates, and the lower-level term-collection
2599/// fits already converge correctly under a flat prior.
2600///
2601/// We therefore rewrite the prior to `Independent`, holding the base prior on
2602/// every ordinary coordinate but switching the coordinates of any
2603/// double-penalty term to `Flat`. Single-penalty terms are byte-for-byte
2604/// unchanged, and an already-`Flat`/already-`Independent` base prior, or a
2605/// design with no double-penalty block, is returned untouched.
2606///
2607/// The relaxed per-coordinate prior is FAMILY-AGNOSTIC: the cap-lifting of the
2608/// bending coordinate and the determinacy-gated null-space treatment apply
2609/// identically for Gaussian and non-Gaussian families. The response family / link
2610/// only matters for length-safety (it can append auxiliary trailing ρ
2611/// coordinates via dispersion / SAS / mixture / moving-κ machinery), which is
2612/// gated separately by `length_safe`; once that gate passes the inner ρ aligns
2613/// 1:1 with `penaltyinfo` regardless of family, so the same relaxation is valid
2614/// for a Tweedie / Gamma-log `ps` smooth as for a Gaussian one (#1426/#1477).
2615fn relax_smoothing_rho_prior(
2616    options: &FitOptions,
2617    design: &TermCollectionDesign,
2618) -> gam_spec::RhoPrior {
2619    use gam_terms::basis::BasisMetadata;
2620    let base = &options.rho_prior;
2621    // Only a single scalar prior that actually caps log-λ needs relaxing;
2622    // `Flat` already imposes no cap and `Independent` is assumed caller-built.
2623    if matches!(
2624        base,
2625        gam_spec::RhoPrior::Flat | gam_spec::RhoPrior::Independent(_)
2626    ) {
2627        return base.clone();
2628    }
2629    // LENGTH SAFETY (load-bearing). The per-coordinate `Independent` prior is
2630    // validated against the FULL outer ρ vector and a length disagreement
2631    // saturates the prior to `+∞`, breaking the fit. The ρ vector this prior is
2632    // attached to (the inner REML fit at a *fixed* realized design) aligns 1:1
2633    // with the penalty blocks in `design.penaltyinfo` ONLY when the fit
2634    // introduces no auxiliary trailing ρ coordinates. Such coordinates come from
2635    //   * non-Gaussian dispersion / non-identity link machinery,
2636    //   * SAS ε/δ and mixture-link parameters,
2637    //   * spatial κ length-scale optimisation that actually moves κ.
2638    // Gate to the link-aux-free case. Spatial κ optimisation (Matérn / Duchon /
2639    // sphere / curvature / measure-jet) genuinely appends a moving log-κ
2640    // coordinate AND needs the cap to stabilise it, so bail if any such term is
2641    // present. Thin-plate is the exception: its length-scale is a pure radial
2642    // SCALE that REML cannot identify (the κ optimiser converges to a no-op,
2643    // leaving `n_params = penalty-block count`), so it adds no trailing
2644    // coordinate and is safe to relax alongside the B-spline family. The response
2645    // family / link itself does NOT break length-safety (a non-Gaussian GAM with
2646    // no link-aux and no moving κ still has exactly `penaltyinfo.len()` inner ρ
2647    // coordinates), so the relaxed prior below is family-agnostic.
2648    let has_link_aux = options.sas_link.is_some()
2649        || options.optimize_sas
2650        || options.mixture_link.is_some()
2651        || options.optimize_mixture;
2652    let has_moving_kappa = design.smooth.terms.iter().any(|t| {
2653        matches!(
2654            t.metadata,
2655            BasisMetadata::Matern { .. }
2656                | BasisMetadata::Duchon { .. }
2657                | BasisMetadata::Sphere { .. }
2658                | BasisMetadata::SphereHarmonics { .. }
2659                | BasisMetadata::ConstantCurvature { .. }
2660                | BasisMetadata::MeasureJet { .. }
2661        )
2662    });
2663    // LENGTH SAFETY decides only whether the inner ρ aligns 1:1 with the penalty
2664    // blocks (so an `Independent` prior is valid): it is broken by SAS/mixture
2665    // link-shape coordinates and by a moving spatial κ, NOT by the response
2666    // family or link per se. A Gamma/log (or any other non-Gaussian) GAM with no
2667    // link-aux and no moving κ has exactly `penaltyinfo.len()` ρ coordinates, so
2668    // the `DoublePenaltyNullspace` selection prior below is length-safe there too.
2669    let length_safe = !has_link_aux && !has_moving_kappa;
2670    if !length_safe {
2671        return base.clone();
2672    }
2673    let coords = &design.penaltyinfo;
2674    if coords.is_empty() {
2675        return base.clone();
2676    }
2677    // WELL-IDENTIFICATION GATE (#1089). The ρ-prior is two things at once: a
2678    // #1266/#1271-harmful symmetric cap on each smoothing log-λ, AND a
2679    // #1089-load-bearing stabiliser that makes the outer REML loop terminate on
2680    // an *under-determined* design (gam#893/#1196/#1089: the n=30 five-`ps` wine
2681    // fit has p ≈ 51 > n, so without the cap's curvature the outer criterion is
2682    // flat/degenerate in ρ-space and the loop never certifies a stationary
2683    // point). Only lift the cap when the data comfortably over-determines the
2684    // model (`n ≥ 2·p`), so the unregularised REML problem is well-posed on its
2685    // own; otherwise keep the base prior. The #1266/#1271 cases (n ≈ 800,
2686    // p ≈ 20–40) clear this by ≥20×; the #1089 wine fit (n < p) keeps its cap.
2687    let n_obs = design.design.nrows();
2688    let p_total = design.design.ncols();
2689    // REGIME of the relaxed prior on the relaxable smooth coordinates.
2690    //
2691    // * WELL-DETERMINED (`n ≥ 2·p`): the unregularised REML problem is well
2692    //   posed on its own, so the relaxable coordinates are freed to `Flat`,
2693    //   which the runtime resolves to the firth one-sided barrier — byte-flat
2694    //   on the identified side (pure REML, exactly mgcv) and only a convex wall
2695    //   against the `λ → 0` degeneracy. This is the #1266/#1271 behaviour.
2696    //
2697    // * UNDER-DETERMINED (`n < 2·p`): the design does NOT over-determine the
2698    //   model (the n≈26 five-`ps` wine fit has p > n), so the firth barrier's
2699    //   zero curvature on the identified side leaves the outer REML criterion
2700    //   flat/degenerate in ρ-space and the loop hits `max_iter` at whatever
2701    //   (under-smoothed) λ it last held — EDF rails up to ≈n, the smooths
2702    //   interpolate the training rows, and held-out prediction explodes
2703    //   (#1392: held-out R² as low as −2.5e6 on `wine_gamair`). The previous
2704    //   stabiliser kept the FULL base prior here — a symmetric
2705    //   `Normal{mean:0, sd:3}` cap. Its `ρ²/(2·9)` curvature does terminate the
2706    //   loop, but it is centred at λ=1 with a tight `sd=3`: at the REML optimum
2707    //   `ρ* ≈ 8–15` (heavy smoothing, which an over-parameterised fit needs and
2708    //   which mgcv's pure REML reaches), the cap's `ρ*/9` gradient drags λ back
2709    //   down by `O(1)` in ρ, pinning the fit in the under-smoothed regime.
2710    //
2711    //   The fix keeps a stabiliser with strictly positive curvature (so the
2712    //   loop still certifies a stationary point — the #1089 requirement) but
2713    //   WIDENS it to `sd = RELAX_UNDERDETERMINED_RHO_SD` so its gradient drag at
2714    //   the heavily-smoothed optimum is negligible (`ρ*/sd² = O(1/100)`) and
2715    //   pure REML — not the prior — chooses λ. The wide symmetric Gaussian is
2716    //   weakly informative: ±2σ spans the whole feasible ρ range (`|ρ| ≤ 30`),
2717    //   so it adds termination curvature without biasing which λ REML lands on,
2718    //   restoring the mgcv-like heavy smoothing on the over-parameterised fit.
2719    let underdetermined = n_obs < 2 * p_total;
2720    // Relaxable terms: penalized smooths whose smoothing log-λ the symmetric cap
2721    // wrongly bounds when the term's signal lives in its penalty null space — a
2722    // straight line under a bending penalty drives λ → ∞ but the cap pulls it
2723    // back, leaving spurious wiggle. mgcv caps neither. This is exactly the
2724    // B-spline family (`ps`/`cr`/`cs`/`bs`, BSpline1D), thin-plate (`tp`), and
2725    // tensor-B-spline (`te`/`ti`) smooths — single- AND double-penalty (#1266 is
2726    // the double-penalty case, #1271 the single-penalty `tp`/`ps`). EVERY penalty
2727    // coordinate such a term owns (bending wiggliness AND any null-space
2728    // shrinkage) is freed to `Flat`, which the runtime resolves to the
2729    // firth-default one-sided barrier: no high-λ cap, but still a convex wall
2730    // against the `λ → 0` under-smoothing degeneracy.
2731    let relaxable_terms: std::collections::HashSet<&str> = design
2732        .smooth
2733        .terms
2734        .iter()
2735        .filter(|t| {
2736            matches!(
2737                t.metadata,
2738                BasisMetadata::BSpline1D { .. }
2739                    | BasisMetadata::ThinPlate { .. }
2740                    | BasisMetadata::TensorBSpline { .. }
2741            )
2742            // SHAPE-CONSTRAINED terms must KEEP the cap (#1380). A monotone /
2743            // convex / concave smooth carries linear-inequality constraints; at
2744            // the active boundary (e.g. a convex fit pinned at 2nd-diff = 0) the
2745            // active set collapses the penalized subspace onto the bending
2746            // penalty's own null space ({1, x}), where the smoothing log-λ is
2747            // UNIDENTIFIED. Lifting the cap to `Flat` there lets REML rail λ to
2748            // `RHO_BOUND` (zero curvature → the smooth collapses to a flat/linear
2749            // fit, R² ≈ 0 on data the constraint is correct for). The constraint
2750            // already regularizes the term, and the symmetric cap is the
2751            // #1089-style stabiliser that pins the unidentified λ — so a
2752            // shape-constrained term needs the cap KEPT, exactly the
2753            // under-determined case this gate protects. (Unconstrained #1266/#1271
2754            // selection terms still relax.)
2755            && matches!(t.shape, gam_terms::smooth::ShapeConstraint::None)
2756        })
2757        .map(|t| t.name.as_str())
2758        .collect();
2759    let any_relaxed = coords.iter().any(|info| {
2760        info.termname
2761            .as_deref()
2762            .is_some_and(|name| relaxable_terms.contains(name))
2763    });
2764    if !any_relaxed {
2765        return base.clone();
2766    }
2767    // Relaxed prior for a relaxable smooth coordinate, chosen by regime (see the
2768    // block above): the firth one-sided barrier (`Flat`) when the fit is
2769    // well-determined, a wide-but-curved symmetric Gaussian when it is
2770    // under-determined and the loop still needs termination curvature.
2771    let relaxed_prior = if underdetermined {
2772        gam_spec::RhoPrior::Normal {
2773            mean: 0.0,
2774            sd: RELAX_UNDERDETERMINED_RHO_SD,
2775        }
2776    } else {
2777        gam_spec::RhoPrior::Flat
2778    };
2779    // DOUBLE-PENALTY NULL-SPACE SELECTION (#1392, mgcv `select=TRUE`). A
2780    // double-penalty smooth carries a second `DoublePenaltyNullspace` ridge on
2781    // the term's penalty null space ({1, x} for a 1-D bend) whose only job is
2782    // selection: drive its λ UP (toward the prior's finite well-penalized mode
2783    // λ* = θ², not to ∞) to shrink the null-space (linear) component OUT when
2784    // the data does not support it, exactly as mgcv's `select=TRUE` adds a
2785    // null-space penalty. On an over-parameterized `p > n` fit
2786    // (`wine_gamair`: 5 `ps` smooths on ~26 rows) the symmetric relaxed prior
2787    // above leaves this ridge's outer score flat on the select-out side, so REML
2788    // stalls it at λ ≈ 0.11 — the null space is kept, the EDF rails up, and
2789    // held-out prediction collapses (#1392). The RANGE-space (`Primary`) bending
2790    // coordinate's smoothing selection must NOT be touched, so this select-out
2791    // bias is gated to `DoublePenaltyNullspace` coordinates only and is applied
2792    // ONLY in the under-determined regime — in the well-determined regime the
2793    // relaxable coordinates stay byte-flat (`Flat`) so a clean `n > p` fit is
2794    // unchanged (no regression on ordinary smooth recovery).
2795    //
2796    // The strong select-out PC prior is applied to the `DoublePenaltyNullspace`
2797    // coordinate ONLY in the UNDER-DETERMINED regime, where the outer score is
2798    // genuinely flat on the select-out side and REML needs the active push. In the
2799    // WELL-DETERMINED regime the null space gets the wide
2800    // `nullspace_degeneracy_prior` instead (see below) — an active select-out mode
2801    // there would over-shrink a genuinely-supported collinear null space (#1476).
2802    // The RANGE-space (`Primary`) bending coordinate is untouched (stays `Flat`
2803    // when well-determined), so ordinary single-smooth recovery is unchanged.
2804    //
2805    let nullspace_select_prior = gam_spec::RhoPrior::PenalizedComplexity {
2806        upper: NULLSPACE_SELECT_PC_UPPER,
2807        tail_prob: NULLSPACE_SELECT_PC_TAIL_PROB,
2808    };
2809    // WELL-DETERMINED NULL-SPACE DEGENERACY BREAKER (#1476). When the fit is
2810    // well-determined (`n ≥ 2·p`) the strong `nullspace_select_prior` above is the
2811    // WRONG tool for the Gaussian null-space coordinate: its finite well-penalized
2812    // mode at `λ* = θ² ≈ 8483` is an aggressive select-OUT pull that drags a
2813    // GENUINELY-SUPPORTED null space (a real linear/constant component) toward
2814    // collapse — the #1476 over-shrink. But leaving the coordinate fully `Flat`
2815    // (the previous well-determined behaviour) is the OTHER failure: under
2816    // concurvity (`s(x1)+s(x2)`, corr ≈ 0.9) the two smooths' null-space (linear)
2817    // directions are near-collinear, so the joint REML objective is essentially
2818    // FLAT along the "transfer the shared linear signal between the two smooths"
2819    // ridge; with zero curvature on that coordinate REML cannot certify an
2820    // interior stationary point and one smooth's `λ_nullspace` rails to the ρ
2821    // bound (≈1e13), annihilating its genuine linear signal to `EDF ≈ 0` while the
2822    // other absorbs it. The principled fix is NEITHER a select-out mode NOR a
2823    // flat coordinate: it is a WIDE, weakly-informative symmetric Gaussian that
2824    // contributes strictly-positive termination curvature `1/sd²` (breaking the
2825    // concurvity flat-ridge degeneracy so REML lands an interior allocation) while
2826    // its gradient `ρ/sd²` at any plausible optimum is negligible — so REML, not
2827    // the prior, chooses how the shared linear signal is split. This adds no
2828    // directional select-out bias, so it does NOT over-shrink a supported null
2829    // space (#1476); a genuinely-UNSUPPORTED null space is still selected out
2830    // because REML's own score drives its `λ` up and the weak symmetric pull
2831    // barely opposes it (#1266 irrelevant-covariate shrinkage, #1371 single-smooth
2832    // recovery preserved). The strong PC select-out remains in the
2833    // UNDER-DETERMINED regime, where the score IS flat on the select-out side and
2834    // REML needs the active push (#1392 wine `p > n`).
2835    let nullspace_degeneracy_prior = gam_spec::RhoPrior::Normal {
2836        mean: 0.0,
2837        sd: NULLSPACE_WELLDET_DEGENERACY_RHO_SD,
2838    };
2839    let per_coord = coords
2840        .iter()
2841        .map(|info| {
2842            let relax = info
2843                .termname
2844                .as_deref()
2845                .is_some_and(|name| relaxable_terms.contains(name));
2846            if !relax {
2847                return base.clone();
2848            }
2849            let is_nullspace =
2850                matches!(info.penalty.source, PenaltySource::DoublePenaltyNullspace);
2851            // The relaxed per-coordinate prior is FAMILY-AGNOSTIC: the choice
2852            // depends only on the coordinate's role (bending vs null-space
2853            // selection) and on whether the data over-determines the model, NOT
2854            // on the response family or link. (Length-safety — the only thing the
2855            // family/link can break via auxiliary ρ coordinates — is already
2856            // gated above by `length_safe`; reaching this point means the inner ρ
2857            // aligns 1:1 with `penaltyinfo` for Gaussian and non-Gaussian alike.)
2858            //
2859            // The previous code split here on `gaussian_identity` and pinned the
2860            // non-Gaussian null-space coordinate to the AGGRESSIVE PC select-out
2861            // prior in BOTH determinacy regimes. That select-out prior has a
2862            // finite well-penalized mode at λ* ≈ θ² ≈ 8483, which carves a SECOND,
2863            // deep basin into the 2-D (bending, null-space) outer REML surface at
2864            // large λ_null. On a well-determined non-Gaussian double-penalty `ps`
2865            // smooth the outer ARC then has two competing basins — the genuine
2866            // bending optimum and the prior-induced high-λ_null shelf — and the
2867            // expensive non-Gaussian multi-start lands the wrong one: the fit
2868            // ships a right-boundary blow-up (Tweedie `s(x)` pred ≈ 1.4–2.0× truth
2869            // at x=1 on data whose null space is unsupported) and, on the hard
2870            // seeds, a falsely-"converged" EDF-inflated under-smooth (#1477; the
2871            // same genus as the #1426 Gamma/log overfit). The Gaussian path does
2872            // NOT do this — #1476 deliberately switched its well-determined
2873            // null-space coordinate to the wide, weakly-informative degeneracy
2874            // prior precisely because the active select-out over-shrinks /
2875            // destabilises a well-determined fit. Non-Gaussian needs the identical
2876            // treatment, so the determinacy gate now applies to BOTH families:
2877            //
2878            //   * BENDING (range-space) coordinate → `relaxed_prior` (firth
2879            //     one-sided barrier when well-determined = pure REML = mgcv; wide
2880            //     #1089 `Normal` when under-determined).
2881            //   * NULL-SPACE selection coordinate → the AGGRESSIVE PC select-out
2882            //     ONLY when under-determined (`p > n`, #1392 wine: the outer score
2883            //     is flat on the select-out side and REML needs the active push);
2884            //     otherwise the gentle, wide degeneracy prior (#1476), which adds
2885            //     termination curvature without biasing which λ_null REML lands on
2886            //     — so a genuinely-unsupported null space is still selected out by
2887            //     REML's own score (the sin-data linear trend → λ_null large) and a
2888            //     genuinely-supported one is not over-shrunk.
2889            if is_nullspace {
2890                if underdetermined {
2891                    nullspace_select_prior.clone()
2892                } else {
2893                    nullspace_degeneracy_prior.clone()
2894                }
2895            } else {
2896                relaxed_prior.clone()
2897            }
2898        })
2899        .collect::<Vec<_>>();
2900    gam_spec::RhoPrior::Independent(per_coord)
2901}
2902
2903/// Standard deviation of the wide, weakly-informative symmetric `Normal` prior
2904/// placed on a relaxable smooth's log-λ coordinates when the fit is
2905/// under-determined (`n < 2·p`); see [`relax_smoothing_rho_prior`].
2906///
2907/// Chosen so that ±2σ spans the entire feasible ρ range (the outer optimiser
2908/// bounds `|ρ| ≤ 30`): the prior contributes strictly-positive termination
2909/// curvature `1/sd²` to the outer Hessian (the #1089 requirement that the REML
2910/// loop certify a stationary point on a `p > n` design) while its gradient drag
2911/// at the heavily-smoothed REML optimum is negligible, so pure REML — matching
2912/// mgcv — selects λ. Reducing it toward the old `sd = 3` re-introduces the
2913/// #1392 under-smoothing drag; widening it further weakens termination
2914/// curvature without further benefit.
2915const RELAX_UNDERDETERMINED_RHO_SD: f64 = 15.0;
2916
2917/// Distance-scale bound `upper` (`P(d > upper) = tail_prob` on the marginal-SD
2918/// scale `d = exp(-ρ/2)`) of the penalized-complexity prior placed on a
2919/// relaxable smooth's `DoublePenaltyNullspace` selection coordinate when the fit
2920/// is under-determined (`n < 2·p`); see [`relax_smoothing_rho_prior`].
2921///
2922/// The null-space ridge exists only to SELECT the linear/constant null-space
2923/// component out (mgcv `select=TRUE`): we want its `λ` driven UP (`d → 0`)
2924/// unless the data clearly buys the null-space wiggle. The PC prior is the
2925/// convex bowl `C(ρ) = ρ/2 + θ e^{-ρ/2}` with the steep exponential wall on the
2926/// `λ → 0` (null space kept, `d > upper`) side and a FINITE interior mode at
2927/// `ρ* = 2 ln θ` (`λ* = θ²`). A small `upper` puts that wall close in, so the
2928/// coordinate's λ is selected up toward the well-penalized mode; the data can
2929/// still keep the null space when it genuinely earns it (the over-smoothing side
2930/// of the bowl, gradient `→ +1/2` only in the far tail, pulls ρ back DOWN toward
2931/// λ* — there is no λ → ∞ runaway). `0.05` places the wall at a marginal-SD
2932/// scale two decades below unit, biasing toward select-out on the
2933/// over-parameterized `p > n` wine fit while staying weakly informative.
2934const NULLSPACE_SELECT_PC_UPPER: f64 = 0.05;
2935
2936/// Tail probability `α` (`P(d > upper) = α`) calibrating the rate
2937/// `θ = −ln(α)/upper` of the [`NULLSPACE_SELECT_PC_UPPER`] penalized-complexity
2938/// select-out prior. A small `α` makes the wall against the kept-null-space
2939/// (`λ → 0`) side steep; combined with the small `upper` it yields a strong
2940/// θ ≈ 92 so REML moves the under-determined null-space ridge off its stalled
2941/// λ ≈ 0.11 toward select-out. The PC bowl has a FINITE mode at `λ* = θ² ≈ 8483`
2942/// (`ρ* = 2 ln θ ≈ 9.05`), NOT a hard `λ → ∞` cap: beyond the mode the gradient
2943/// turns positive (approaching `+1/2` only as `ρ → +∞`) and, the objective being
2944/// minimized, pulls ρ back DOWN toward λ*. See [`relax_smoothing_rho_prior`].
2945const NULLSPACE_SELECT_PC_TAIL_PROB: f64 = 0.01;
2946
2947fn adaptive_fit_options_base(options: &FitOptions, design: &TermCollectionDesign) -> FitOptions {
2948    FitOptions {
2949        latent_cloglog: options.latent_cloglog,
2950        mixture_link: options.mixture_link.clone(),
2951        optimize_mixture: options.optimize_mixture,
2952        sas_link: options.sas_link,
2953        optimize_sas: options.optimize_sas,
2954        compute_inference: options.compute_inference,
2955        skip_rho_posterior_inference: options.skip_rho_posterior_inference,
2956        max_iter: options.max_iter,
2957        tol: options.tol,
2958        nullspace_dims: design.nullspace_dims.clone(),
2959        linear_constraints: design.linear_constraints.clone(),
2960        firth_bias_reduction: options.firth_bias_reduction,
2961        adaptive_regularization: None,
2962        penalty_shrinkage_floor: options.penalty_shrinkage_floor,
2963        // Propagate user-supplied rho_prior so the baseline/refit and the
2964        // joint optimizer minimize the same REML objective.
2965        rho_prior: options.rho_prior.clone(),
2966        kronecker_penalty_system: design.kronecker_penalty_system(),
2967        kronecker_factored: design
2968            .smooth
2969            .terms
2970            .iter()
2971            .find_map(|t| t.kronecker_factored.clone()),
2972        persist_warm_start_disk: options.persist_warm_start_disk,
2973    }
2974}
2975
2976fn superseded_fit_options(options: &FitOptions) -> FitOptions {
2977    let mut fit_options = options.clone();
2978    fit_options.skip_rho_posterior_inference = true;
2979    fit_options
2980}
2981
2982#[derive(Clone)]
2983struct BoundedLinearTermMeta {
2984    col_idx: usize,
2985    min: f64,
2986    max: f64,
2987    prior: BoundedCoefficientPriorSpec,
2988}
2989
2990/// β-dependent effective Jacobian for the bounded-linear fit block.
2991///
2992/// Each bounded coefficient enters the linear predictor non-linearly, as
2993/// `β = min + width·σ(θ)`, and is supplied to the solver through the family
2994/// adapter's offset rather than the linear design. To keep that contribution
2995/// out of the *linear* design the fit places a deliberately **zeroed**
2996/// placeholder column for every bounded term in the block design
2997/// (see `fit_bounded_term_collection_with_design`). The pre-fit
2998/// identifiability audit, however, assesses block rank by reading each block's
2999/// effective Jacobian — and a zeroed column reads as a structural rank
3000/// deficiency, so without this callback the audit refuses *every* bounded
3001/// model before fitting begins.
3002///
3003/// This callback reports the model's true Jacobian column for each bounded
3004/// term, `∂η_i/∂θ = (dβ/dθ)·x_i`, so the audit inspects the same geometry the
3005/// solver actually fits. Because `dβ/dθ = width·σ(θ)(1−σ(θ))` is strictly
3006/// positive for finite θ and `width > 0`, a bounded column is rank-deficient
3007/// in the audit exactly when its underlying covariate is genuinely collinear
3008/// with the rest of the design — never merely because the placeholder was
3009/// zeroed. The callback is consumed only by the identifiability audit /
3010/// canonicalisation; the inner PIRLS solve drives η through the
3011/// [`BoundedLinearFamily`] adapter, so reporting the non-zeroed Jacobian here
3012/// does not double-count the bounded contribution.
3013struct BoundedEffectiveJacobian {
3014    design: Array2<f64>,
3015    bounded_terms: Vec<BoundedLinearTermMeta>,
3016}
3017
3018impl BlockEffectiveJacobian for BoundedEffectiveJacobian {
3019    fn effective_jacobian_rows(
3020        &self,
3021        state: &FamilyLinearizationState<'_>,
3022        rows: std::ops::Range<usize>,
3023    ) -> Result<Array2<f64>, String> {
3024        let p = self.design.ncols();
3025        let n = self.design.nrows();
3026        let rows = rows.start.min(n)..rows.end.min(n);
3027        if !state.beta.is_empty() {
3028            if state.beta.len() != p {
3029                return Err(format!(
3030                    "BoundedEffectiveJacobian::effective_jacobian_at: beta length {} != design \
3031                     ncols {p}",
3032                    state.beta.len(),
3033                ));
3034            }
3035            if state.beta.iter().any(|v| v.is_nan()) {
3036                return Err(
3037                    "BoundedEffectiveJacobian::effective_jacobian_at: beta contains NaN"
3038                        .to_string(),
3039                );
3040            }
3041        }
3042        let mut jac = self
3043            .design
3044            .slice(ndarray::s![rows.start..rows.end, ..])
3045            .to_owned();
3046        for term in &self.bounded_terms {
3047            let theta = if state.beta.is_empty() {
3048                0.0
3049            } else {
3050                state.beta[term.col_idx]
3051            };
3052            let (_, _, db_dtheta, _, _) = bounded_latent_derivatives(theta, term.min, term.max);
3053            jac.column_mut(term.col_idx).mapv_inplace(|v| v * db_dtheta);
3054        }
3055        Ok(jac)
3056    }
3057}
3058
3059#[derive(Clone)]
3060struct BoundedLinearFamily {
3061    family: LikelihoodSpec,
3062    latent_cloglog_state: Option<LatentCLogLogState>,
3063    mixture_link_state: Option<MixtureLinkState>,
3064    sas_link_state: Option<SasLinkState>,
3065    y: Array1<f64>,
3066    weights: Array1<f64>,
3067    design: Array2<f64>,
3068    designzeroed: Array2<f64>,
3069    offset: Array1<f64>,
3070    bounded_terms: Vec<BoundedLinearTermMeta>,
3071}
3072
3073#[derive(Clone)]
3074struct StandardFamilyObservationState {
3075    eta: Array1<f64>,
3076    mu: Array1<f64>,
3077    score: Array1<f64>,
3078    fisherweight: Array1<f64>,
3079    neghessian_eta: Array1<f64>,
3080    neghessian_eta_derivative: Array1<f64>,
3081    log_likelihood: f64,
3082}
3083
3084fn bounded_logit(z: f64) -> f64 {
3085    let zc = z.clamp(1e-12, 1.0 - 1e-12);
3086    (zc / (1.0 - zc)).ln()
3087}
3088
3089fn stable_sigmoid(theta: f64) -> f64 {
3090    if theta >= 0.0 {
3091        let exp_neg = (-theta).exp();
3092        1.0 / (1.0 + exp_neg)
3093    } else {
3094        let exp_pos = theta.exp();
3095        exp_pos / (1.0 + exp_pos)
3096    }
3097}
3098
3099fn bounded_latent_to_user(theta: f64, min: f64, max: f64) -> (f64, f64, f64) {
3100    let z = stable_sigmoid(theta);
3101    let width = max - min;
3102    let beta = min + width * z;
3103    let db_dtheta = width * z * (1.0 - z);
3104    (beta, z, db_dtheta)
3105}
3106
3107/// Invert the bounded interval transform: given a user-scale coefficient
3108/// `beta` in the open interval `(min, max)`, return the latent coordinate
3109/// `theta` with `bounded_latent_to_user(theta, min, max).0 == beta`.
3110///
3111/// This is the exact inverse of the logistic interval map used by the bounded
3112/// custom family: `z = (beta - min)/(max - min)` (the normalized position in
3113/// the interval) and `theta = logit(z)`. The normalized position is clamped
3114/// strictly inside `(0, 1)` (mirroring `bounded_logit`) so a coefficient that
3115/// sits numerically at a boundary maps to a large-but-finite latent value
3116/// rather than `±∞`.
3117fn bounded_user_to_latent(beta: f64, min: f64, max: f64) -> f64 {
3118    let width = max - min;
3119    if width <= 0.0 || !width.is_finite() {
3120        return 0.0;
3121    }
3122    let z = (beta - min) / width;
3123    bounded_logit(z)
3124}
3125
3126/// One bounded coefficient column for posterior sampling: its position in the
3127/// (internal, conditioned) coefficient vector and the interval bounds expressed
3128/// on that same internal scale.
3129#[derive(Debug, Clone, Copy)]
3130pub struct BoundedSampleColumn {
3131    /// Column index into the internal (conditioned) coefficient vector.
3132    pub col_idx: usize,
3133    /// Lower interval bound on the internal scale.
3134    pub min: f64,
3135    /// Upper interval bound on the internal scale.
3136    pub max: f64,
3137}
3138
3139/// Exact posterior draws for a model with `bounded()` coefficients.
3140///
3141/// The bounded custom family fits each bounded coefficient as a smooth interval
3142/// transform `beta = min + (max - min)·sigmoid(theta)` of an unconstrained
3143/// latent `theta`. The Laplace approximation is *Gaussian on the latent scale*
3144/// — that is precisely the scale on which the fit treats the coefficient as an
3145/// unconstrained, locally-quadratic parameter. Sampling a Gaussian directly on
3146/// the user (bounded) scale is wrong twice over: it can place mass outside
3147/// `[min, max]`, and it discards the boundary-induced skew that the nonlinear
3148/// map produces. This routine instead draws `theta ~ N(theta_mode, H_latent^{-1})`
3149/// and pushes every draw through the *exact* interval map, so user-scale draws
3150/// always lie strictly inside the interval and carry the correct skew.
3151///
3152/// Coordinate bookkeeping. The caller supplies the user-scale mode `beta_user`
3153/// and the user-scale penalized Hessian `user_hessian` (both in *internal /
3154/// conditioned* coordinates — i.e. before `backtransform_*` to the original
3155/// data scale) together with the internal-scale bounds for each bounded column.
3156/// The user-scale Hessian relates to the latent-scale Hessian by the diagonal
3157/// delta-method Jacobian `J = diag(db/dtheta)`:
3158///   `H_user = J^{-1} H_latent J^{-1}`  ⇒  `H_latent = J H_user J`,
3159/// which is exactly the inverse of `transform_bounded_latent_precision_to_user_internal`.
3160/// Non-bounded columns have `J_ii = 1`, so they are sampled as the ordinary
3161/// Gaussian Laplace draw and returned unchanged.
3162///
3163/// Dispersion. `user_hessian` is the UNSCALED penalized Hessian `H_user`
3164/// (unit implicit dispersion). For a free-dispersion family the latent
3165/// posterior covariance is `φ̂·H_latent⁻¹`, so the caller passes
3166/// `sqrt_cov_scale = √φ̂` (the coefficient-covariance scale `√σ̂²` for a
3167/// profiled Gaussian, `1` for fixed-scale families like Binomial) and every
3168/// latent perturbation is multiplied by it. This makes the draw covariance
3169/// `sqrt_cov_scale² · H_latent⁻¹`, matching the fit's reported
3170/// `Vb = cov_scale·H_user⁻¹` exactly (gam#1514) — without it a Gaussian
3171/// bounded slope's draws were ~`1/σ̂` too wide.
3172///
3173/// Returns the draws as a `(n_draws, p)` matrix on the *internal* user scale
3174/// (still conditioned); the caller back-transforms to the original data scale
3175/// with the same conditioning it used for the point estimate.
3176pub fn sample_bounded_latent_posterior_internal(
3177    beta_user: &Array1<f64>,
3178    user_hessian: &Array2<f64>,
3179    bounded_columns: &[BoundedSampleColumn],
3180    n_draws: usize,
3181    sqrt_cov_scale: f64,
3182    base_seed: u64,
3183) -> Result<Array2<f64>, EstimationError> {
3184    let p = beta_user.len();
3185    if user_hessian.nrows() != p || user_hessian.ncols() != p {
3186        crate::bail_invalid_estim!(
3187            "bounded posterior sampling dimension mismatch: mode has {p} entries, user Hessian is {}x{}",
3188            user_hessian.nrows(),
3189            user_hessian.ncols()
3190        );
3191    }
3192
3193    // Latent mode and delta-method Jacobian, column by column.
3194    let mut theta_mode = beta_user.clone();
3195    let mut jac_diag = Array1::<f64>::ones(p);
3196    for bc in bounded_columns {
3197        if bc.col_idx >= p {
3198            crate::bail_invalid_estim!(
3199                "bounded posterior sampling: bounded column index {} out of range for {p} coefficients",
3200                bc.col_idx
3201            );
3202        }
3203        let theta_i = bounded_user_to_latent(beta_user[bc.col_idx], bc.min, bc.max);
3204        let (_, _, db_dtheta) = bounded_latent_to_user(theta_i, bc.min, bc.max);
3205        theta_mode[bc.col_idx] = theta_i;
3206        // Guard against a degenerate (numerically vanishing) Jacobian at a
3207        // coefficient pinned hard against a boundary: floor the slope so the
3208        // latent precision stays finite and the draw simply collapses onto the
3209        // boundary, which is the correct limiting posterior.
3210        jac_diag[bc.col_idx] = db_dtheta.max(1e-12);
3211    }
3212
3213    // H_latent = J H_user J  (J diagonal). This is the exact inverse of the
3214    // user-scale precision transform applied at fit time.
3215    let mut h_latent = user_hessian.clone();
3216    for i in 0..p {
3217        let ji = jac_diag[i];
3218        if ji != 1.0 {
3219            h_latent.row_mut(i).mapv_inplace(|v| v * ji);
3220            h_latent.column_mut(i).mapv_inplace(|v| v * ji);
3221        }
3222    }
3223
3224    // Draw theta ~ N(theta_mode, H_latent^{-1}) via the Cholesky of H_latent:
3225    // L Lᵀ = H_latent, solve Lᵀ δ = ε so Var(δ) = H_latent^{-1}.
3226    use gam_linalg::faer_ndarray::FaerCholesky as _;
3227    use rand::SeedableRng as _;
3228    let chol = h_latent.cholesky(faer::Side::Lower).map_err(|err| {
3229        EstimationError::InvalidInput(format!(
3230            "bounded posterior sampling: Cholesky of the latent penalized Hessian failed: {err:?}"
3231        ))
3232    })?;
3233    let l = chol.lower_triangular();
3234
3235    let mut draws = Array2::<f64>::zeros((n_draws, p));
3236    let mut eps = Array1::<f64>::zeros(p);
3237    let mut delta = Array1::<f64>::zeros(p);
3238    let mut rng = rand::rngs::StdRng::seed_from_u64(base_seed);
3239    for k in 0..n_draws {
3240        for e in eps.iter_mut() {
3241            *e = standard_normal_draw(&mut rng);
3242        }
3243        solve_lower_transpose_into(&l, &eps, &mut delta);
3244        for i in 0..p {
3245            // δ has covariance `H_latent⁻¹`; scaling by √cov_scale lifts it to
3246            // the dispersion-correct posterior covariance `cov_scale·H_latent⁻¹`.
3247            draws[(k, i)] = theta_mode[i] + sqrt_cov_scale * delta[i];
3248        }
3249        // Push bounded columns through the exact interval map so every draw is
3250        // strictly inside (min, max); leave unconstrained columns untouched.
3251        for bc in bounded_columns {
3252            let (beta_draw, _, _) = bounded_latent_to_user(draws[(k, bc.col_idx)], bc.min, bc.max);
3253            draws[(k, bc.col_idx)] = beta_draw;
3254        }
3255    }
3256
3257    Ok(draws)
3258}
3259
3260/// Box-Muller standard-normal draw (kept local so the bounded sampler does not
3261/// depend on the HMC module's RNG plumbing).
3262#[inline]
3263fn standard_normal_draw<R: rand::Rng + ?Sized>(rng: &mut R) -> f64 {
3264    use rand::RngExt as _;
3265    let u1 = rng.random::<f64>().max(1e-16);
3266    let u2 = rng.random::<f64>();
3267    (-2.0 * u1.ln()).sqrt() * (2.0 * std::f64::consts::PI * u2).cos()
3268}
3269
3270/// Solve `Lᵀ x = b` for a lower-triangular `L` (back substitution), writing the
3271/// result into `out`. Used to turn a standard-normal `b` into a draw with
3272/// covariance `(L Lᵀ)^{-1}`.
3273fn solve_lower_transpose_into(l: &Array2<f64>, b: &Array1<f64>, out: &mut Array1<f64>) {
3274    let p = l.nrows();
3275    for i in (0..p).rev() {
3276        let mut acc = b[i];
3277        for j in (i + 1)..p {
3278            acc -= l[(j, i)] * out[j];
3279        }
3280        let diag = l[(i, i)];
3281        out[i] = if diag.abs() > 0.0 { acc / diag } else { 0.0 };
3282    }
3283}
3284
3285fn bounded_latent_derivatives(theta: f64, min: f64, max: f64) -> (f64, f64, f64, f64, f64) {
3286    let z = stable_sigmoid(theta);
3287    let width = max - min;
3288    let s = z * (1.0 - z);
3289    let beta = min + width * z;
3290    let db_dtheta = width * s;
3291    let d2b_dtheta2 = width * s * (1.0 - 2.0 * z);
3292    let d3b_dtheta3 = width * s * (1.0 - 6.0 * z + 6.0 * z * z);
3293    (beta, z, db_dtheta, d2b_dtheta2, d3b_dtheta3)
3294}
3295
3296fn bounded_prior_terms(theta: f64, prior: &BoundedCoefficientPriorSpec) -> (f64, f64, f64, f64) {
3297    let (a, b) = match prior {
3298        // `None` means constrained MLE with no extra prior term on the bounded coefficient.
3299        BoundedCoefficientPriorSpec::None => return (0.0, 0.0, 0.0, 0.0),
3300        // Uniform on the normalized user-scale coefficient z in (0, 1). In latent space this is
3301        // exactly the Jacobian term for the logistic transform, up to an additive width constant.
3302        BoundedCoefficientPriorSpec::Uniform => (1.0, 1.0),
3303        BoundedCoefficientPriorSpec::Beta { a, b } => (*a, *b),
3304    };
3305    let z = stable_sigmoid(theta).clamp(1e-12, 1.0 - 1e-12);
3306    let logp = a * z.ln() + b * (1.0 - z).ln();
3307    let grad = a - (a + b) * z;
3308    let neghess = (a + b) * z * (1.0 - z);
3309    let neghess_derivative = (a + b) * z * (1.0 - z) * (1.0 - 2.0 * z);
3310    (logp, grad, neghess, neghess_derivative)
3311}
3312
3313fn evaluate_standard_familyobservations(
3314    family: LikelihoodSpec,
3315    latent_cloglog_state: Option<&LatentCLogLogState>,
3316    mixture_link_state: Option<&MixtureLinkState>,
3317    sas_link_state: Option<&SasLinkState>,
3318    y: &Array1<f64>,
3319    weights: &Array1<f64>,
3320    eta: &Array1<f64>,
3321) -> Result<StandardFamilyObservationState, EstimationError> {
3322    const PROB_EPS: f64 = 1e-10;
3323    const MU_DERIV_EPS: f64 = 1e-12;
3324    let n = y.len();
3325    if weights.len() != n || eta.len() != n {
3326        crate::bail_invalid_estim!("bounded family observation size mismatch");
3327    }
3328
3329    let mut mu = Array1::<f64>::zeros(n);
3330    let mut score = Array1::<f64>::zeros(n);
3331    let mut fisherweight = Array1::<f64>::zeros(n);
3332    let mut neghessian_eta = Array1::<f64>::zeros(n);
3333    let mut neghessian_eta_derivative = Array1::<f64>::zeros(n);
3334    let mut log_likelihood = 0.0;
3335
3336    for i in 0..n {
3337        let w = weights[i].max(0.0);
3338        let yi = y[i];
3339        let eta_i = eta[i];
3340        match (&family.response, &family.link) {
3341            (ResponseFamily::Gaussian, _) => {
3342                let resid = yi - eta_i;
3343                mu[i] = eta_i;
3344                score[i] = w * resid;
3345                fisherweight[i] = w.max(MU_DERIV_EPS);
3346                neghessian_eta[i] = w;
3347                neghessian_eta_derivative[i] = 0.0;
3348                log_likelihood += -0.5 * w * resid * resid;
3349            }
3350            (ResponseFamily::Binomial, InverseLink::Standard(StandardLink::Logit)) => {
3351                let jet = logit_inverse_link_jet5(eta_i);
3352                mu[i] = jet.mu;
3353                score[i] = w * (yi - jet.mu);
3354                fisherweight[i] = jet.d1.max(MU_DERIV_EPS);
3355                neghessian_eta[i] = jet.d1;
3356                neghessian_eta_derivative[i] = jet.d2;
3357                let logmu = -gam_linalg::utils::stable_softplus(-eta_i);
3358                let log_one_minusmu = -gam_linalg::utils::stable_softplus(eta_i);
3359                log_likelihood += w * (yi * logmu + (1.0 - yi) * log_one_minusmu);
3360            }
3361            (ResponseFamily::Binomial, _) => {
3362                let inverse_link = if let Some(state) = latent_cloglog_state {
3363                    Some(InverseLink::LatentCLogLog(*state))
3364                } else if let Some(state) = mixture_link_state {
3365                    Some(InverseLink::Mixture(state.clone()))
3366                } else {
3367                    sas_link_state.map(|state| {
3368                        if family.is_binomial_beta_logistic() {
3369                            InverseLink::BetaLogistic(*state)
3370                        } else {
3371                            InverseLink::Sas(*state)
3372                        }
3373                    })
3374                };
3375                let strategy_spec = LikelihoodSpec {
3376                    response: family.response.clone(),
3377                    link: inverse_link.clone().unwrap_or_else(|| family.link.clone()),
3378                };
3379                let jet = strategy_for_spec(&strategy_spec).inverse_link_jet(eta_i)?;
3380                let mu_i_raw = jet.mu;
3381                let dmu_deta_raw = jet.d1;
3382                let mu_i: f64 = mu_i_raw.clamp(PROB_EPS, 1.0 - PROB_EPS);
3383                let dmu_deta = dmu_deta_raw.max(MU_DERIV_EPS);
3384                let d2mu_deta2 = jet.d2;
3385                let d3mu_deta3 = jet.d3;
3386                let var = (mu_i * (1.0 - mu_i)).max(PROB_EPS);
3387                let lmu = (yi - mu_i) / var;
3388                let lmumu = -(yi / (mu_i * mu_i)) - ((1.0 - yi) / ((1.0 - mu_i) * (1.0 - mu_i)));
3389                let lmumumu = 2.0 * yi / (mu_i * mu_i * mu_i)
3390                    - 2.0 * (1.0 - yi) / ((1.0 - mu_i) * (1.0 - mu_i) * (1.0 - mu_i));
3391                mu[i] = mu_i;
3392                score[i] = w * lmu * dmu_deta;
3393                fisherweight[i] = (w * dmu_deta * dmu_deta / var).max(MU_DERIV_EPS);
3394                neghessian_eta[i] = -w * (lmumu * dmu_deta * dmu_deta + lmu * d2mu_deta2);
3395                neghessian_eta_derivative[i] = -w
3396                    * (lmumumu * dmu_deta * dmu_deta * dmu_deta
3397                        + 3.0 * lmumu * dmu_deta * d2mu_deta2
3398                        + lmu * d3mu_deta3);
3399                log_likelihood += w * (yi * mu_i.ln() + (1.0 - yi) * (1.0 - mu_i).ln());
3400            }
3401            (ResponseFamily::Poisson, _) => {
3402                crate::bail_invalid_estim!(
3403                    "bounded linear terms are not supported for PoissonLog fits"
3404                );
3405            }
3406            (ResponseFamily::Tweedie { .. }, _) => {
3407                crate::bail_invalid_estim!(
3408                    "bounded linear terms are not supported for Tweedie fits"
3409                );
3410            }
3411            (ResponseFamily::NegativeBinomial { .. }, _) => {
3412                crate::bail_invalid_estim!(
3413                    "bounded linear terms are not supported for NegativeBinomial fits"
3414                );
3415            }
3416            (ResponseFamily::Beta { .. }, _) => {
3417                crate::bail_invalid_estim!(
3418                    "bounded linear terms are not supported for BetaLogit fits"
3419                );
3420            }
3421            (ResponseFamily::Gamma, _) => {
3422                crate::bail_invalid_estim!(
3423                    "bounded linear terms are not supported for GammaLog fits"
3424                );
3425            }
3426            (ResponseFamily::RoystonParmar, _) => {
3427                crate::bail_invalid_estim!(
3428                    "bounded linear terms are not supported for survival model fits"
3429                );
3430            }
3431        }
3432    }
3433
3434    Ok(StandardFamilyObservationState {
3435        eta: eta.clone(),
3436        mu,
3437        score,
3438        fisherweight,
3439        neghessian_eta,
3440        neghessian_eta_derivative,
3441        log_likelihood,
3442    })
3443}
3444
3445#[derive(Clone, Copy, Debug, PartialEq, Eq)]
3446enum SpatialAdaptiveHyperKind {
3447    LogLambdaMagnitude,
3448    LogLambdaGradient,
3449    LogLambdaCurvature,
3450    LogEpsilonMagnitude,
3451    LogEpsilonGradient,
3452    LogEpsilonCurvature,
3453}
3454
3455impl SpatialAdaptiveHyperKind {
3456    fn component_index(self) -> usize {
3457        match self {
3458            SpatialAdaptiveHyperKind::LogLambdaMagnitude
3459            | SpatialAdaptiveHyperKind::LogEpsilonMagnitude => 0,
3460            SpatialAdaptiveHyperKind::LogLambdaGradient
3461            | SpatialAdaptiveHyperKind::LogEpsilonGradient => 1,
3462            SpatialAdaptiveHyperKind::LogLambdaCurvature
3463            | SpatialAdaptiveHyperKind::LogEpsilonCurvature => 2,
3464        }
3465    }
3466
3467    fn is_log_lambda(self) -> bool {
3468        matches!(
3469            self,
3470            SpatialAdaptiveHyperKind::LogLambdaMagnitude
3471                | SpatialAdaptiveHyperKind::LogLambdaGradient
3472                | SpatialAdaptiveHyperKind::LogLambdaCurvature
3473        )
3474    }
3475
3476    fn is_log_epsilon(self) -> bool {
3477        matches!(
3478            self,
3479            SpatialAdaptiveHyperKind::LogEpsilonMagnitude
3480                | SpatialAdaptiveHyperKind::LogEpsilonGradient
3481                | SpatialAdaptiveHyperKind::LogEpsilonCurvature
3482        )
3483    }
3484}
3485
3486#[derive(Clone, Copy, Debug)]
3487struct SpatialAdaptiveHyperSpec {
3488    cache_index: usize,
3489    kind: SpatialAdaptiveHyperKind,
3490}
3491
3492#[derive(Clone, Copy, Debug, PartialEq, Eq)]
3493enum SpatialAdaptiveExplicitSecondOrderKind {
3494    StructuralZero,
3495    LocalAlphaAlpha,
3496    LocalAlphaEta,
3497    SharedEtaEta,
3498}
3499
3500/// Penalty family selected within one adaptive smooth cache. The component index
3501/// (0/1/2) used throughout the runtime caches maps onto these three operators:
3502/// the scalar magnitude operator `d0`, the grouped gradient operator `d1`, and
3503/// the grouped curvature operator `d2`.
3504#[derive(Clone, Copy, Debug, PartialEq, Eq)]
3505enum AdaptiveComponent {
3506    Magnitude,
3507    Gradient,
3508    Curvature,
3509}
3510
3511impl AdaptiveComponent {
3512    fn from_index(index: usize) -> Result<Self, String> {
3513        match index {
3514            0 => Ok(AdaptiveComponent::Magnitude),
3515            1 => Ok(AdaptiveComponent::Gradient),
3516            2 => Ok(AdaptiveComponent::Curvature),
3517            other => Err(SmoothError::invalid_index(format!(
3518                "invalid adaptive component index {}",
3519                other
3520            ))
3521            .into()),
3522        }
3523    }
3524}
3525
3526/// Which hyper-derivative of the adaptive penalty's local pieces to assemble.
3527/// Each variant selects one accessor triple (objective scalar, beta-mixed
3528/// gradient, beta hessian) on the per-component exact state; the operator
3529/// embedding around those accessors is identical across variants.
3530#[derive(Clone, Copy, Debug, PartialEq, Eq)]
3531enum HyperDerivativeKind {
3532    /// First derivative in `log lambda` (rho): the bare penalty pieces.
3533    Rho,
3534    /// First derivative in `log epsilon`.
3535    LogEpsilonFirst,
3536    /// Second derivative in `log epsilon`.
3537    LogEpsilonSecond,
3538}
3539
3540/// Which directional-drift hyper-derivative of the adaptive penalty Hessian to
3541/// assemble: the bare rho drift, or the shared-`log epsilon` drift. Both share
3542/// the per-component direction projection, operator embedding, and global
3543/// embedding; only the directional state accessor differs.
3544#[derive(Clone, Copy, Debug, PartialEq, Eq)]
3545enum HyperDriftKind {
3546    Rho,
3547    LogEpsilon,
3548}
3549
3550impl SpatialAdaptiveHyperSpec {
3551    fn component_index(self) -> usize {
3552        self.kind.component_index()
3553    }
3554
3555    fn explicit_second_order_kind(self, other: Self) -> SpatialAdaptiveExplicitSecondOrderKind {
3556        if self.component_index() != other.component_index() {
3557            return SpatialAdaptiveExplicitSecondOrderKind::StructuralZero;
3558        }
3559        match (
3560            self.kind.is_log_lambda(),
3561            other.kind.is_log_lambda(),
3562            self.kind.is_log_epsilon(),
3563            other.kind.is_log_epsilon(),
3564        ) {
3565            (true, true, false, false) if self.cache_index == other.cache_index => {
3566                SpatialAdaptiveExplicitSecondOrderKind::LocalAlphaAlpha
3567            }
3568            (true, false, false, true) | (false, true, true, false) => {
3569                SpatialAdaptiveExplicitSecondOrderKind::LocalAlphaEta
3570            }
3571            (false, false, true, true) => SpatialAdaptiveExplicitSecondOrderKind::SharedEtaEta,
3572            _ => SpatialAdaptiveExplicitSecondOrderKind::StructuralZero,
3573        }
3574    }
3575}
3576
3577#[derive(Clone, Debug)]
3578struct SpatialAdaptiveTermHyperParams {
3579    lambda: [f64; 3],
3580    epsilon: [f64; 3],
3581}
3582
3583#[derive(Clone)]
3584struct SpatialAdaptiveExactEvaluation {
3585    obs: StandardFamilyObservationState,
3586    adaptive_states: Vec<SpatialPenaltyExactState>,
3587    adaptive_penalty_value: f64,
3588    adaptive_penaltygradient: Array1<f64>,
3589    adaptive_penaltyhessian: Array2<f64>,
3590    fixed_quadraticvalue: f64,
3591    fixed_quadraticgradient: Array1<f64>,
3592    fixed_quadratichessian: Array2<f64>,
3593}
3594
3595#[derive(Clone)]
3596struct CachedSpatialAdaptiveExactEvaluation {
3597    beta: Array1<f64>,
3598    eval: Arc<SpatialAdaptiveExactEvaluation>,
3599}
3600
3601impl SpatialAdaptiveExactEvaluation {
3602    fn total_penalty_value(&self) -> f64 {
3603        self.adaptive_penalty_value + self.fixed_quadraticvalue
3604    }
3605
3606    fn total_penaltygradient(&self) -> Array1<f64> {
3607        &self.adaptive_penaltygradient + &self.fixed_quadraticgradient
3608    }
3609
3610    fn total_penaltyhessian(&self) -> Array2<f64> {
3611        &self.adaptive_penaltyhessian + &self.fixed_quadratichessian
3612    }
3613
3614    fn totalobjectivehessian(&self, design: &Array2<f64>) -> Result<Array2<f64>, String> {
3615        let mut out = xt_diag_x_dense(design.view(), self.obs.neghessian_eta.view())?;
3616        out += &self.total_penaltyhessian();
3617        Ok(out)
3618    }
3619}
3620
3621#[derive(Clone)]
3622struct SpatialAdaptiveExactFamily {
3623    family: LikelihoodSpec,
3624    latent_cloglog_state: Option<LatentCLogLogState>,
3625    mixture_link_state: Option<MixtureLinkState>,
3626    sas_link_state: Option<SasLinkState>,
3627    y: Arc<Array1<f64>>,
3628    weights: Arc<Array1<f64>>,
3629    design: Arc<Array2<f64>>,
3630    offset: Arc<Array1<f64>>,
3631    linear_constraints: Option<LinearInequalityConstraints>,
3632    runtime_caches: Arc<Vec<SpatialOperatorRuntimeCache>>,
3633    adaptive_params: Vec<SpatialAdaptiveTermHyperParams>,
3634    fixed_quadratichessian: Arc<Array2<f64>>,
3635    hyperspecs: Arc<Vec<SpatialAdaptiveHyperSpec>>,
3636    exact_eval_cache: Arc<Mutex<Option<CachedSpatialAdaptiveExactEvaluation>>>,
3637}
3638
3639impl SpatialAdaptiveExactFamily {
3640    fn with_adaptive_params(
3641        &self,
3642        adaptive_params: Vec<SpatialAdaptiveTermHyperParams>,
3643        fixed_quadratichessian: Arc<Array2<f64>>,
3644    ) -> Self {
3645        Self {
3646            family: self.family.clone(),
3647            latent_cloglog_state: self.latent_cloglog_state,
3648            mixture_link_state: self.mixture_link_state.clone(),
3649            sas_link_state: self.sas_link_state,
3650            y: self.y.clone(),
3651            weights: self.weights.clone(),
3652            design: self.design.clone(),
3653            offset: self.offset.clone(),
3654            linear_constraints: self.linear_constraints.clone(),
3655            runtime_caches: self.runtime_caches.clone(),
3656            adaptive_params,
3657            fixed_quadratichessian,
3658            hyperspecs: self.hyperspecs.clone(),
3659            exact_eval_cache: Arc::new(Mutex::new(None)),
3660        }
3661    }
3662
3663    fn total_eta(&self, beta: &Array1<f64>) -> Array1<f64> {
3664        gam_linalg::faer_ndarray::fast_av(self.design.as_ref(), beta) + self.offset.as_ref()
3665    }
3666
3667    fn fixed_quadratic_terms(&self, beta: &Array1<f64>) -> (f64, Array1<f64>) {
3668        let grad = self.fixed_quadratichessian.dot(beta);
3669        let value = 0.5 * beta.dot(&grad);
3670        (value, grad)
3671    }
3672
3673    fn adaptive_penalty_value_only(&self, beta: &Array1<f64>) -> Result<f64, String> {
3674        let mut penalty_value = 0.0;
3675        for (cache_idx, cache) in self.runtime_caches.iter().enumerate() {
3676            let params = self.adaptive_params.get(cache_idx).ok_or_else(|| {
3677                format!(
3678                    "missing adaptive parameter block for cache {}",
3679                    cache.termname
3680                )
3681            })?;
3682            let beta_local = beta.slice(s![cache.coeff_global_range.clone()]);
3683            let state =
3684                SpatialPenaltyExactState::from_beta_local(beta_local, cache, params.epsilon)
3685                    .map_err(|e| e.to_string())?;
3686            penalty_value += params.lambda[0] * state.magnitude.penalty_value();
3687            penalty_value += params.lambda[1] * state.gradient.penalty_value();
3688            penalty_value += params.lambda[2] * state.curvature.penalty_value();
3689        }
3690        Ok(penalty_value)
3691    }
3692
3693    fn zero_hyper_parts(&self) -> (Array1<f64>, Array2<f64>) {
3694        let total_dim = self.design.ncols();
3695        (
3696            Array1::<f64>::zeros(total_dim),
3697            Array2::<f64>::zeros((total_dim, total_dim)),
3698        )
3699    }
3700
3701    fn embed_local_hyper_parts(
3702        &self,
3703        coeff_range: &Range<usize>,
3704        local_grad: &Array1<f64>,
3705        local_hess: &Array2<f64>,
3706    ) -> (Array1<f64>, Array2<f64>) {
3707        let (mut beta_mixed, mut betahessian) = self.zero_hyper_parts();
3708        beta_mixed
3709            .slice_mut(s![coeff_range.clone()])
3710            .assign(local_grad);
3711        betahessian
3712            .slice_mut(s![coeff_range.clone(), coeff_range.clone()])
3713            .assign(local_hess);
3714        (beta_mixed, betahessian)
3715    }
3716
3717    fn embed_local_hyper_hessian(
3718        &self,
3719        coeff_range: &Range<usize>,
3720        local_hess: &Array2<f64>,
3721    ) -> Array2<f64> {
3722        let total_dim = self.design.ncols();
3723        let mut out = Array2::<f64>::zeros((total_dim, total_dim));
3724        out.slice_mut(s![coeff_range.clone(), coeff_range.clone()])
3725            .assign(local_hess);
3726        out
3727    }
3728
3729    /// Unified per-block hyper-derivative assembly. Owns the shared cache /
3730    /// hyperparameter / exact-state lookup, the component -> operator selection
3731    /// (scalar magnitude `d0`, grouped gradient `d1`, grouped curvature `d2`),
3732    /// and the global embedding via [`Self::embed_local_hyper_parts`]. The only
3733    /// piece that varies with `derivative` is the per-component accessor triple
3734    /// (objective scalar, beta-mixed gradient, beta hessian) read off the exact
3735    /// state. Returns `(objective, beta_mixed, betahessian)`, each already
3736    /// scaled by the component's penalty weight `lambda`.
3737    fn adaptive_block_eval(
3738        &self,
3739        eval: &SpatialAdaptiveExactEvaluation,
3740        cache_idx: usize,
3741        component: AdaptiveComponent,
3742        derivative: HyperDerivativeKind,
3743    ) -> Result<(f64, Array1<f64>, Array2<f64>), String> {
3744        let cache = self
3745            .runtime_caches
3746            .get(cache_idx)
3747            .ok_or_else(|| format!("adaptive cache index {} out of bounds", cache_idx))?;
3748        let params = self
3749            .adaptive_params
3750            .get(cache_idx)
3751            .ok_or_else(|| format!("adaptive hyperparameter block {} out of bounds", cache_idx))?;
3752        let state = eval
3753            .adaptive_states
3754            .get(cache_idx)
3755            .ok_or_else(|| format!("adaptive exact state index {} out of bounds", cache_idx))?;
3756
3757        let (objective_local, beta_mixed_local, betahessian_local) = match component {
3758            AdaptiveComponent::Magnitude => {
3759                let lambda = params.lambda[0];
3760                let mag = &state.magnitude;
3761                let (objective, gradient_coeff, hessian_diag) = match derivative {
3762                    HyperDerivativeKind::Rho => (
3763                        mag.penalty_value(),
3764                        mag.betagradient_coeff(),
3765                        mag.betahessian_diag(),
3766                    ),
3767                    HyperDerivativeKind::LogEpsilonFirst => (
3768                        mag.log_epsilon_gradient_terms().sum(),
3769                        mag.log_epsilon_betagradient_coeff(),
3770                        mag.log_epsilon_betahessian_diag(),
3771                    ),
3772                    HyperDerivativeKind::LogEpsilonSecond => (
3773                        mag.log_epsilon_hessian_terms().sum(),
3774                        mag.log_epsilon_beta_mixed_second_coeff(),
3775                        mag.log_epsilon_betahessian_second_diag(),
3776                    ),
3777                };
3778                (
3779                    lambda * objective,
3780                    lambda * scalar_operatorgradient(&cache.d0, &gradient_coeff),
3781                    lambda * scalar_operatorhessian(&cache.d0, &hessian_diag),
3782                )
3783            }
3784            AdaptiveComponent::Gradient => {
3785                let lambda = params.lambda[1];
3786                let grad = &state.gradient;
3787                let (objective, gradient_blocks, hessian_blocks) = match derivative {
3788                    HyperDerivativeKind::Rho => (
3789                        grad.penalty_value(),
3790                        grad.betagradient_blocks(),
3791                        grad.betahessian_blocks(),
3792                    ),
3793                    HyperDerivativeKind::LogEpsilonFirst => (
3794                        grad.log_epsilon_gradient_terms().sum(),
3795                        grad.log_epsilon_betagradient_blocks(),
3796                        grad.log_epsilon_betahessian_blocks(),
3797                    ),
3798                    HyperDerivativeKind::LogEpsilonSecond => (
3799                        grad.log_epsilon_hessian_terms().sum(),
3800                        grad.log_epsilon_beta_mixed_second_blocks(),
3801                        grad.log_epsilon_betahessian_second_blocks(),
3802                    ),
3803                };
3804                (
3805                    lambda * objective,
3806                    lambda
3807                        * grouped_operatorgradient(&cache.d1, cache.dimension, &gradient_blocks)
3808                            .map_err(|e| e.to_string())?,
3809                    lambda
3810                        * grouped_operatorhessian(&cache.d1, cache.dimension, &hessian_blocks)
3811                            .map_err(|e| e.to_string())?,
3812                )
3813            }
3814            AdaptiveComponent::Curvature => {
3815                let lambda = params.lambda[2];
3816                let group = cache.dimension * cache.dimension;
3817                let curv = &state.curvature;
3818                let (objective, gradient_blocks, hessian_blocks) = match derivative {
3819                    HyperDerivativeKind::Rho => (
3820                        curv.penalty_value(),
3821                        curv.betagradient_blocks(),
3822                        curv.betahessian_blocks(),
3823                    ),
3824                    HyperDerivativeKind::LogEpsilonFirst => (
3825                        curv.log_epsilon_gradient_terms().sum(),
3826                        curv.log_epsilon_betagradient_blocks(),
3827                        curv.log_epsilon_betahessian_blocks(),
3828                    ),
3829                    HyperDerivativeKind::LogEpsilonSecond => (
3830                        curv.log_epsilon_hessian_terms().sum(),
3831                        curv.log_epsilon_beta_mixed_second_blocks(),
3832                        curv.log_epsilon_betahessian_second_blocks(),
3833                    ),
3834                };
3835                (
3836                    lambda * objective,
3837                    lambda
3838                        * grouped_operatorgradient(&cache.d2, group, &gradient_blocks)
3839                            .map_err(|e| e.to_string())?,
3840                    lambda
3841                        * grouped_operatorhessian(&cache.d2, group, &hessian_blocks)
3842                            .map_err(|e| e.to_string())?,
3843                )
3844            }
3845        };
3846
3847        let (beta_mixed, betahessian) = self.embed_local_hyper_parts(
3848            &cache.coeff_global_range,
3849            &beta_mixed_local,
3850            &betahessian_local,
3851        );
3852        Ok((objective_local, beta_mixed, betahessian))
3853    }
3854
3855    fn adaptive_shared_log_epsilon_parts(
3856        &self,
3857        eval: &SpatialAdaptiveExactEvaluation,
3858        component: usize,
3859    ) -> Result<(f64, Array1<f64>, Array2<f64>), String> {
3860        // Exact shared-log-epsilon first-order pieces:
3861        //
3862        //   J_{eta_p}         = sum_m lambda_{m,p} U_{m,p,eta},
3863        //   J_{beta,eta_p}    = sum_m lambda_{m,p} U_{m,p,beta eta},
3864        //   J_{beta,beta,eta} = sum_m lambda_{m,p} U_{m,p,beta beta eta}.
3865        self.adaptive_shared_block_eval(eval, component, HyperDerivativeKind::LogEpsilonFirst)
3866    }
3867
3868    fn adaptive_shared_log_epsilon_second_parts(
3869        &self,
3870        eval: &SpatialAdaptiveExactEvaluation,
3871        component: usize,
3872    ) -> Result<(f64, Array1<f64>, Array2<f64>), String> {
3873        // Exact shared-log-epsilon second-order pieces:
3874        //
3875        //   J_{eta_p,eta_p}            = sum_m lambda_{m,p} U_{m,p,eta eta},
3876        //   J_{beta,eta_p,eta_p}       = sum_m lambda_{m,p} U_{m,p,beta eta eta},
3877        //   J_{beta,beta,eta_p,eta_p}  = sum_m lambda_{m,p} U_{m,p,beta beta eta eta}.
3878        self.adaptive_shared_block_eval(eval, component, HyperDerivativeKind::LogEpsilonSecond)
3879    }
3880
3881    /// Sum a per-block hyper-derivative across every adaptive term for one shared
3882    /// `log epsilon` coordinate (selected by `component`). The three log-epsilon
3883    /// coordinates are shared globally by penalty type, so each contributes the
3884    /// matching component's block from every cache.
3885    fn adaptive_shared_block_eval(
3886        &self,
3887        eval: &SpatialAdaptiveExactEvaluation,
3888        component: usize,
3889        derivative: HyperDerivativeKind,
3890    ) -> Result<(f64, Array1<f64>, Array2<f64>), String> {
3891        let component = AdaptiveComponent::from_index(component)?;
3892        let (mut score, mut hessian) = self.zero_hyper_parts();
3893        let mut objective = 0.0;
3894        for cache_idx in 0..self.runtime_caches.len() {
3895            let (local_objective, local_score, local_hessian) =
3896                self.adaptive_block_eval(eval, cache_idx, component, derivative)?;
3897            objective += local_objective;
3898            score += &local_score;
3899            hessian += &local_hessian;
3900        }
3901        Ok((objective, score, hessian))
3902    }
3903
3904    fn adaptive_shared_log_epsilon_drift(
3905        &self,
3906        eval: &SpatialAdaptiveExactEvaluation,
3907        component: usize,
3908        direction: &Array1<f64>,
3909    ) -> Result<Array2<f64>, String> {
3910        // Exact shared-log-epsilon Hessian drift:
3911        //
3912        //   T_{eta_p}[u] = sum_m lambda_{m,p} D_beta(U_{m,p,beta beta eta})[u].
3913        let component = AdaptiveComponent::from_index(component)?;
3914        let total_dim = self.design.ncols();
3915        let mut total = Array2::<f64>::zeros((total_dim, total_dim));
3916        for cache_idx in 0..self.runtime_caches.len() {
3917            total += &self.adaptive_block_drift_eval(
3918                eval,
3919                cache_idx,
3920                component,
3921                HyperDriftKind::LogEpsilon,
3922                direction,
3923            )?;
3924        }
3925        Ok(total)
3926    }
3927
3928    fn adaptive_explicit_second_order_parts(
3929        &self,
3930        eval: &SpatialAdaptiveExactEvaluation,
3931        left: SpatialAdaptiveHyperSpec,
3932        right: SpatialAdaptiveHyperSpec,
3933    ) -> Result<(f64, Array1<f64>, Array2<f64>), String> {
3934        // Structural sparsity from the adaptive penalty algebra:
3935        //
3936        //   - alpha_{m,p} / alpha_{n,r} is nonzero only when (m,p) = (n,r),
3937        //   - alpha_{m,p} / eta_r is nonzero only when p = r,
3938        //   - eta_p / eta_r is nonzero only when p = r,
3939        //
3940        // with eta_p contributions summed over all adaptive terms m because the
3941        // three log-epsilon coordinates are shared globally by penalty type.
3942        match left.explicit_second_order_kind(right) {
3943            SpatialAdaptiveExplicitSecondOrderKind::StructuralZero => {
3944                let (score, hessian) = self.zero_hyper_parts();
3945                Ok((0.0, score, hessian))
3946            }
3947            SpatialAdaptiveExplicitSecondOrderKind::LocalAlphaAlpha => self.adaptive_block_eval(
3948                eval,
3949                left.cache_index,
3950                AdaptiveComponent::from_index(left.component_index())?,
3951                HyperDerivativeKind::Rho,
3952            ),
3953            SpatialAdaptiveExplicitSecondOrderKind::LocalAlphaEta => {
3954                let local_alpha = if left.kind.is_log_lambda() {
3955                    left
3956                } else {
3957                    right
3958                };
3959                self.adaptive_block_eval(
3960                    eval,
3961                    local_alpha.cache_index,
3962                    AdaptiveComponent::from_index(local_alpha.component_index())?,
3963                    HyperDerivativeKind::LogEpsilonFirst,
3964                )
3965            }
3966            SpatialAdaptiveExplicitSecondOrderKind::SharedEtaEta => {
3967                self.adaptive_shared_log_epsilon_second_parts(eval, left.component_index())
3968            }
3969        }
3970    }
3971
3972    /// Unified per-block directional-drift assembly. Owns the shared cache /
3973    /// hyperparameter / exact-state lookup, the per-component direction
3974    /// projection through the collocation operators, the operator embedding, and
3975    /// the global embedding via [`Self::embed_local_hyper_hessian`]. The only
3976    /// piece that varies with `drift` is the directional state accessor:
3977    /// [`HyperDriftKind::Rho`] takes the bare directional Hessian drift, while
3978    /// [`HyperDriftKind::LogEpsilon`] takes its `log epsilon` derivative.
3979    fn adaptive_block_drift_eval(
3980        &self,
3981        eval: &SpatialAdaptiveExactEvaluation,
3982        cache_idx: usize,
3983        component: AdaptiveComponent,
3984        drift: HyperDriftKind,
3985        direction: &Array1<f64>,
3986    ) -> Result<Array2<f64>, String> {
3987        let cache = self
3988            .runtime_caches
3989            .get(cache_idx)
3990            .ok_or_else(|| format!("adaptive cache index {} out of bounds", cache_idx))?;
3991        let params = self
3992            .adaptive_params
3993            .get(cache_idx)
3994            .ok_or_else(|| format!("adaptive hyperparameter block {} out of bounds", cache_idx))?;
3995        let state = eval
3996            .adaptive_states
3997            .get(cache_idx)
3998            .ok_or_else(|| format!("adaptive exact state index {} out of bounds", cache_idx))?;
3999        let direction_local = direction.slice(s![cache.coeff_global_range.clone()]);
4000
4001        let local_hessian = match component {
4002            AdaptiveComponent::Magnitude => {
4003                let d0_u = cache.d0.dot(&direction_local);
4004                let mag = &state.magnitude;
4005                let diag = match drift {
4006                    HyperDriftKind::Rho => mag.directionalhessian_diag(&d0_u),
4007                    HyperDriftKind::LogEpsilon => {
4008                        mag.log_epsilon_betahessian_directional_diag(&d0_u)
4009                    }
4010                };
4011                params.lambda[0] * scalar_operatorhessian(&cache.d0, &diag)
4012            }
4013            AdaptiveComponent::Gradient => {
4014                let d1_u = cache.d1.dot(&direction_local);
4015                let direction_blocks = collocationgradient_blocks(&d1_u, cache.dimension)
4016                    .map_err(|e| e.to_string())?;
4017                let grad = &state.gradient;
4018                let blocks = match drift {
4019                    HyperDriftKind::Rho => grad.directionalhessian_blocks(&direction_blocks),
4020                    HyperDriftKind::LogEpsilon => {
4021                        grad.log_epsilon_betahessian_directional_blocks(&direction_blocks)
4022                    }
4023                };
4024                params.lambda[1]
4025                    * grouped_operatorhessian(&cache.d1, cache.dimension, &blocks)
4026                        .map_err(|e| e.to_string())?
4027            }
4028            AdaptiveComponent::Curvature => {
4029                let group = cache.dimension * cache.dimension;
4030                let d2_u = cache.d2.dot(&direction_local);
4031                let direction_blocks =
4032                    collocationhessian_blocks(&d2_u, cache.dimension).map_err(|e| e.to_string())?;
4033                let curv = &state.curvature;
4034                let blocks = match drift {
4035                    HyperDriftKind::Rho => curv.directionalhessian_blocks(&direction_blocks),
4036                    HyperDriftKind::LogEpsilon => {
4037                        curv.log_epsilon_betahessian_directional_blocks(&direction_blocks)
4038                    }
4039                };
4040                params.lambda[2]
4041                    * grouped_operatorhessian(&cache.d2, group, &blocks)
4042                        .map_err(|e| e.to_string())?
4043            }
4044        };
4045
4046        Ok(self.embed_local_hyper_hessian(&cache.coeff_global_range, &local_hessian))
4047    }
4048
4049    fn adaptive_hyper_parts(
4050        &self,
4051        eval: &SpatialAdaptiveExactEvaluation,
4052        hyper: SpatialAdaptiveHyperSpec,
4053    ) -> Result<(f64, Array1<f64>, Array2<f64>), String> {
4054        match hyper.kind {
4055            // Per-term `log lambda` (rho) hyper-derivative: the bare penalty
4056            // pieces for this cache's selected component.
4057            SpatialAdaptiveHyperKind::LogLambdaMagnitude
4058            | SpatialAdaptiveHyperKind::LogLambdaGradient
4059            | SpatialAdaptiveHyperKind::LogLambdaCurvature => self.adaptive_block_eval(
4060                eval,
4061                hyper.cache_index,
4062                AdaptiveComponent::from_index(hyper.component_index())?,
4063                HyperDerivativeKind::Rho,
4064            ),
4065            // Shared `log epsilon` hyper-derivative: summed across all terms.
4066            SpatialAdaptiveHyperKind::LogEpsilonMagnitude
4067            | SpatialAdaptiveHyperKind::LogEpsilonGradient
4068            | SpatialAdaptiveHyperKind::LogEpsilonCurvature => {
4069                self.adaptive_shared_log_epsilon_parts(eval, hyper.component_index())
4070            }
4071        }
4072    }
4073
4074    fn exact_evaluation_uncached(
4075        &self,
4076        beta: &Array1<f64>,
4077    ) -> Result<SpatialAdaptiveExactEvaluation, String> {
4078        let eta = self.total_eta(beta);
4079        let obs = evaluate_standard_familyobservations(
4080            self.family.clone(),
4081            self.latent_cloglog_state.as_ref(),
4082            self.mixture_link_state.as_ref(),
4083            self.sas_link_state.as_ref(),
4084            &self.y,
4085            &self.weights,
4086            &eta,
4087        )
4088        .map_err(|e| e.to_string())?;
4089        let p = beta.len();
4090        let mut penalty_value = 0.0;
4091        let mut penaltygradient = Array1::<f64>::zeros(p);
4092        let mut penaltyhessian = Array2::<f64>::zeros((p, p));
4093        let mut adaptive_states = Vec::with_capacity(self.runtime_caches.len());
4094
4095        for (cache_idx, cache) in self.runtime_caches.iter().enumerate() {
4096            let params = self.adaptive_params.get(cache_idx).ok_or_else(|| {
4097                format!(
4098                    "missing adaptive parameter block for cache {}",
4099                    cache.termname
4100                )
4101            })?;
4102            let beta_local = beta.slice(s![cache.coeff_global_range.clone()]);
4103            let state =
4104                SpatialPenaltyExactState::from_beta_local(beta_local, cache, params.epsilon)
4105                    .map_err(|e| e.to_string())?;
4106
4107            let g0 = scalar_operatorgradient(&cache.d0, &state.magnitude.betagradient_coeff());
4108            let gg = grouped_operatorgradient(
4109                &cache.d1,
4110                cache.dimension,
4111                &state.gradient.betagradient_blocks(),
4112            )
4113            .map_err(|e| e.to_string())?;
4114            let gc = grouped_operatorgradient(
4115                &cache.d2,
4116                cache.dimension * cache.dimension,
4117                &state.curvature.betagradient_blocks(),
4118            )
4119            .map_err(|e| e.to_string())?;
4120            let h0 = scalar_operatorhessian(&cache.d0, &state.magnitude.betahessian_diag());
4121            let hg = grouped_operatorhessian(
4122                &cache.d1,
4123                cache.dimension,
4124                &state.gradient.betahessian_blocks(),
4125            )
4126            .map_err(|e| e.to_string())?;
4127            let hc = grouped_operatorhessian(
4128                &cache.d2,
4129                cache.dimension * cache.dimension,
4130                &state.curvature.betahessian_blocks(),
4131            )
4132            .map_err(|e| e.to_string())?;
4133
4134            let lambda0 = params.lambda[0];
4135            let lambdag = params.lambda[1];
4136            let lambdac = params.lambda[2];
4137
4138            penalty_value += lambda0 * state.magnitude.penalty_value();
4139            penalty_value += lambdag * state.gradient.penalty_value();
4140            penalty_value += lambdac * state.curvature.penalty_value();
4141
4142            let range = cache.coeff_global_range.clone();
4143            {
4144                let mut grad_local = penaltygradient.slice_mut(s![range.clone()]);
4145                grad_local += &(g0.mapv(|v| lambda0 * v));
4146                grad_local += &(gg.mapv(|v| lambdag * v));
4147                grad_local += &(gc.mapv(|v| lambdac * v));
4148            }
4149            {
4150                let mut h_local = penaltyhessian.slice_mut(s![range.clone(), range]);
4151                h_local += &h0.mapv(|v| lambda0 * v);
4152                h_local += &hg.mapv(|v| lambdag * v);
4153                h_local += &hc.mapv(|v| lambdac * v);
4154            }
4155
4156            adaptive_states.push(state);
4157        }
4158
4159        let (fixed_quadraticvalue, fixed_quadraticgradient) = self.fixed_quadratic_terms(beta);
4160        Ok(SpatialAdaptiveExactEvaluation {
4161            obs,
4162            adaptive_states,
4163            adaptive_penalty_value: penalty_value,
4164            adaptive_penaltygradient: penaltygradient,
4165            adaptive_penaltyhessian: penaltyhessian,
4166            fixed_quadraticvalue,
4167            fixed_quadraticgradient,
4168            fixed_quadratichessian: self.fixed_quadratichessian.as_ref().clone(),
4169        })
4170    }
4171
4172    fn exact_evaluation(
4173        &self,
4174        beta: &Array1<f64>,
4175    ) -> Result<Arc<SpatialAdaptiveExactEvaluation>, String> {
4176        {
4177            let cache = self
4178                .exact_eval_cache
4179                .lock()
4180                .map_err(|_| "spatial adaptive exact-evaluation cache lock poisoned".to_string())?;
4181            if let Some(cached) = cache.as_ref()
4182                && cached.beta.len() == beta.len()
4183                && cached
4184                    .beta
4185                    .iter()
4186                    .zip(beta.iter())
4187                    .all(|(&left, &right)| left == right)
4188            {
4189                return Ok(Arc::clone(&cached.eval));
4190            }
4191        }
4192
4193        let eval = Arc::new(self.exact_evaluation_uncached(beta)?);
4194        let mut cache = self
4195            .exact_eval_cache
4196            .lock()
4197            .map_err(|_| "spatial adaptive exact-evaluation cache lock poisoned".to_string())?;
4198        *cache = Some(CachedSpatialAdaptiveExactEvaluation {
4199            beta: beta.clone(),
4200            eval: Arc::clone(&eval),
4201        });
4202        Ok(eval)
4203    }
4204
4205    fn exacthessian_directional_derivative_from_evaluation(
4206        &self,
4207        beta: &Array1<f64>,
4208        eval: &SpatialAdaptiveExactEvaluation,
4209        direction: &Array1<f64>,
4210    ) -> Result<Array2<f64>, String> {
4211        assert_eq!(
4212            beta.len(),
4213            direction.len(),
4214            "beta/direction length mismatch",
4215        );
4216        let d_eta = gam_linalg::faer_ndarray::fast_av(self.design.as_ref(), direction);
4217        let mut total = xt_diag_x_dense(
4218            self.design.view(),
4219            (&eval.obs.neghessian_eta_derivative * &d_eta).view(),
4220        )?;
4221        for (cache_idx, cache) in self.runtime_caches.iter().enumerate() {
4222            let params = self.adaptive_params.get(cache_idx).ok_or_else(|| {
4223                format!(
4224                    "missing adaptive parameter block for cache {}",
4225                    cache.termname
4226                )
4227            })?;
4228            let state = eval
4229                .adaptive_states
4230                .get(cache_idx)
4231                .ok_or_else(|| format!("missing adaptive state for cache {}", cache.termname))?;
4232            let direction_local = direction.slice(s![cache.coeff_global_range.clone()]);
4233            let d0_u = cache.d0.dot(&direction_local);
4234            let d1_u = cache.d1.dot(&direction_local);
4235            let d2_u = cache.d2.dot(&direction_local);
4236            let h0 =
4237                scalar_operatorhessian(&cache.d0, &state.magnitude.directionalhessian_diag(&d0_u))
4238                    .mapv(|v| params.lambda[0] * v);
4239            let hg = grouped_operatorhessian(
4240                &cache.d1,
4241                cache.dimension,
4242                &state.gradient.directionalhessian_blocks(
4243                    &collocationgradient_blocks(&d1_u, cache.dimension)
4244                        .map_err(|e| e.to_string())?,
4245                ),
4246            )
4247            .map_err(|e| e.to_string())?
4248            .mapv(|v| params.lambda[1] * v);
4249            let hc = grouped_operatorhessian(
4250                &cache.d2,
4251                cache.dimension * cache.dimension,
4252                &state.curvature.directionalhessian_blocks(
4253                    &collocationhessian_blocks(&d2_u, cache.dimension)
4254                        .map_err(|e| e.to_string())?,
4255                ),
4256            )
4257            .map_err(|e| e.to_string())?
4258            .mapv(|v| params.lambda[2] * v);
4259            let range = cache.coeff_global_range.clone();
4260            let mut local = total.slice_mut(s![range.clone(), range]);
4261            local += &h0;
4262            local += &hg;
4263            local += &hc;
4264        }
4265        Ok(total)
4266    }
4267
4268    /// Exact second directional derivative `D²_β H[u, v]` of the joint
4269    /// (likelihood + adaptive Charbonnier penalty) Hessian, needed so the outer
4270    /// LAML's joint-Jeffreys curvature drift `D_β H_Φ[β̇]` is exact rather than
4271    /// silently dropped (which leaves the outer hypergradient inconsistent with
4272    /// the `½log|H+H_Φ|` objective it folds `H_Φ` into).
4273    ///
4274    /// The data block contributes `Xᵀ diag(ℓ'''(η_i) (Xu)_i (Xv)_i) X`, where
4275    /// `ℓ'''` is the third derivative of the per-observation log-likelihood in
4276    /// `η`. The observation state exposes the working weight `w=−ℓ''` and its
4277    /// first `η`-derivative `w'` (`neghessian_eta_derivative`) but not `w''`, so
4278    /// the exact data term is available only on the **constant-weight** path
4279    /// (`w' ≡ 0`, e.g. Gaussian identity), where `w'' ≡ 0` and the data block
4280    /// second derivative vanishes. On a varying-weight family we return `None`
4281    /// (the safe, pre-existing behavior: the drift degrades to zero rather than
4282    /// to a wrong value) until the observation contract carries `w''`.
4283    ///
4284    /// The penalty block is always exact: with `λ_m G_mᵀ B_m(G_m β) G_m` the
4285    /// per-component penalty Hessian, `D²_β` is `λ_m Σ_k G_mᵀ N_m,k G_m` using the
4286    /// scalar (`second_directionalhessian_diag`) / grouped
4287    /// (`second_directionalhessian_blocks`) fourth-derivative contractions.
4288    fn exacthessian_second_directional_derivative_from_evaluation(
4289        &self,
4290        eval: &SpatialAdaptiveExactEvaluation,
4291        direction_u: &Array1<f64>,
4292        direction_v: &Array1<f64>,
4293    ) -> Result<Option<Array2<f64>>, String> {
4294        let p = self.design.ncols();
4295        // Data block: exact only when the working weight is constant in η.
4296        if eval.obs.neghessian_eta_derivative.iter().any(|&w| w != 0.0) {
4297            return Ok(None);
4298        }
4299        let mut total = Array2::<f64>::zeros((p, p));
4300        for (cache_idx, cache) in self.runtime_caches.iter().enumerate() {
4301            let params = self.adaptive_params.get(cache_idx).ok_or_else(|| {
4302                format!(
4303                    "missing adaptive parameter block for cache {}",
4304                    cache.termname
4305                )
4306            })?;
4307            let state = eval
4308                .adaptive_states
4309                .get(cache_idx)
4310                .ok_or_else(|| format!("missing adaptive state for cache {}", cache.termname))?;
4311            let u_local = direction_u.slice(s![cache.coeff_global_range.clone()]);
4312            let v_local = direction_v.slice(s![cache.coeff_global_range.clone()]);
4313
4314            // Magnitude (scalar d0).
4315            let q0_u = cache.d0.dot(&u_local);
4316            let q0_v = cache.d0.dot(&v_local);
4317            let h0 = scalar_operatorhessian(
4318                &cache.d0,
4319                &state.magnitude.second_directionalhessian_diag(&q0_u, &q0_v),
4320            )
4321            .mapv(|x| params.lambda[0] * x);
4322
4323            // Gradient (grouped d1, block dim = dimension).
4324            let a1 = collocationgradient_blocks(&cache.d1.dot(&u_local), cache.dimension)
4325                .map_err(|e| e.to_string())?;
4326            let b1 = collocationgradient_blocks(&cache.d1.dot(&v_local), cache.dimension)
4327                .map_err(|e| e.to_string())?;
4328            let hg = grouped_operatorhessian(
4329                &cache.d1,
4330                cache.dimension,
4331                &state.gradient.second_directionalhessian_blocks(&a1, &b1),
4332            )
4333            .map_err(|e| e.to_string())?
4334            .mapv(|x| params.lambda[1] * x);
4335
4336            // Curvature (grouped d2, block dim = dimension²).
4337            let a2 = collocationhessian_blocks(&cache.d2.dot(&u_local), cache.dimension)
4338                .map_err(|e| e.to_string())?;
4339            let b2 = collocationhessian_blocks(&cache.d2.dot(&v_local), cache.dimension)
4340                .map_err(|e| e.to_string())?;
4341            let hc = grouped_operatorhessian(
4342                &cache.d2,
4343                cache.dimension * cache.dimension,
4344                &state.curvature.second_directionalhessian_blocks(&a2, &b2),
4345            )
4346            .map_err(|e| e.to_string())?
4347            .mapv(|x| params.lambda[2] * x);
4348
4349            let range = cache.coeff_global_range.clone();
4350            let mut local = total.slice_mut(s![range.clone(), range]);
4351            local += &h0;
4352            local += &hg;
4353            local += &hc;
4354        }
4355        Ok(Some(total))
4356    }
4357}
4358
4359impl CustomFamily for SpatialAdaptiveExactFamily {
4360    // Preserve the pre-gam#1395 behavior: the trait default flipped to OFF (the
4361    // flat-prior exact-Newton objective carries no Jeffreys term), so families
4362    // that historically armed the term by default opt back in explicitly.
4363    fn joint_jeffreys_term_required(&self) -> bool {
4364        true
4365    }
4366
4367    fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
4368        let beta = &expect_single_block_state(block_states, "spatial adaptive exact family")?.beta;
4369        let eval = self.exact_evaluation(beta)?;
4370        let mut gradient = fast_atv(&self.design, &eval.obs.score);
4371        gradient -= &eval.total_penaltygradient();
4372        let mut hessian = xt_diag_x_dense(self.design.view(), eval.obs.neghessian_eta.view())?;
4373        hessian += &eval.total_penaltyhessian();
4374        Ok(FamilyEvaluation {
4375            log_likelihood: eval.obs.log_likelihood - eval.total_penalty_value(),
4376            blockworking_sets: vec![BlockWorkingSet::ExactNewton {
4377                gradient,
4378                hessian: SymmetricMatrix::Dense(hessian),
4379            }],
4380        })
4381    }
4382
4383    fn log_likelihood_only(&self, block_states: &[ParameterBlockState]) -> Result<f64, String> {
4384        let state = expect_single_block_state(block_states, "spatial adaptive exact family")?;
4385        let beta = &state.beta;
4386        let obs = evaluate_standard_familyobservations(
4387            self.family.clone(),
4388            self.latent_cloglog_state.as_ref(),
4389            self.mixture_link_state.as_ref(),
4390            self.sas_link_state.as_ref(),
4391            &self.y,
4392            &self.weights,
4393            &state.eta,
4394        )
4395        .map_err(|e| e.to_string())?;
4396        let adaptive_penalty = self.adaptive_penalty_value_only(beta)?;
4397        let (fixed_quadratic, _) = self.fixed_quadratic_terms(beta);
4398        Ok(obs.log_likelihood - adaptive_penalty - fixed_quadratic)
4399    }
4400
4401    fn exact_newton_outerobjective(&self) -> ExactNewtonOuterObjective {
4402        ExactNewtonOuterObjective::StrictPseudoLaplace
4403    }
4404
4405    fn exact_newton_joint_hessian(
4406        &self,
4407        block_states: &[ParameterBlockState],
4408    ) -> Result<Option<Array2<f64>>, String> {
4409        let beta = &expect_single_block_state(block_states, "spatial adaptive exact family")?.beta;
4410        let eval = self.exact_evaluation(beta)?;
4411        Ok(Some(eval.totalobjectivehessian(&self.design)?))
4412    }
4413
4414    fn exact_newton_hessian_directional_derivative(
4415        &self,
4416        block_states: &[ParameterBlockState],
4417        block_idx: usize,
4418        d_beta: &Array1<f64>,
4419    ) -> Result<Option<Array2<f64>>, String> {
4420        expect_block_idx_zero(block_idx, "spatial adaptive exact family", "")?;
4421        self.exact_newton_joint_hessian_directional_derivative(block_states, d_beta)
4422    }
4423
4424    fn exact_newton_joint_hessian_directional_derivative(
4425        &self,
4426        block_states: &[ParameterBlockState],
4427        d_beta_flat: &Array1<f64>,
4428    ) -> Result<Option<Array2<f64>>, String> {
4429        let beta = &expect_single_block_state(block_states, "spatial adaptive exact family")?.beta;
4430        if d_beta_flat.len() != beta.len() {
4431            return Err(SmoothError::dimension_mismatch(format!(
4432                "spatial adaptive exact family direction length mismatch: got {}, expected {}",
4433                d_beta_flat.len(),
4434                beta.len()
4435            ))
4436            .into());
4437        }
4438        let eval = self.exact_evaluation(beta)?;
4439        Ok(Some(
4440            self.exacthessian_directional_derivative_from_evaluation(beta, &eval, d_beta_flat)?,
4441        ))
4442    }
4443
4444    fn exact_newton_joint_hessiansecond_directional_derivative(
4445        &self,
4446        block_states: &[ParameterBlockState],
4447        d_beta_u_flat: &Array1<f64>,
4448        d_betav_flat: &Array1<f64>,
4449    ) -> Result<Option<Array2<f64>>, String> {
4450        let beta = &expect_single_block_state(block_states, "spatial adaptive exact family")?.beta;
4451        if d_beta_u_flat.len() != beta.len() || d_betav_flat.len() != beta.len() {
4452            return Err(SmoothError::dimension_mismatch(format!(
4453                "spatial adaptive exact family second-direction length mismatch: got ({}, {}), expected {}",
4454                d_beta_u_flat.len(),
4455                d_betav_flat.len(),
4456                beta.len()
4457            ))
4458            .into());
4459        }
4460        let eval = self.exact_evaluation(beta)?;
4461        self.exacthessian_second_directional_derivative_from_evaluation(
4462            &eval,
4463            d_beta_u_flat,
4464            d_betav_flat,
4465        )
4466    }
4467
4468    fn block_linear_constraints(
4469        &self,
4470        block_states: &[ParameterBlockState],
4471        block_idx: usize,
4472        block_spec: &ParameterBlockSpec,
4473    ) -> Result<Option<LinearInequalityConstraints>, String> {
4474        assert!(!block_states.is_empty(), "block_states must be non-empty");
4475        assert!(
4476            !block_spec.name.is_empty(),
4477            "block spec name must be non-empty",
4478        );
4479        expect_block_idx_zero(block_idx, "spatial adaptive exact family", "")?;
4480        Ok(self.linear_constraints.clone())
4481    }
4482
4483    fn exact_newton_joint_psi_terms(
4484        &self,
4485        block_states: &[ParameterBlockState],
4486        specs: &[ParameterBlockSpec],
4487        derivative_blocks: &[Vec<CustomFamilyBlockPsiDerivative>],
4488        psi_index: usize,
4489    ) -> Result<Option<ExactNewtonJointPsiTerms>, String> {
4490        if block_states.len() != 1 || specs.len() != 1 || derivative_blocks.len() != 1 {
4491            return Err(SmoothError::dimension_mismatch(format!(
4492                "spatial adaptive exact family expects one block/state/spec/psi payload, got states={} specs={} deriv_blocks={}",
4493                block_states.len(),
4494                specs.len(),
4495                derivative_blocks.len()
4496            ))
4497            .into());
4498        }
4499        derivative_blocks[0]
4500            .get(psi_index)
4501            .ok_or_else(|| format!("adaptive psi index {} out of bounds", psi_index))?;
4502        let hyper = self
4503            .hyperspecs
4504            .get(psi_index)
4505            .ok_or_else(|| format!("adaptive psi index {} out of bounds", psi_index))?;
4506        let beta = &block_states[0].beta;
4507        let eval = self.exact_evaluation(beta)?;
4508        let (direct, beta_mixed, betahessian_explicit) =
4509            self.adaptive_hyper_parts(&eval, *hyper)?;
4510
4511        // Exact pseudo-Laplace psi-gradient.
4512        //
4513        // For one hyperparameter coordinate a we use the exact formula
4514        //
4515        //   d/da L_tilde
4516        //   = J_a + 0.5 tr(H^{-1} Hdot_a),
4517        //
4518        // with
4519        //
4520        //   H u_a   = J_{beta,a},
4521        //   beta_a  = -u_a,
4522        //   Hdot_a  = J_{beta,beta,a} + D_beta(H)[beta_a]
4523        //           = J_{beta,beta,a} - D_beta(H)[u_a].
4524        //
4525        // Here:
4526        //   - `direct` is J_a,
4527        //   - `beta_mixed` is J_{beta,a},
4528        //   - `betahessian_explicit` is J_{beta,beta,a},
4529        //   - `exacthessian_directional_derivative_from_evaluation(..., u)` returns
4530        //     D_beta(H)[u] for the exact likelihood-plus-Charbonnier model.
4531        Ok(Some(ExactNewtonJointPsiTerms {
4532            objective_psi: direct,
4533            score_psi: beta_mixed,
4534            hessian_psi: betahessian_explicit,
4535            hessian_psi_operator: None,
4536        }))
4537    }
4538
4539    fn exact_newton_joint_psisecond_order_terms(
4540        &self,
4541        block_states: &[ParameterBlockState],
4542        specs: &[ParameterBlockSpec],
4543        derivative_blocks: &[Vec<CustomFamilyBlockPsiDerivative>],
4544        psi_i: usize,
4545        psi_j: usize,
4546    ) -> Result<Option<gam_problem::ExactNewtonJointPsiSecondOrderTerms>, String> {
4547        if block_states.len() != 1 || specs.len() != 1 || derivative_blocks.len() != 1 {
4548            return Err(SmoothError::dimension_mismatch(format!(
4549                "spatial adaptive exact family expects one block/state/spec/psi payload, got states={} specs={} deriv_blocks={}",
4550                block_states.len(),
4551                specs.len(),
4552                derivative_blocks.len()
4553            ))
4554            .into());
4555        }
4556        derivative_blocks[0]
4557            .get(psi_i)
4558            .ok_or_else(|| format!("adaptive psi index {} out of bounds", psi_i))?;
4559        derivative_blocks[0]
4560            .get(psi_j)
4561            .ok_or_else(|| format!("adaptive psi index {} out of bounds", psi_j))?;
4562        let hyper_i = self
4563            .hyperspecs
4564            .get(psi_i)
4565            .ok_or_else(|| format!("adaptive psi index {} out of bounds", psi_i))?;
4566        let hyper_j = self
4567            .hyperspecs
4568            .get(psi_j)
4569            .ok_or_else(|| format!("adaptive psi index {} out of bounds", psi_j))?;
4570        let beta = &block_states[0].beta;
4571        let eval = self.exact_evaluation(beta)?;
4572        let (objective_psi_psi, score_psi_psi, hessian_psi_psi) =
4573            self.adaptive_explicit_second_order_parts(&eval, *hyper_i, *hyper_j)?;
4574
4575        Ok(Some(
4576            gam_problem::ExactNewtonJointPsiSecondOrderTerms {
4577                objective_psi_psi,
4578                score_psi_psi,
4579                hessian_psi_psi,
4580                hessian_psi_psi_operator: None,
4581            },
4582        ))
4583    }
4584
4585    fn exact_newton_joint_psihessian_directional_derivative(
4586        &self,
4587        block_states: &[ParameterBlockState],
4588        specs: &[ParameterBlockSpec],
4589        derivative_blocks: &[Vec<CustomFamilyBlockPsiDerivative>],
4590        psi_index: usize,
4591        direction: &Array1<f64>,
4592    ) -> Result<Option<Array2<f64>>, String> {
4593        if block_states.len() != 1 || specs.len() != 1 || derivative_blocks.len() != 1 {
4594            return Err(SmoothError::dimension_mismatch(format!(
4595                "spatial adaptive exact family expects one block/state/spec/psi payload, got states={} specs={} deriv_blocks={}",
4596                block_states.len(),
4597                specs.len(),
4598                derivative_blocks.len()
4599            ))
4600            .into());
4601        }
4602        let beta = &block_states[0].beta;
4603        if direction.len() != beta.len() {
4604            return Err(SmoothError::dimension_mismatch(format!(
4605                "spatial adaptive exact family direction length mismatch: got {}, expected {}",
4606                direction.len(),
4607                beta.len()
4608            ))
4609            .into());
4610        }
4611        derivative_blocks[0]
4612            .get(psi_index)
4613            .ok_or_else(|| format!("adaptive psi index {} out of bounds", psi_index))?;
4614        let hyper = self
4615            .hyperspecs
4616            .get(psi_index)
4617            .ok_or_else(|| format!("adaptive psi index {} out of bounds", psi_index))?;
4618        let eval = self.exact_evaluation(beta)?;
4619        let drift = match hyper.kind {
4620            SpatialAdaptiveHyperKind::LogLambdaMagnitude
4621            | SpatialAdaptiveHyperKind::LogLambdaGradient
4622            | SpatialAdaptiveHyperKind::LogLambdaCurvature => self.adaptive_block_drift_eval(
4623                &eval,
4624                hyper.cache_index,
4625                AdaptiveComponent::from_index(hyper.kind.component_index())?,
4626                HyperDriftKind::Rho,
4627                direction,
4628            )?,
4629            SpatialAdaptiveHyperKind::LogEpsilonMagnitude
4630            | SpatialAdaptiveHyperKind::LogEpsilonGradient
4631            | SpatialAdaptiveHyperKind::LogEpsilonCurvature => self
4632                .adaptive_shared_log_epsilon_drift(
4633                    &eval,
4634                    hyper.kind.component_index(),
4635                    direction,
4636                )?,
4637        };
4638        Ok(Some(drift))
4639    }
4640}
4641
4642fn expect_single_block_state<'a>(
4643    block_states: &'a [ParameterBlockState],
4644    family_name: &str,
4645) -> Result<&'a ParameterBlockState, String> {
4646    crate::block_layout::block_count::validate_block_count::<SmoothError>(
4647        family_name,
4648        1,
4649        block_states.len(),
4650    )?;
4651    Ok(&block_states[0])
4652}
4653
4654fn expect_block_idx_zero(block_idx: usize, family_name: &str, context: &str) -> Result<(), String> {
4655    if block_idx != 0 {
4656        return Err(SmoothError::invalid_index(format!(
4657            "{family_name} expects block_idx 0{context}, got {block_idx}"
4658        ))
4659        .into());
4660    }
4661    Ok::<(), _>(())
4662}
4663
4664impl BoundedLinearFamily {
4665    fn bounded_term_derivative_data(
4666        &self,
4667        latent_beta: &Array1<f64>,
4668    ) -> (
4669        Array1<f64>,
4670        Array1<f64>,
4671        Array1<f64>,
4672        Array1<f64>,
4673        Array1<f64>,
4674    ) {
4675        let p = latent_beta.len();
4676        let mut beta_user = latent_beta.clone();
4677        let mut jac_diag = Array1::<f64>::ones(p);
4678        let mut second_diag = Array1::<f64>::zeros(p);
4679        let mut third_diag = Array1::<f64>::zeros(p);
4680        let mut priorthird = Array1::<f64>::zeros(p);
4681        for term in &self.bounded_terms {
4682            let (beta, _, db_dtheta, d2b_dtheta2, d3b_dtheta3) =
4683                bounded_latent_derivatives(latent_beta[term.col_idx], term.min, term.max);
4684            beta_user[term.col_idx] = beta;
4685            jac_diag[term.col_idx] = db_dtheta;
4686            second_diag[term.col_idx] = d2b_dtheta2;
4687            third_diag[term.col_idx] = d3b_dtheta3;
4688            let (_, _, _, prior_neghess_derivative) =
4689                bounded_prior_terms(latent_beta[term.col_idx], &term.prior);
4690            priorthird[term.col_idx] = prior_neghess_derivative;
4691        }
4692        (beta_user, jac_diag, second_diag, third_diag, priorthird)
4693    }
4694
4695    fn user_beta_and_jacobian(&self, latent_beta: &Array1<f64>) -> (Array1<f64>, Array1<f64>) {
4696        let (beta_user, jac_diag, _, _, _) = self.bounded_term_derivative_data(latent_beta);
4697        (beta_user, jac_diag)
4698    }
4699
4700    fn nonlinear_offset_from_latent(&self, latent_beta: &Array1<f64>) -> Array1<f64> {
4701        let mut offset = self.offset.clone();
4702        for term in &self.bounded_terms {
4703            let (beta, _, _) =
4704                bounded_latent_to_user(latent_beta[term.col_idx], term.min, term.max);
4705            offset.scaled_add(beta, &self.design.column(term.col_idx));
4706        }
4707        offset
4708    }
4709
4710    fn effective_design_for_latent(&self, jac_diag: &Array1<f64>) -> Array2<f64> {
4711        let mut x_eff = self.design.clone();
4712        for term in &self.bounded_terms {
4713            x_eff
4714                .column_mut(term.col_idx)
4715                .mapv_inplace(|v| v * jac_diag[term.col_idx]);
4716        }
4717        x_eff
4718    }
4719
4720    fn exacthessian_andgradient(
4721        &self,
4722        latent_beta: &Array1<f64>,
4723    ) -> Result<
4724        (
4725            StandardFamilyObservationState,
4726            Array2<f64>,
4727            Array1<f64>,
4728            f64,
4729            Array1<f64>,
4730            Array1<f64>,
4731            Array1<f64>,
4732        ),
4733        String,
4734    > {
4735        let (_, jac_diag, second_diag, third_diag, priorthird) =
4736            self.bounded_term_derivative_data(latent_beta);
4737        let x_eff = self.effective_design_for_latent(&jac_diag);
4738        let eta =
4739            self.designzeroed.dot(latent_beta) + self.nonlinear_offset_from_latent(latent_beta);
4740        let obs = evaluate_standard_familyobservations(
4741            self.family.clone(),
4742            self.latent_cloglog_state.as_ref(),
4743            self.mixture_link_state.as_ref(),
4744            self.sas_link_state.as_ref(),
4745            &self.y,
4746            &self.weights,
4747            &eta,
4748        )
4749        .map_err(|e| e.to_string())?;
4750
4751        let mut priorgrad = Array1::<f64>::zeros(latent_beta.len());
4752        let mut prior_neghess = Array2::<f64>::zeros((latent_beta.len(), latent_beta.len()));
4753        let mut prior_loglik = 0.0;
4754        for term in &self.bounded_terms {
4755            let (logp, grad, neghess, _) =
4756                bounded_prior_terms(latent_beta[term.col_idx], &term.prior);
4757            prior_loglik += logp;
4758            priorgrad[term.col_idx] += grad;
4759            prior_neghess[[term.col_idx, term.col_idx]] += neghess;
4760        }
4761
4762        let mut hessian = xt_diag_x_dense(x_eff.view(), obs.neghessian_eta.view())?;
4763        let mut gradient = fast_atv(&x_eff, &obs.score);
4764        for term in &self.bounded_terms {
4765            let score_beta = self.design.column(term.col_idx).dot(&obs.score);
4766            hessian[[term.col_idx, term.col_idx]] -= score_beta * second_diag[term.col_idx];
4767        }
4768        hessian += &prior_neghess;
4769        gradient += &priorgrad;
4770
4771        Ok((
4772            obs,
4773            hessian,
4774            gradient,
4775            prior_loglik,
4776            second_diag,
4777            third_diag,
4778            priorthird,
4779        ))
4780    }
4781
4782    fn evaluation_from_latent(
4783        &self,
4784        latent_beta: &Array1<f64>,
4785    ) -> Result<
4786        (
4787            StandardFamilyObservationState,
4788            Array2<f64>,
4789            Array1<f64>,
4790            f64,
4791        ),
4792        String,
4793    > {
4794        let (obs, hessian, gradient, prior_loglik, _, _, _) =
4795            self.exacthessian_andgradient(latent_beta)?;
4796        Ok((obs, hessian, gradient, prior_loglik))
4797    }
4798}
4799
4800impl CustomFamily for BoundedLinearFamily {
4801    // Preserve the pre-gam#1395 behavior: the trait default flipped to OFF (the
4802    // flat-prior exact-Newton objective carries no Jeffreys term), so families
4803    // that historically armed the term by default opt back in explicitly.
4804    fn joint_jeffreys_term_required(&self) -> bool {
4805        true
4806    }
4807
4808    fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
4809        let latent_beta = &expect_single_block_state(block_states, "bounded linear family")?.beta;
4810        let (obs, hessian, gradient, prior_loglik) = self.evaluation_from_latent(latent_beta)?;
4811        Ok(FamilyEvaluation {
4812            log_likelihood: obs.log_likelihood + prior_loglik,
4813            blockworking_sets: vec![BlockWorkingSet::ExactNewton {
4814                gradient,
4815                hessian: SymmetricMatrix::Dense(hessian),
4816            }],
4817        })
4818    }
4819
4820    fn exact_newton_joint_hessian(
4821        &self,
4822        block_states: &[ParameterBlockState],
4823    ) -> Result<Option<Array2<f64>>, String> {
4824        let latent_beta = &expect_single_block_state(block_states, "bounded linear family")?.beta;
4825        let (_, hessian, _, _) = self.evaluation_from_latent(latent_beta)?;
4826        Ok(Some(hessian))
4827    }
4828
4829    fn exact_newton_hessian_directional_derivative(
4830        &self,
4831        block_states: &[ParameterBlockState],
4832        block_idx: usize,
4833        d_beta: &Array1<f64>,
4834    ) -> Result<Option<Array2<f64>>, String> {
4835        expect_block_idx_zero(block_idx, "bounded linear family", "")?;
4836        self.exact_newton_joint_hessian_directional_derivative(block_states, d_beta)
4837    }
4838
4839    fn exact_newton_joint_hessian_directional_derivative(
4840        &self,
4841        block_states: &[ParameterBlockState],
4842        d_beta_flat: &Array1<f64>,
4843    ) -> Result<Option<Array2<f64>>, String> {
4844        let latent_beta = &expect_single_block_state(block_states, "bounded linear family")?.beta;
4845        if d_beta_flat.len() != latent_beta.len() {
4846            return Err(SmoothError::dimension_mismatch(format!(
4847                "bounded linear family directional derivative length mismatch: got {}, expected {}",
4848                d_beta_flat.len(),
4849                latent_beta.len()
4850            ))
4851            .into());
4852        }
4853
4854        let (obs, _, _, _, second_diag, third_diag, priorthird) =
4855            self.exacthessian_andgradient(latent_beta)?;
4856
4857        let (_, jac_diag, _, _, _) = self.bounded_term_derivative_data(latent_beta);
4858        let x_eff = self.effective_design_for_latent(&jac_diag);
4859        let deta = x_eff.dot(d_beta_flat);
4860        let d_neghess_eta = &obs.neghessian_eta_derivative * &deta;
4861
4862        let mut dx_eff = Array2::<f64>::zeros(x_eff.raw_dim());
4863        for term in &self.bounded_terms {
4864            let scale = second_diag[term.col_idx] * d_beta_flat[term.col_idx];
4865            if scale != 0.0 {
4866                let mut col = dx_eff.column_mut(term.col_idx);
4867                col.assign(&self.design.column(term.col_idx));
4868                col.mapv_inplace(|v| v * scale);
4869            }
4870        }
4871
4872        let mut dhessian = xt_diag_x_dense(x_eff.view(), d_neghess_eta.view())?;
4873        let mut wxdx = Array2::<f64>::zeros((x_eff.ncols(), x_eff.ncols()));
4874        for i in 0..x_eff.nrows() {
4875            let wi = obs.neghessian_eta[i];
4876            if wi == 0.0 {
4877                continue;
4878            }
4879            for a in 0..x_eff.ncols() {
4880                let xa = x_eff[[i, a]];
4881                for b in 0..x_eff.ncols() {
4882                    wxdx[[a, b]] += wi * (dx_eff[[i, a]] * x_eff[[i, b]] + xa * dx_eff[[i, b]]);
4883                }
4884            }
4885        }
4886        dhessian += &wxdx;
4887
4888        let d_score = -&obs.neghessian_eta * &deta;
4889        for term in &self.bounded_terms {
4890            let score_beta = self.design.column(term.col_idx).dot(&obs.score);
4891            let d_score_beta = self.design.column(term.col_idx).dot(&d_score);
4892            dhessian[[term.col_idx, term.col_idx]] -= d_score_beta * second_diag[term.col_idx]
4893                + score_beta * third_diag[term.col_idx] * d_beta_flat[term.col_idx];
4894            dhessian[[term.col_idx, term.col_idx]] +=
4895                priorthird[term.col_idx] * d_beta_flat[term.col_idx];
4896        }
4897
4898        Ok(Some(dhessian))
4899    }
4900
4901    fn block_geometry(
4902        &self,
4903        block_states: &[ParameterBlockState],
4904        spec: &ParameterBlockSpec,
4905    ) -> Result<(DesignMatrix, Array1<f64>), String> {
4906        if block_states.is_empty() {
4907            return Ok((
4908                DesignMatrix::Dense(gam_linalg::matrix::DenseDesignMatrix::from(
4909                    self.designzeroed.clone(),
4910                )),
4911                self.offset.clone(),
4912            ));
4913        }
4914        let offset = self.nonlinear_offset_from_latent(
4915            &expect_single_block_state(block_states, "bounded linear family")?.beta,
4916        );
4917        let x = if spec.design.ncols() == self.designzeroed.ncols() {
4918            self.designzeroed.clone()
4919        } else {
4920            return Err(SmoothError::dimension_mismatch(
4921                "bounded linear family design column mismatch",
4922            )
4923            .into());
4924        };
4925        Ok((
4926            DesignMatrix::Dense(gam_linalg::matrix::DenseDesignMatrix::from(x)),
4927            offset,
4928        ))
4929    }
4930
4931    fn block_geometry_is_dynamic(&self) -> bool {
4932        true
4933    }
4934
4935    fn block_geometry_directional_derivative(
4936        &self,
4937        block_states: &[ParameterBlockState],
4938        block_idx: usize,
4939        spec: &ParameterBlockSpec,
4940        d_beta: &Array1<f64>,
4941    ) -> Result<Option<BlockGeometryDirectionalDerivative>, String> {
4942        expect_block_idx_zero(
4943            block_idx,
4944            "bounded linear family",
4945            " for geometry derivative",
4946        )?;
4947        expect_single_block_state(block_states, "bounded linear family")?;
4948        if d_beta.len() != spec.design.ncols() {
4949            return Err(SmoothError::dimension_mismatch(format!(
4950                "bounded linear family geometry derivative direction mismatch: got {}, expected {}",
4951                d_beta.len(),
4952                spec.design.ncols()
4953            ))
4954            .into());
4955        }
4956        let (_, jac_diag, _, _, _) = self.bounded_term_derivative_data(&block_states[0].beta);
4957        let mut d_offset = Array1::<f64>::zeros(self.offset.len());
4958        let has_drift = self
4959            .bounded_terms
4960            .iter()
4961            .any(|term| jac_diag[term.col_idx] != 0.0 && d_beta[term.col_idx] != 0.0);
4962        if !has_drift {
4963            return Ok(Some(BlockGeometryDirectionalDerivative {
4964                d_design: None,
4965                d_offset,
4966            }));
4967        }
4968        for term in &self.bounded_terms {
4969            let col = term.col_idx;
4970            let drift = jac_diag[col] * d_beta[col];
4971            if drift != 0.0 {
4972                d_offset.scaled_add(drift, &self.design.column(col));
4973            }
4974        }
4975        Ok(Some(BlockGeometryDirectionalDerivative {
4976            d_design: None,
4977            d_offset,
4978        }))
4979    }
4980}
4981
4982#[inline]
4983fn dense_diag_gram_chunkrows(p: usize) -> usize {
4984    const MIN_ROWS: usize = 512;
4985    const MAX_ROWS: usize = 2048;
4986    const TARGET_BYTES: usize = 2 * 1024 * 1024;
4987    let bytes_per_row = p.max(1) * std::mem::size_of::<f64>();
4988    (TARGET_BYTES / bytes_per_row).clamp(MIN_ROWS, MAX_ROWS)
4989}
4990
4991fn xt_diag_x_dense(x: ArrayView2<'_, f64>, w: ArrayView1<'_, f64>) -> Result<Array2<f64>, String> {
4992    if x.nrows() != w.len() {
4993        return Err(SmoothError::dimension_mismatch("xt_diag_x_dense row mismatch").into());
4994    }
4995    let (n, p) = x.dim();
4996    if n == 0 || p == 0 {
4997        return Ok(Array2::<f64>::zeros((p, p)));
4998    }
4999
5000    const STREAMING_BYTES_THRESHOLD: usize = 8 * 1024 * 1024;
5001    let dense_work_bytes = n
5002        .checked_mul(p)
5003        .and_then(|cells| cells.checked_mul(std::mem::size_of::<f64>()))
5004        .unwrap_or(usize::MAX);
5005    if dense_work_bytes <= STREAMING_BYTES_THRESHOLD {
5006        let mut weighted = x.to_owned();
5007        ndarray::Zip::from(weighted.rows_mut())
5008            .and(w)
5009            .par_for_each(|mut row, wi| row *= *wi);
5010        return Ok(fast_atb(&x, &weighted));
5011    }
5012
5013    let chunkrows = dense_diag_gram_chunkrows(p).min(n);
5014    let mut weighted_chunk = Array2::<f64>::zeros((chunkrows, p));
5015    let mut out = Array2::<f64>::zeros((p, p));
5016    for row_start in (0..n).step_by(chunkrows) {
5017        let rows = (n - row_start).min(chunkrows);
5018        let x_chunk = x.slice(s![row_start..row_start + rows, ..]);
5019        {
5020            let mut chunk = weighted_chunk.slice_mut(s![0..rows, ..]);
5021            for local_row in 0..rows {
5022                let scale = w[row_start + local_row];
5023                if scale == 0.0 {
5024                    chunk.row_mut(local_row).fill(0.0);
5025                    continue;
5026                }
5027                for col in 0..p {
5028                    chunk[[local_row, col]] = x_chunk[[local_row, col]] * scale;
5029                }
5030            }
5031        }
5032        out += &fast_atb(&x_chunk, &weighted_chunk.slice(s![0..rows, ..]));
5033    }
5034    Ok(out)
5035}
5036
5037fn trace_of_dense_product(a: &Array2<f64>, b: &Array2<f64>) -> Result<f64, String> {
5038    if a.nrows() != a.ncols() || b.nrows() != b.ncols() || a.nrows() != b.nrows() {
5039        return Err(
5040            SmoothError::dimension_mismatch("trace_of_dense_product dimension mismatch").into(),
5041        );
5042    }
5043    let mut trace = 0.0;
5044    for i in 0..a.nrows() {
5045        for j in 0..a.ncols() {
5046            trace += a[[i, j]] * b[[j, i]];
5047        }
5048    }
5049    Ok(trace)
5050}
5051
5052fn exact_bounded_edf(
5053    penalties: &[PenaltySpec],
5054    lambdas: &Array1<f64>,
5055    latent_cov: &Array2<f64>,
5056) -> Result<(Vec<f64>, Vec<f64>, f64), EstimationError> {
5057    if penalties.len() != lambdas.len() {
5058        crate::bail_invalid_estim!(
5059            "bounded EDF penalty/lambda mismatch: {} penalties vs {} lambdas",
5060            penalties.len(),
5061            lambdas.len()
5062        );
5063    }
5064    if latent_cov.nrows() != latent_cov.ncols() {
5065        crate::bail_invalid_estim!("bounded EDF covariance must be square");
5066    }
5067
5068    let p = latent_cov.nrows();
5069    let mut s_lambda = Array2::<f64>::zeros((p, p));
5070    let mut edf_by_block = Vec::with_capacity(penalties.len());
5071    // Raw per-block penalty trace tr_kk = λ_kk·tr(H⁻¹S_kk) (issue #1219).
5072    let mut penalty_block_trace = Vec::with_capacity(penalties.len());
5073    let mut trace_sum = 0.0;
5074
5075    for (k, ps) in penalties.iter().enumerate() {
5076        let lambda_k = lambdas[k];
5077        match ps {
5078            PenaltySpec::Block {
5079                local, col_range, ..
5080            } => {
5081                s_lambda
5082                    .slice_mut(ndarray::s![col_range.clone(), col_range.clone()])
5083                    .scaled_add(lambda_k, local);
5084                // Compute penalty rank from the block-local matrix directly.
5085                let penalty_rank =
5086                    local
5087                        .nrows()
5088                        .saturating_sub(estimate_penalty_nullity(local).map_err(|e| {
5089                            EstimationError::InvalidInput(format!("bounded EDF rank failed: {e}"))
5090                        })?);
5091                // Trace only involves the block slice of latent_cov.
5092                let cov_block = latent_cov.slice(ndarray::s![col_range.clone(), col_range.clone()]);
5093                let trace_k = lambda_k
5094                    * trace_of_dense_product(&cov_block.to_owned(), local)
5095                        .map_err(EstimationError::InvalidInput)?;
5096                trace_sum += trace_k;
5097                penalty_block_trace.push(trace_k);
5098                let p_k = penalty_rank as f64;
5099                edf_by_block.push((p_k - trace_k).clamp(0.0, p_k));
5100            }
5101            PenaltySpec::Dense(m) | PenaltySpec::DenseWithMean { matrix: m, .. } => {
5102                s_lambda.scaled_add(lambda_k, m);
5103                let penalty_rank = p.saturating_sub(estimate_penalty_nullity(m).map_err(|e| {
5104                    EstimationError::InvalidInput(format!("bounded EDF rank failed: {e}"))
5105                })?);
5106                let trace_k = lambda_k
5107                    * trace_of_dense_product(latent_cov, m)
5108                        .map_err(EstimationError::InvalidInput)?;
5109                trace_sum += trace_k;
5110                penalty_block_trace.push(trace_k);
5111                let p_k = penalty_rank as f64;
5112                edf_by_block.push((p_k - trace_k).clamp(0.0, p_k));
5113            }
5114        }
5115    }
5116
5117    let nullity_total = estimate_penalty_nullity(&s_lambda)
5118        .map_err(|e| EstimationError::InvalidInput(format!("bounded EDF nullity failed: {e}")))?
5119        as f64;
5120    let edf_total = (p as f64 - trace_sum).clamp(nullity_total, p as f64);
5121    Ok((edf_by_block, penalty_block_trace, edf_total))
5122}
5123
5124/// Symmetric posterior-precision inverse for the bounded-coefficient path.
5125///
5126/// The penalised Hessian at a strict posterior maximum is SPD, so its inverse
5127/// is the posterior covariance. We eigendecompose the symmetric precision and
5128/// invert the positive-eigenvalue subspace, projecting out the (rare)
5129/// structural null directions a penalised model leaves flat rather than
5130/// δ-ridging them — the same honest pseudo-inverse contract the strict
5131/// pseudo-Laplace covariance uses (gam#748). A genuinely indefinite precision
5132/// (a negative eigenvalue beyond rounding) means the reported mode is not a
5133/// posterior maximum and is surfaced as a fit-quality error rather than
5134/// masked.
5135fn symmetric_positive_definite_inverse_or_pseudo(
5136    precision: &Array2<f64>,
5137) -> Result<Array2<f64>, EstimationError> {
5138    use gam_linalg::faer_ndarray::FaerEigh;
5139    let p = precision.nrows();
5140    if precision.ncols() != p {
5141        crate::bail_invalid_estim!(
5142            "posterior precision inverse requires a square matrix, got {}x{}",
5143            precision.nrows(),
5144            precision.ncols()
5145        );
5146    }
5147    if p == 0 {
5148        return Ok(Array2::<f64>::zeros((0, 0)));
5149    }
5150    let symmetric = (precision + &precision.t().to_owned()) * 0.5;
5151    let (evals, evecs) = symmetric.eigh(faer::Side::Lower).map_err(|e| {
5152        EstimationError::InvalidInput(format!(
5153            "posterior precision eigendecomposition failed: {e}"
5154        ))
5155    })?;
5156    let max_abs_eval = evals.iter().fold(0.0_f64, |acc, &ev| acc.max(ev.abs()));
5157    let tol =
5158        (10.0 * f64::EPSILON * (p as f64) * (p as f64) * max_abs_eval).max(100.0 * f64::EPSILON);
5159    if let Some(&min_eval) = evals
5160        .iter()
5161        .filter(|&&ev| ev < -tol)
5162        .min_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
5163    {
5164        crate::bail_invalid_estim!(
5165            "bounded posterior precision is non-PD at the converged optimum (min eigenvalue \
5166             {min_eval:.6e} < -tol={tol:.6e}); the reported mode is not a strict posterior \
5167             maximum, so a covariance would be meaningless"
5168        );
5169    }
5170    // Σ = U diag(1/λ_+) Uᵀ over the positive-eigenvalue subspace.
5171    let mut scaled = evecs.clone();
5172    for (j, &ev) in evals.iter().enumerate() {
5173        let inv = if ev > tol { 1.0 / ev } else { 0.0 };
5174        scaled.column_mut(j).mapv_inplace(|v| v * inv);
5175    }
5176    let cov = scaled.dot(&evecs.t());
5177    Ok((&cov + &cov.t().to_owned()) * 0.5)
5178}
5179
5180fn transform_bounded_latent_precision_to_user_internal(
5181    latent_precision: &Array2<f64>,
5182    jac_diag: &Array1<f64>,
5183) -> Result<Array2<f64>, EstimationError> {
5184    let p = latent_precision.nrows();
5185    if latent_precision.ncols() != p || jac_diag.len() != p {
5186        crate::bail_invalid_estim!(
5187            "bounded precision transform dimension mismatch: precision is {}x{}, jacobian has {} entries",
5188            latent_precision.nrows(),
5189            latent_precision.ncols(),
5190            jac_diag.len()
5191        );
5192    }
5193    let mut out = latent_precision.clone();
5194    for i in 0..p {
5195        let scale = jac_diag[i];
5196        if !scale.is_finite() || scale <= 0.0 {
5197            crate::bail_invalid_estim!(
5198                "bounded precision transform requires a positive finite coefficient jacobian; column {i} has {scale}"
5199            );
5200        }
5201        if scale != 1.0 {
5202            out.row_mut(i).mapv_inplace(|v| v / scale);
5203            out.column_mut(i).mapv_inplace(|v| v / scale);
5204        }
5205    }
5206    Ok(out)
5207}
5208
5209fn fit_bounded_term_collection_with_design(
5210    y: ArrayView1<'_, f64>,
5211    weights: ArrayView1<'_, f64>,
5212    offset: ArrayView1<'_, f64>,
5213    spec: &TermCollectionSpec,
5214    design: &TermCollectionDesign,
5215    heuristic_lambdas: Option<&[f64]>,
5216    family: LikelihoodSpec,
5217    options: &FitOptions,
5218) -> Result<FittedTermCollection, EstimationError> {
5219    let conditioning_cols: Vec<usize> = spec
5220        .linear_terms
5221        .iter()
5222        .enumerate()
5223        .filter_map(|(j, linear)| {
5224            (!linear.double_penalty).then_some(design.intercept_range.end + j)
5225        })
5226        .collect();
5227    let conditioning = LinearFitConditioning::from_columns(design, &conditioning_cols);
5228    let dense_design = design.design.to_dense_cow();
5229    let fit_design = conditioning.apply_to_design(&dense_design);
5230    let fit_penalties = conditioning
5231        .transform_blockwise_penalties_to_internal(&design.penalties, design.design.ncols());
5232    if design.linear_constraints.is_some() {
5233        crate::bail_invalid_estim!(
5234            "bounded() terms are not yet compatible with explicit linear constraints"
5235        );
5236    }
5237    let mut bounded_terms = Vec::<BoundedLinearTermMeta>::new();
5238    for (j, term) in spec.linear_terms.iter().enumerate() {
5239        if term.double_penalty
5240            && matches!(
5241                term.coefficient_geometry,
5242                LinearCoefficientGeometry::Bounded { .. }
5243            )
5244        {
5245            crate::bail_invalid_estim!(
5246                "bounded linear term '{}' cannot also use double_penalty",
5247                term.name
5248            );
5249        }
5250        if let LinearCoefficientGeometry::Bounded { min, max, prior } =
5251            term.coefficient_geometry.clone()
5252        {
5253            let col_idx = design.intercept_range.end + j;
5254            let (min_internal, max_internal) = conditioning.internal_bounds_for(col_idx, min, max);
5255            bounded_terms.push(BoundedLinearTermMeta {
5256                col_idx,
5257                min: min_internal,
5258                max: max_internal,
5259                prior,
5260            });
5261        }
5262    }
5263    if bounded_terms.is_empty() {
5264        crate::bail_invalid_estim!("internal bounded fit path called with no bounded terms");
5265    }
5266
5267    let mut designzeroed = fit_design.clone();
5268    let mut initial_beta = Array1::<f64>::zeros(fit_design.ncols());
5269    for term in &bounded_terms {
5270        designzeroed.column_mut(term.col_idx).fill(0.0);
5271        initial_beta[term.col_idx] = bounded_logit(0.5);
5272    }
5273
5274    let initial_log_lambdas = heuristic_lambdas
5275        .map(|vals| Array1::from_vec(vals.to_vec()))
5276        .unwrap_or_else(|| Array1::zeros(fit_penalties.len()));
5277    if initial_log_lambdas.len() != fit_penalties.len() {
5278        crate::bail_invalid_estim!(
5279            "heuristic lambda length mismatch for bounded model: got {}, expected {}",
5280            initial_log_lambdas.len(),
5281            fit_penalties.len()
5282        );
5283    }
5284
5285    let is_beta_logistic = family.is_binomial_beta_logistic();
5286    let family_adapter = BoundedLinearFamily {
5287        family: family.clone(),
5288        latent_cloglog_state: options.latent_cloglog,
5289        mixture_link_state: options
5290            .mixture_link
5291            .clone()
5292            .as_ref()
5293            .map(state_fromspec)
5294            .transpose()
5295            .map_err(EstimationError::InvalidInput)?,
5296        sas_link_state: options
5297            .sas_link
5298            .map(|spec| {
5299                if is_beta_logistic {
5300                    state_from_beta_logisticspec(spec)
5301                } else {
5302                    state_from_sasspec(spec)
5303                }
5304            })
5305            .transpose()
5306            .map_err(EstimationError::InvalidInput)?,
5307        y: y.to_owned(),
5308        weights: weights.to_owned(),
5309        design: fit_design.clone(),
5310        designzeroed: designzeroed.clone(),
5311        offset: offset.to_owned(),
5312        bounded_terms: bounded_terms.clone(),
5313    };
5314    let blockspec = ParameterBlockSpec {
5315        name: "eta".to_string(),
5316        design: DesignMatrix::Dense(gam_linalg::matrix::DenseDesignMatrix::from(designzeroed)),
5317        offset: offset.to_owned(),
5318        penalties: fit_penalties
5319            .iter()
5320            .map(|ps| match ps {
5321                PenaltySpec::Block {
5322                    local, col_range, ..
5323                } => PenaltyMatrix::Blockwise {
5324                    local: local.clone(),
5325                    col_range: col_range.clone(),
5326                    total_dim: design.design.ncols(),
5327                },
5328                PenaltySpec::Dense(m) | PenaltySpec::DenseWithMean { matrix: m, .. } => {
5329                    PenaltyMatrix::Dense(m.clone())
5330                }
5331            })
5332            .collect(),
5333        nullspace_dims: design.nullspace_dims.clone(),
5334        initial_log_lambdas,
5335        initial_beta: Some(initial_beta),
5336        gauge_priority: 100,
5337        // Report the true β-dependent Jacobian (bounded columns scaled by
5338        // dβ/dθ) to the identifiability audit so it does not mistake the
5339        // deliberately-zeroed placeholder columns for a structural rank
5340        // deficiency. The inner solve still drives η through the family
5341        // adapter, so this does not affect the fit geometry.
5342        jacobian_callback: Some(Arc::new(BoundedEffectiveJacobian {
5343            design: fit_design.clone(),
5344            bounded_terms: bounded_terms.clone(),
5345        })),
5346        stacked_design: None,
5347        stacked_offset: None,
5348    };
5349    let fit = fit_custom_family(
5350        &family_adapter,
5351        &[blockspec],
5352        &BlockwiseFitOptions {
5353            inner_max_cycles: options.max_iter,
5354            inner_tol: options.tol,
5355            outer_max_iter: options.max_iter,
5356            outer_tol: options.tol,
5357            // The bounded path builds its own user-scale covariance below by
5358            // inverting the user-scale penalised Hessian (delta-method through
5359            // the bounded transform's Jacobian + the conditioning map), so it
5360            // does not consume the inner solver's optional canonical-space
5361            // `covariance_conditional`. Inverting the reported precision
5362            // directly guarantees `inv(penalized_hessian) == covariance` and
5363            // works on every bounded fit — including the common no-smoothing
5364            // path where the inner solve surfaces no covariance at all (the
5365            // gam#854 "bounded fit emits no user-scale covariance" symptom).
5366            compute_covariance: false,
5367            ..BlockwiseFitOptions::default()
5368        },
5369    )
5370    .map_err(EstimationError::CustomFamily)?;
5371
5372    let latent_beta = fit.block_states[0].beta.clone();
5373    let (beta_user_internal, jac_diag) = family_adapter.user_beta_and_jacobian(&latent_beta);
5374    let beta_user = conditioning.backtransform_beta(&beta_user_internal);
5375
5376    let (eta_state, h_data, _, _) = family_adapter
5377        .evaluation_from_latent(&latent_beta)
5378        .map_err(EstimationError::InvalidInput)?;
5379    let p_fit = fit_design.ncols();
5380    let mut s_lambda_internal = Array2::<f64>::zeros((p_fit, p_fit));
5381    for (k, penalty) in fit_penalties.iter().enumerate() {
5382        match penalty {
5383            PenaltySpec::Block {
5384                local, col_range, ..
5385            } => {
5386                s_lambda_internal
5387                    .slice_mut(ndarray::s![col_range.clone(), col_range.clone()])
5388                    .scaled_add(fit.lambdas[k], local);
5389            }
5390            PenaltySpec::Dense(m) | PenaltySpec::DenseWithMean { matrix: m, .. } => {
5391                s_lambda_internal.scaled_add(fit.lambdas[k], m);
5392            }
5393        }
5394    }
5395    let mut latent_precision = h_data.clone();
5396    latent_precision += &s_lambda_internal;
5397    let user_precision_internal =
5398        transform_bounded_latent_precision_to_user_internal(&latent_precision, &jac_diag)?;
5399    let penalized_hessian =
5400        conditioning.transform_penalized_hessian_to_original(&user_precision_internal);
5401
5402    // User-scale posterior covariance via the delta method. The reported
5403    // geometry precision `penalized_hessian` is the user-scale penalized
5404    // Hessian `H_user = C⁻ᵀ J⁻¹ (H_latent + S_λ) J⁻¹ C⁻¹` (latent precision
5405    // pushed through the bounded transform's Jacobian `J = diag(dβ_user/dθ)`
5406    // and the conditioning map `C`). Its exact inverse `H_user⁻¹` is the
5407    // delta-method pushforward of the latent posterior precision-inverse
5408    // `(H_latent + S_λ)⁻¹` — but on the UNSCALED (unit-dispersion) scale. For a
5409    // free-dispersion family (profiled Gaussian) the reported coefficient
5410    // covariance is `Vb = φ̂ · H_user⁻¹` with `φ̂ = σ̂²`, so the unscaled inverse
5411    // below is multiplied by the dispersion scale `cov_scale` once `σ̂²` is
5412    // known (after the EDF, which sets the residual d.f.). For fixed-scale
5413    // families (Binomial, `φ ≡ 1`) `cov_scale == 1` and `Vb = H_user⁻¹`
5414    // unchanged. Skipping this scale was gam#1514: an interior, well-identified
5415    // Gaussian bounded slope reported an SE ≈ 1/√Σ(xᵢ−x̄)² instead of
5416    // σ̂/√Σ(xᵢ−x̄)², i.e. ~`1/σ̂` (≈20×) too wide.
5417    //
5418    // Inverting the same matrix the geometry reports keeps
5419    // `inv(penalized_hessian) == cov_scale⁻¹ · covariance` and removes the
5420    // dependency on the inner solver's optional, canonical-space
5421    // `covariance_conditional` (which is `None` whenever the bounded blockspec
5422    // carries no smoothing parameters — the no-rho fit path — leaving a bounded
5423    // fit with a populated precision but no user-scale covariance, the gam#854
5424    // symptom). The latent precision is SPD at a strict posterior maximum; on a
5425    // marginally-indefinite boundary Hessian we invert the positive-eigenvalue
5426    // subspace (the structural null space of a penalised model is a flat
5427    // posterior direction, not something to ridge away), matching the
5428    // strict-pseudo-Laplace covariance contract (gam#748).
5429    let beta_covariance_unscaled = if options.compute_inference {
5430        Some(symmetric_positive_definite_inverse_or_pseudo(
5431            &penalized_hessian,
5432        )?)
5433    } else {
5434        None
5435    };
5436    // EDF `p − Σ_k λ_k tr(H_latent⁻¹ S_k)` is computed in the *latent*
5437    // (untransformed) coordinate system the penalties `fit_penalties` live in,
5438    // so it needs the latent posterior covariance `(H_latent + S_λ)⁻¹`, not the
5439    // user-scale one. Invert the same latent precision that produced the
5440    // reported user precision so the two are an exact transform pair.
5441    let latent_cov = if options.compute_inference {
5442        Some(symmetric_positive_definite_inverse_or_pseudo(
5443            &latent_precision,
5444        )?)
5445    } else {
5446        None
5447    };
5448    let s_lambda_original = weighted_blockwise_penalty_sum(
5449        &design.penalties,
5450        fit.lambdas.as_slice().unwrap(),
5451        design.design.ncols(),
5452    );
5453    let penalty_term = beta_user.dot(&s_lambda_original.dot(&beta_user));
5454    let deviance = if family.is_gaussian_identity() {
5455        y.iter()
5456            .zip(eta_state.mu.iter())
5457            .zip(weights.iter())
5458            .map(|((&yy, &mu), &w)| w.max(0.0) * (yy - mu) * (yy - mu))
5459            .sum()
5460    } else {
5461        -2.0 * eta_state.log_likelihood
5462    };
5463    let (edf_by_block, penalty_block_trace, edf_total) = if let Some(cov) = latent_cov.as_ref() {
5464        exact_bounded_edf(&fit_penalties, &fit.lambdas, cov)?
5465    } else {
5466        (
5467            vec![0.0; fit_penalties.len()],
5468            vec![0.0; fit_penalties.len()],
5469            0.0,
5470        )
5471    };
5472
5473    // Dispersion. The bounded fit's working weight is scale-free for a profiled
5474    // Gaussian (`W = priorweights`), so the unscaled penalized Hessian carries
5475    // unit implicit dispersion and the reported coefficient covariance must be
5476    // restored to `Vb = σ̂²·H_user⁻¹` with the REML residual variance
5477    // `σ̂² = RSS/(n − edf_total)` — identical to the ordinary GAM path
5478    // (`solver/estimate/optimizer.rs`). Fixed-scale families (Binomial here,
5479    // `φ ≡ 1`) keep their full Fisher information in `W`, so `cov_scale == 1`
5480    // and the covariance is `H_user⁻¹` unscaled. The single source of truth for
5481    // the per-family scale is `GlmLikelihoodSpec::coefficient_covariance_scale`
5482    // / `dispersion_from_likelihood`, reused verbatim so the bounded path can
5483    // never drift from the standard contract (gam#1514).
5484    let glm_likelihood = gam_spec::GlmLikelihoodSpec::canonical(family.clone());
5485    let standard_deviation = if family.is_gaussian_identity() {
5486        let denom = if options.compute_inference {
5487            (y.len() as f64 - edf_total).max(1.0)
5488        } else {
5489            (y.len() as f64).max(1.0)
5490        };
5491        (deviance / denom).sqrt()
5492    } else {
5493        1.0
5494    };
5495    let cov_scale = glm_likelihood
5496        .coefficient_covariance_scale(standard_deviation * standard_deviation)
5497        .max(f64::MIN_POSITIVE);
5498    let dispersion = gam_solve::estimate::dispersion_from_likelihood(&glm_likelihood, standard_deviation);
5499    // Apply the dispersion scale to the unscaled inverse, producing the reported
5500    // `Vb = cov_scale · H_user⁻¹` and its diagonal standard errors. The stored
5501    // `penalized_hessian` stays UNSCALED (`H_user`) per the dispersion-ownership
5502    // contract in `inference::dispersion_cov`; the sampler re-applies `√cov_scale`
5503    // when it reconstructs the latent posterior (see `sample_standard_bounded`).
5504    let beta_covariance = beta_covariance_unscaled.map(|mut cov| {
5505        if cov_scale != 1.0 {
5506            cov.mapv_inplace(|v| v * cov_scale);
5507        }
5508        cov
5509    });
5510    let beta_standard_errors = beta_covariance
5511        .as_ref()
5512        .map(|cov| Array1::from_iter((0..cov.nrows()).map(|i| cov[[i, i]].max(0.0).sqrt())));
5513
5514    let geometry = Some(gam_solve::estimate::FitGeometry {
5515        penalized_hessian: penalized_hessian.clone().into(),
5516        working_weights: eta_state.fisherweight.clone(),
5517        working_response: {
5518            let mut working_response = eta_state.eta.clone();
5519            for i in 0..working_response.len() {
5520                let wi = eta_state.fisherweight[i].max(1e-12);
5521                working_response[i] += eta_state.score[i] / wi;
5522            }
5523            working_response
5524        },
5525    });
5526    let max_abs_eta = eta_state
5527        .eta
5528        .iter()
5529        .fold(0.0_f64, |acc, &v| acc.max(v.abs()));
5530    Ok(FittedTermCollection {
5531        fit: {
5532            let log_lambdas = fit.lambdas.mapv(|v| v.max(1e-300).ln());
5533            let inf = FitInference {
5534                edf_by_block,
5535                penalty_block_trace,
5536                edf_total,
5537                smoothing_correction: None,
5538                // Boundary adapter: `penalized_hessian` storage is now
5539                // `UnscaledPrecision`.
5540                penalized_hessian: penalized_hessian.clone().into(),
5541                working_weights: eta_state.fisherweight.clone(),
5542                working_response: {
5543                    let mut working_response = eta_state.eta.clone();
5544                    for i in 0..working_response.len() {
5545                        let wi = eta_state.fisherweight[i].max(1e-12);
5546                        working_response[i] += eta_state.score[i] / wi;
5547                    }
5548                    working_response
5549                },
5550                reparam_qs: None,
5551                dispersion,
5552                beta_covariance: beta_covariance
5553                    .clone()
5554                    .map(gam_problem::dispersion_cov::PhiScaledCovariance::from),
5555                beta_standard_errors,
5556                beta_covariance_corrected: None,
5557                beta_standard_errors_corrected: None,
5558                beta_covariance_frequentist: None,
5559                coefficient_influence: None,
5560                weighted_gram: None,
5561                bias_correction_beta: None,
5562            };
5563            let covariance_conditional = beta_covariance;
5564            let pirls_status_val = if fit.outer_converged {
5565                gam_solve::pirls::PirlsStatus::Converged
5566            } else {
5567                gam_solve::pirls::PirlsStatus::StalledAtValidMinimum
5568            };
5569            UnifiedFitResult::try_from_parts(UnifiedFitResultParts {
5570                blocks: vec![gam_solve::estimate::FittedBlock {
5571                    beta: beta_user.clone(),
5572                    role: gam_problem::BlockRole::Mean,
5573                    edf: edf_total,
5574                    lambdas: fit.lambdas.clone(),
5575                }],
5576                log_lambdas,
5577                lambdas: fit.lambdas,
5578                likelihood_scale: family.default_scale_metadata(),
5579                likelihood_family: Some(family),
5580                log_likelihood_normalization:
5581                    gam_spec::LogLikelihoodNormalization::UserProvided,
5582                log_likelihood: eta_state.log_likelihood,
5583                deviance,
5584                reml_score: fit.penalized_objective,
5585                stable_penalty_term: penalty_term,
5586                penalized_objective: fit.penalized_objective,
5587                used_device: false,
5588                outer_iterations: fit.outer_iterations,
5589                outer_converged: fit.outer_converged,
5590                outer_gradient_norm: fit.outer_gradient_norm,
5591                standard_deviation,
5592                covariance_conditional,
5593                covariance_corrected: None,
5594                inference: Some(inf),
5595                fitted_link: gam_solve::estimate::FittedLinkState::Standard(None),
5596                geometry,
5597                block_states: Vec::new(),
5598                pirls_status: pirls_status_val,
5599                max_abs_eta,
5600                constraint_kkt: None,
5601                artifacts: gam_solve::estimate::FitArtifacts {
5602                    pirls: None,
5603                    ..Default::default()
5604                },
5605                inner_cycles: 0,
5606            })?
5607        },
5608        design: design.clone(),
5609        adaptive_diagnostics: None,
5610    })
5611}
5612
5613fn enforce_term_constraint_feasibility(
5614    design: &TermCollectionDesign,
5615    fit: &UnifiedFitResult,
5616) -> Result<(), EstimationError> {
5617    // Geometric (per-row-scaled) tolerance, matching the public contract on
5618    // `ACTIVE_SET_PRIMAL_FEASIBILITY_TOL` and the diagnostic that
5619    // `compute_constraint_kkt_diagnostics` exposes via `fit.constraint_kkt`.
5620    // Lower-bound rows are unit-norm (a_i = e_i) so the scale-invariant and
5621    // raw checks coincide there. Linear-inequality rows generally are NOT
5622    // unit-norm — e.g. a B-spline endpoint-derivative clamp at k = 12 carries
5623    // ‖a_i‖ ≈ 38, so a 1e-6 raw residual is only 2.6e-8 in geometric units.
5624    // Holding this gate to raw 1e-7 while the in-solver acceptance gate
5625    // measures geometric 1e-8 is the inconsistency that made well-conditioned
5626    // clamped fits get rejected after they completed cleanly.
5627    /// Raw (unscaled) constraint-residual tolerance for the post-fit feasibility
5628    /// audit; kept loose enough to be consistent with the geometric in-solver
5629    /// acceptance gate on non-unit-norm linear-inequality rows (see comment).
5630    const CONSTRAINT_FEASIBILITY_RAW_TOL: f64 = 1e-7;
5631    let tol = CONSTRAINT_FEASIBILITY_RAW_TOL;
5632    let smooth_start = design
5633        .design
5634        .ncols()
5635        .saturating_sub(design.smooth.total_smooth_cols());
5636    let mut violations: Vec<String> = Vec::new();
5637    for term in &design.smooth.terms {
5638        let gr = (smooth_start + term.coeff_range.start)..(smooth_start + term.coeff_range.end);
5639        let beta_local = fit.beta.slice(s![gr.clone()]).to_owned();
5640        if let Some(lb) = term.lower_bounds_local.as_ref() {
5641            let mut worst = 0.0_f64;
5642            let mut worst_idx = 0usize;
5643            for i in 0..lb.len().min(beta_local.len()) {
5644                if lb[i].is_finite() {
5645                    let viol = (lb[i] - beta_local[i]).max(0.0);
5646                    if viol > worst {
5647                        worst = viol;
5648                        worst_idx = i;
5649                    }
5650                }
5651            }
5652            if worst > tol {
5653                violations.push(format!(
5654                    "term='{}' kind=lower-bound maxviolation={:.3e} coeff_index={}",
5655                    term.name, worst, worst_idx
5656                ));
5657            }
5658        }
5659        if let Some(lin) = term.linear_constraints_local.as_ref() {
5660            let mut worst = 0.0_f64;
5661            let mut worstrow = 0usize;
5662            for i in 0..lin.a.nrows() {
5663                let norm = lin.a.row(i).dot(&lin.a.row(i)).sqrt();
5664                let inv = if norm > 0.0 { 1.0 / norm } else { 0.0 };
5665                let s = (lin.a.row(i).dot(&beta_local) - lin.b[i]) * inv;
5666                let viol = (-s).max(0.0);
5667                if viol > worst {
5668                    worst = viol;
5669                    worstrow = i;
5670                }
5671            }
5672            if worst > tol {
5673                violations.push(format!(
5674                    "term='{}' kind=linear-inequality maxviolation={:.3e} row={}",
5675                    term.name, worst, worstrow
5676                ));
5677            }
5678        }
5679    }
5680
5681    if !violations.is_empty() {
5682        let mut msg = format!(
5683            "constraint violation after fit ({} violating term constraints): {}",
5684            violations.len(),
5685            violations.join(" | ")
5686        );
5687        if let Some(kkt) = fit.constraint_kkt.as_ref() {
5688            msg.push_str(&format!(
5689                "; KKT[primal={:.3e}, dual={:.3e}, comp={:.3e}, stat={:.3e}]",
5690                kkt.primal_feasibility, kkt.dual_feasibility, kkt.complementarity, kkt.stationarity
5691            ));
5692        }
5693        return Err(EstimationError::ParameterConstraintViolation(msg));
5694    }
5695    Ok(())
5696}
5697
5698fn stratified_spatial_subsample(
5699    data: ArrayView2<'_, f64>,
5700    spec: &TermCollectionSpec,
5701    target_size: usize,
5702) -> Vec<usize> {
5703    use rand::SeedableRng;
5704    use rand::rngs::StdRng;
5705    use rand::seq::SliceRandom;
5706
5707    let n = data.nrows();
5708    if n <= target_size {
5709        return (0..n).collect();
5710    }
5711
5712    let spatial_cols: Option<Vec<usize>> =
5713        spec.smooth_terms.iter().find_map(|term| match &term.basis {
5714            SmoothBasisSpec::ThinPlate { feature_cols, .. }
5715            | SmoothBasisSpec::Matern { feature_cols, .. }
5716            | SmoothBasisSpec::Duchon { feature_cols, .. } => {
5717                if !feature_cols.is_empty() {
5718                    Some(feature_cols.clone())
5719                } else {
5720                    None
5721                }
5722            }
5723            _ => None,
5724        });
5725
5726    let cols = match spatial_cols {
5727        Some(c) if !c.is_empty() => c,
5728        _ => {
5729            let mut rng = StdRng::seed_from_u64(spatial_subsample_seed(data, &[], target_size));
5730            let mut indices: Vec<usize> = (0..n).collect();
5731            indices.shuffle(&mut rng);
5732            indices.truncate(target_size);
5733            indices.sort_unstable();
5734            return indices;
5735        }
5736    };
5737    let mut rng = StdRng::seed_from_u64(spatial_subsample_seed(data, &cols, target_size));
5738
5739    let d = cols.len();
5740    let mut mins = vec![f64::INFINITY; d];
5741    let mut maxs = vec![f64::NEG_INFINITY; d];
5742    for i in 0..n {
5743        for (ax, &col) in cols.iter().enumerate() {
5744            let v = data[[i, col]];
5745            if v < mins[ax] {
5746                mins[ax] = v;
5747            }
5748            if v > maxs[ax] {
5749                maxs[ax] = v;
5750            }
5751        }
5752    }
5753
5754    // Aim for roughly this many sampled points per stratification cell so each
5755    // occupied cell can contribute a representative draw without collapsing the
5756    // grid to one point per cell.
5757    const TARGET_POINTS_PER_CELL: usize = 5;
5758    let total_cells_target = (target_size / TARGET_POINTS_PER_CELL).max(1);
5759    let cells_per_axis = ((total_cells_target as f64).powf(1.0 / d as f64)).ceil() as usize;
5760    let cells_per_axis = cells_per_axis.max(1);
5761
5762    let mut cell_members: std::collections::HashMap<Vec<usize>, Vec<usize>> =
5763        std::collections::HashMap::new();
5764    for i in 0..n {
5765        let mut cell_key = Vec::with_capacity(d);
5766        for (ax, &col) in cols.iter().enumerate() {
5767            let range = maxs[ax] - mins[ax];
5768            let cell = if range <= 0.0 {
5769                0
5770            } else {
5771                let frac = (data[[i, col]] - mins[ax]) / range;
5772                (frac * cells_per_axis as f64).floor() as usize
5773            };
5774            cell_key.push(cell.min(cells_per_axis - 1));
5775        }
5776        cell_members.entry(cell_key).or_default().push(i);
5777    }
5778
5779    let mut selected: Vec<usize> = Vec::with_capacity(target_size);
5780    let mut remaining_budget = target_size;
5781    let mut remaining_population = n;
5782
5783    let mut cells: Vec<(Vec<usize>, Vec<usize>)> = cell_members.into_iter().collect();
5784    cells.sort_by(|a, b| a.0.cmp(&b.0));
5785
5786    for (_, members) in &mut cells {
5787        if remaining_budget == 0 {
5788            break;
5789        }
5790        let alloc = ((members.len() as f64 / remaining_population as f64) * remaining_budget as f64)
5791            .round() as usize;
5792        let alloc = alloc.max(1).min(members.len()).min(remaining_budget);
5793        members.shuffle(&mut rng);
5794        selected.extend_from_slice(&members[..alloc]);
5795        remaining_budget = remaining_budget.saturating_sub(alloc);
5796        remaining_population = remaining_population.saturating_sub(members.len());
5797    }
5798
5799    if selected.len() > target_size {
5800        selected.shuffle(&mut rng);
5801        selected.truncate(target_size);
5802    }
5803
5804    selected.sort_unstable();
5805    selected
5806}
5807
5808fn spatial_subsample_seed(
5809    data: ArrayView2<'_, f64>,
5810    spatial_cols: &[usize],
5811    target_size: usize,
5812) -> u64 {
5813    let mut state = 0x5350_4154_4941_4C53_u64;
5814    spatial_seed_mix(&mut state, data.nrows() as u64);
5815    spatial_seed_mix(&mut state, data.ncols() as u64);
5816    spatial_seed_mix(&mut state, target_size as u64);
5817    spatial_seed_mix(&mut state, spatial_cols.len() as u64);
5818    for &col in spatial_cols {
5819        spatial_seed_mix(&mut state, col as u64);
5820    }
5821
5822    if data.nrows() > 0 {
5823        let mid = data.nrows() / 2;
5824        let last = data.nrows() - 1;
5825        for &row in &[0usize, mid, last] {
5826            for &col in spatial_cols {
5827                let value = data[[row, col]];
5828                spatial_seed_mix(&mut state, value.to_bits());
5829            }
5830        }
5831    }
5832    state
5833}
5834
5835#[inline]
5836fn spatial_seed_mix(state: &mut u64, value: u64) {
5837    // Canonical SplitMix64 step over `value + state` (the step adds G itself),
5838    // then an extra rotate-multiply avalanche unique to the spatial seed mix.
5839    let mut s = value.wrapping_add(*state);
5840    let z = gam_linalg::utils::splitmix64(&mut s);
5841    *state ^= z;
5842    *state = (*state).rotate_left(27).wrapping_mul(0x3C79_AC49_2BA7_B653);
5843}
5844
5845fn sampled_rows(data: ArrayView2<'_, f64>, indices: &[usize]) -> Array2<f64> {
5846    let mut sampled = Array2::<f64>::zeros((indices.len(), data.ncols()));
5847    for (new_row, &orig_row) in indices.iter().enumerate() {
5848        sampled.row_mut(new_row).assign(&data.row(orig_row));
5849    }
5850    sampled
5851}
5852
5853fn spatial_term_user_centers(term: &SmoothTermSpec) -> Option<ArrayView2<'_, f64>> {
5854    match spatial_term_center_strategy(term) {
5855        Some(CenterStrategy::UserProvided(centers)) => Some(centers.view()),
5856        _ => None,
5857    }
5858}
5859
5860fn finite_centered_axis_contrasts(values: &[f64], expected_dim: usize) -> Option<Vec<f64>> {
5861    if values.len() != expected_dim || expected_dim <= 1 {
5862        return None;
5863    }
5864    if values.iter().any(|value| !value.is_finite()) {
5865        return None;
5866    }
5867    Some(center_aniso_log_scales(values))
5868}
5869
5870fn blended_pilot_axis_contrasts(
5871    pilot_data: ArrayView2<'_, f64>,
5872    term: &SmoothTermSpec,
5873    centers: ArrayView2<'_, f64>,
5874) -> Option<Vec<f64>> {
5875    let d = centers.ncols();
5876    if d <= 1 {
5877        return None;
5878    }
5879    let center_eta = initial_aniso_contrasts(centers);
5880    let data_eta = standardized_spatial_term_data(pilot_data, term)
5881        .ok()
5882        .and_then(|x| finite_centered_axis_contrasts(&initial_aniso_contrasts(x.view()), d));
5883    let center_eta = finite_centered_axis_contrasts(&center_eta, d)?;
5884    let blended = match data_eta {
5885        Some(data_eta) => center_eta
5886            .iter()
5887            .zip(data_eta.iter())
5888            .map(|(&from_centers, &from_data)| 0.5 * (from_centers + from_data))
5889            .collect::<Vec<_>>(),
5890        None => center_eta,
5891    };
5892    finite_centered_axis_contrasts(&blended, d)
5893}
5894
5895fn apply_pilot_spatial_psi_reseed(
5896    pilot_data: ArrayView2<'_, f64>,
5897    spec: &TermCollectionSpec,
5898    spatial_terms: &[usize],
5899    kappa_options: &SpatialLengthScaleOptimizationOptions,
5900) -> Result<TermCollectionSpec, EstimationError> {
5901    let dims_per_term = spatial_dims_per_term(spec, spatial_terms);
5902    let use_aniso = has_aniso_terms(spec, spatial_terms);
5903    let log_kappa0 = if use_aniso {
5904        SpatialLogKappaCoords::from_length_scales_aniso(spec, spatial_terms, kappa_options)
5905    } else {
5906        SpatialLogKappaCoords::from_length_scales(spec, spatial_terms, kappa_options)
5907    };
5908    let log_kappa0 = log_kappa0.reseed_from_data(pilot_data, spec, spatial_terms, kappa_options);
5909    let log_kappa_lower = if use_aniso {
5910        SpatialLogKappaCoords::lower_bounds_aniso_from_data(
5911            pilot_data,
5912            spec,
5913            spatial_terms,
5914            &dims_per_term,
5915            kappa_options,
5916        )
5917    } else {
5918        SpatialLogKappaCoords::lower_bounds_from_data(
5919            pilot_data,
5920            spec,
5921            spatial_terms,
5922            kappa_options,
5923        )
5924    };
5925    let log_kappa_upper = if use_aniso {
5926        SpatialLogKappaCoords::upper_bounds_aniso_from_data(
5927            pilot_data,
5928            spec,
5929            spatial_terms,
5930            &dims_per_term,
5931            kappa_options,
5932        )
5933    } else {
5934        SpatialLogKappaCoords::upper_bounds_from_data(
5935            pilot_data,
5936            spec,
5937            spatial_terms,
5938            kappa_options,
5939        )
5940    };
5941    log_kappa0
5942        .clamp_to_bounds(&log_kappa_lower, &log_kappa_upper)
5943        .apply_tospec(spec, spatial_terms)
5944}
5945
5946pub(crate) fn apply_spatial_anisotropy_pilot_initializer(
5947    data: ArrayView2<'_, f64>,
5948    spec: &mut TermCollectionSpec,
5949    spatial_terms: &[usize],
5950    target_size: usize,
5951    kappa_options: &SpatialLengthScaleOptimizationOptions,
5952) -> usize {
5953    if target_size == 0 || data.nrows() <= target_size.saturating_mul(2) || spatial_terms.is_empty()
5954    {
5955        return 0;
5956    }
5957    if !has_aniso_terms(spec, spatial_terms) {
5958        return 0;
5959    }
5960    let indices = stratified_spatial_subsample(data, spec, target_size);
5961    let pilot_data = sampled_rows(data, &indices);
5962    let mut working = spec.clone();
5963    let mut updated_terms = 0usize;
5964    const GEOMETRY_UPDATES: usize = 2;
5965
5966    for pass in 0..GEOMETRY_UPDATES {
5967        let planned_terms = match plan_joint_spatial_centers_for_term_blocks(
5968            pilot_data.view(),
5969            &[working.smooth_terms.clone()],
5970        )
5971        .and_then(|mut blocks| {
5972            blocks.pop().ok_or_else(|| {
5973                BasisError::InvalidInput(
5974                    "pilot geometry initializer produced no smooth-term block".to_string(),
5975                )
5976            })
5977        }) {
5978            Ok(terms) => terms,
5979            Err(err) => {
5980                log::warn!(
5981                    "[spatial-kappa] pilot geometry initializer skipped after center planning failed: {err}"
5982                );
5983                return updated_terms;
5984            }
5985        };
5986
5987        for &term_idx in spatial_terms {
5988            let Some(current_eta) = get_spatial_aniso_log_scales(&working, term_idx) else {
5989                continue;
5990            };
5991            let Some(d) = get_spatial_feature_dim(&working, term_idx) else {
5992                continue;
5993            };
5994            if d <= 1 || current_eta.len() != d {
5995                continue;
5996            }
5997            let Some(planned_term) = planned_terms.get(term_idx) else {
5998                continue;
5999            };
6000            let Some(centers) = spatial_term_user_centers(planned_term) else {
6001                continue;
6002            };
6003            let Some(eta) = blended_pilot_axis_contrasts(pilot_data.view(), planned_term, centers)
6004            else {
6005                continue;
6006            };
6007            if set_spatial_aniso_log_scales(&mut working, term_idx, eta).is_ok() {
6008                updated_terms += usize::from(pass == 0);
6009            }
6010        }
6011
6012        match apply_pilot_spatial_psi_reseed(
6013            pilot_data.view(),
6014            &working,
6015            spatial_terms,
6016            kappa_options,
6017        ) {
6018            Ok(updated) => {
6019                working = updated;
6020            }
6021            Err(err) => {
6022                log::warn!(
6023                    "[spatial-kappa] pilot geometry ψ reseed skipped after deterministic initializer error: {err}"
6024                );
6025                break;
6026            }
6027        }
6028    }
6029
6030    if updated_terms > 0 {
6031        log::info!(
6032            "[spatial-kappa] initialized anisotropy from {}-row pilot geometry for {} spatial term(s); proceeding to full-data optimization",
6033            indices.len(),
6034            updated_terms
6035        );
6036        *spec = working;
6037    }
6038    updated_terms
6039}
6040
6041pub(crate) fn spatial_length_scale_term_indices(spec: &TermCollectionSpec) -> Vec<usize> {
6042    spec.smooth_terms
6043        .iter()
6044        .enumerate()
6045        .filter_map(|(idx, _)| spatial_term_supports_hyper_optimization(spec, idx).then_some(idx))
6046        .collect()
6047}
6048
6049/// Returns `true` when every spatial term in `spec` has a locked kernel
6050/// scale (explicit `length_scale=X` without anisotropy) and therefore
6051/// contributes no outer ψ/κ optimization axis. Empty term collections
6052/// also return `true` — there are no kappas to optimize.
6053///
6054/// Used by family entry points that want to honor a user-supplied scalar
6055/// length scale exactly: when all spatial terms are locked the n-block
6056/// joint-spatial outer solver has nothing to optimize, and routing
6057/// through it merely spends ~80 outer iters chasing a stalled ARC at the
6058/// user's chosen ρ. Skipping straight to the rho-only path avoids that
6059/// waste and respects the user's explicit kernel-scale input.
6060fn fit_score(fit: &UnifiedFitResult) -> f64 {
6061    if fit.reml_score.is_finite() {
6062        return fit.reml_score;
6063    }
6064    let score = 0.5 * fit.deviance + 0.5 * fit.stable_penalty_term;
6065    if score.is_finite() {
6066        score
6067    } else {
6068        f64::INFINITY
6069    }
6070}
6071
6072/// Classify an outer-evaluation error as a *recoverable trial-point
6073/// infeasibility* versus a genuine fatal failure.
6074///
6075/// The spatial-κ / anisotropy outer optimizer probes a sequence of trial
6076/// hyperparameters. At an extreme trial point the realized kernel design or
6077/// its ψ-derivatives may simply be non-constructible — e.g. a learned
6078/// per-axis log-scale stretches the anisotropic distance `r = |Λh|` until the
6079/// Duchon polyharmonic blocks `r^(2m−d)` overflow, or a degenerate metric
6080/// collapses two centers onto a non-C² collision. Those points lie outside
6081/// the model's feasible domain; the principled response is to treat them like
6082/// the cost-only path already does (objective `+∞`) so the line-search /
6083/// trust-region solver retreats, rather than aborting the entire REML fit.
6084///
6085/// A `BasisError` is exactly this class: it means "the basis/design cannot be
6086/// built at this hyperparameter". Everything else (PIRLS divergence wired to a
6087/// distinct variant, layout/topology invariants, over-parameterization) stays
6088/// fatal so genuine bugs are never masked.
6089fn is_recoverable_trial_point_error(err: &EstimationError) -> bool {
6090    matches!(err, EstimationError::BasisError(_))
6091}
6092
6093fn require_successful_spatial_optimization_result<T>(
6094    initial_score: f64,
6095    result: Result<Option<(T, f64)>, EstimationError>,
6096) -> Result<T, EstimationError> {
6097    match result {
6098        Ok(Some((value, exact_score))) => {
6099            // Allow rounding-level worsening: REML scores accumulate
6100            // log-determinant terms whose finite-precision re-evaluation
6101            // can drift well past 1e-10 absolute near a converged optimum
6102            // (we have seen ~1e-6 between two evaluations whose printed
6103            // values round to identical 6-digit scientific). Reject genuine
6104            // worsenings (>1 unit) but admit anything within ~1e-6
6105            // absolute / 1e-8 relative — meaningful REML gains are
6106            // orders of magnitude larger.
6107            const SCORE_DRIFT_ABS_TOL: f64 = 1e-6;
6108            const SCORE_DRIFT_REL_TOL: f64 = 1e-8;
6109            let tol = SCORE_DRIFT_ABS_TOL.max(initial_score.abs() * SCORE_DRIFT_REL_TOL);
6110            if exact_score <= initial_score + tol {
6111                Ok(value)
6112            } else {
6113                Err(EstimationError::RemlOptimizationFailed(format!(
6114                    "spatial kappa optimization made REML score worse ({initial_score:.6e} -> {exact_score:.6e})"
6115                )))
6116            }
6117        }
6118        Ok(None) => Err(EstimationError::RemlOptimizationFailed(
6119            "spatial kappa optimization is unavailable for one or more eligible spatial terms"
6120                .to_string(),
6121        )),
6122        Err(err) => Err(EstimationError::RemlOptimizationFailed(format!(
6123            "spatial kappa optimization failed: {err}"
6124        ))),
6125    }
6126}
6127
6128fn external_opts_for_design(
6129    family: &LikelihoodSpec,
6130    design: &TermCollectionDesign,
6131    options: &FitOptions,
6132) -> ExternalOptimOptions {
6133    ExternalOptimOptions {
6134        family: family.clone(),
6135        latent_cloglog: options.latent_cloglog,
6136        mixture_link: options.mixture_link.clone(),
6137        optimize_mixture: options.optimize_mixture,
6138        sas_link: options.sas_link,
6139        optimize_sas: options.optimize_sas,
6140        compute_inference: options.compute_inference,
6141        skip_rho_posterior_inference: options.skip_rho_posterior_inference,
6142        max_iter: options.max_iter,
6143        tol: options.tol,
6144        nullspace_dims: design.nullspace_dims.clone(),
6145        linear_constraints: design.linear_constraints.clone(),
6146        firth_bias_reduction: Some(options.firth_bias_reduction),
6147        penalty_shrinkage_floor: options.penalty_shrinkage_floor,
6148        rho_prior: options.rho_prior.clone(),
6149        // Propagate Kronecker structure so the joint optimizer minimizes the
6150        // same REML surface as the baseline/refit (adaptive_fit_options_base).
6151        kronecker_penalty_system: design.kronecker_penalty_system(),
6152        kronecker_factored: design
6153            .smooth
6154            .terms
6155            .iter()
6156            .find_map(|t| t.kronecker_factored.clone()),
6157        persist_warm_start_disk: options.persist_warm_start_disk,
6158    }
6159}
6160
6161/// Evaluate the joint REML cost, gradient, and Hessian result at a given θ = [ρ, ψ]
6162/// for a single-block term collection with spatial hyperparameters.
6163///
6164/// This provides a direct evaluation of the profiled REML objective using the
6165/// external-caller interface, which exposes exact cost/gradient/Hessian without
6166/// running the full outer smoothing loop. The returned tuple is
6167/// `(cost, gradient, hessian)` in the joint [ρ, ψ] space.
6168fn evaluate_joint_reml_outer_eval_at_theta(
6169    evaluator: &mut gam_solve::estimate::ExternalJointHyperEvaluator<'_>,
6170    design: &TermCollectionDesign,
6171    theta: &Array1<f64>,
6172    rho_dim: usize,
6173    hyper_dirs: Vec<gam_solve::estimate::reml::DirectionalHyperParam>,
6174    warm_start_beta: Option<ArrayView1<'_, f64>>,
6175    order: gam_solve::rho_optimizer::OuterEvalOrder,
6176    design_revision: Option<u64>,
6177) -> Result<
6178    (
6179        f64,
6180        Array1<f64>,
6181        gam_problem::HessianResult,
6182    ),
6183    EstimationError,
6184> {
6185    evaluator.evaluate_with_order(
6186        &design.design,
6187        &design.penalties,
6188        &design.nullspace_dims,
6189        design.linear_constraints.clone(),
6190        theta,
6191        rho_dim,
6192        hyper_dirs,
6193        warm_start_beta,
6194        "evaluate_joint_reml_outer_eval_at_theta",
6195        order,
6196        design_revision,
6197    )
6198}
6199
6200fn evaluate_joint_reml_efs_at_theta(
6201    evaluator: &mut gam_solve::estimate::ExternalJointHyperEvaluator<'_>,
6202    design: &TermCollectionDesign,
6203    theta: &Array1<f64>,
6204    rho_dim: usize,
6205    hyper_dirs: Vec<gam_solve::estimate::reml::DirectionalHyperParam>,
6206    warm_start_beta: Option<ArrayView1<'_, f64>>,
6207    design_revision: Option<u64>,
6208) -> Result<gam_problem::EfsEval, EstimationError> {
6209    evaluator.evaluate_efs(
6210        &design.design,
6211        &design.penalties,
6212        &design.nullspace_dims,
6213        design.linear_constraints.clone(),
6214        theta,
6215        rho_dim,
6216        hyper_dirs,
6217        warm_start_beta,
6218        "evaluate_joint_reml_efs_at_theta",
6219        design_revision,
6220    )
6221}
6222
6223fn exact_joint_spatial_outer_hessian_available(
6224    family: &LikelihoodSpec,
6225    design: &TermCollectionDesign,
6226) -> bool {
6227    // Every `LikelihoodSpec` variant (Gaussian, Binomial-*, Poisson, Gamma,
6228    // Royston-Parmar) routes through the unified evaluator's outer-Hessian
6229    // path: Gaussian Identity uses the no-correction dense form, all GLM
6230    // variants supply scalar-GLM derivative ingredients consumed by
6231    // `compute_outer_hessian` / `build_outer_hessian_operator`, and the
6232    // (n, p, K) crossover in `prefer_outer_hessian_operator` chooses the
6233    // matrix-free `HessianResult::Operator` representation at large scale
6234    // for dense-lazy designs.  The previous `Identity || sparse_design`
6235    // gate predates that operator routing and forced binomial+logit+Matern
6236    // (and any other non-Gaussian dense-lazy spatial design) onto the
6237    // gradient-only BFGS path even though analytic Hessian is fully
6238    // available — capability check, not cost.  Match every variant
6239    // explicitly so any future family addition (which may not yet provide
6240    // outer-Hessian ingredients) forces an authoring decision here rather
6241    // than silently inheriting `true`.
6242    // Every supported response (Gaussian, Binomial-*, Poisson, Tweedie,
6243    // NegativeBinomial, Beta, Gamma, Royston-Parmar) routes through the
6244    // unified evaluator's outer-Hessian path; the spec-level capability
6245    // check therefore always succeeds. Match every response explicitly so
6246    // any future family addition (which may not yet provide outer-Hessian
6247    // ingredients) forces an authoring decision here rather than silently
6248    // inheriting `true`.
6249    let family_supported = match &family.response {
6250        ResponseFamily::Gaussian
6251        | ResponseFamily::Binomial
6252        | ResponseFamily::Poisson
6253        | ResponseFamily::Tweedie { .. }
6254        | ResponseFamily::NegativeBinomial { .. }
6255        | ResponseFamily::Beta { .. }
6256        | ResponseFamily::Gamma
6257        | ResponseFamily::RoystonParmar => true,
6258    };
6259    // A design with zero columns has no joint outer-Hessian to compute;
6260    // the analytic path is only meaningful for non-empty parameter blocks.
6261    family_supported && design.design.ncols() > 0
6262}
6263
6264fn smooth_term_penalty_index(
6265    spec: &TermCollectionSpec,
6266    design: &TermCollectionDesign,
6267    term_idx: usize,
6268) -> Option<usize> {
6269    if term_idx >= design.smooth.terms.len() || term_idx >= spec.smooth_terms.len() {
6270        return None;
6271    }
6272    if design.smooth.terms[term_idx].penalties_local.is_empty() {
6273        return None;
6274    }
6275    let linear_penalties = spec
6276        .linear_terms
6277        .iter()
6278        .filter(|t| t.double_penalty)
6279        .count()
6280        * 2;
6281    let random_penalties = design
6282        .random_effect_ranges
6283        .iter()
6284        .filter(|(_, range)| !range.is_empty())
6285        .count();
6286    let smooth_offset = linear_penalties + random_penalties;
6287    let local_offset = design
6288        .smooth
6289        .terms
6290        .iter()
6291        .take(term_idx)
6292        .map(|term| term.penalties_local.len())
6293        .sum::<usize>();
6294    Some(smooth_offset + local_offset)
6295}
6296
6297fn try_build_spatial_term_log_kappa_derivativeinfo(
6298    data: ArrayView2<'_, f64>,
6299    resolvedspec: &TermCollectionSpec,
6300    design: &TermCollectionDesign,
6301    term_idx: usize,
6302) -> Result<Option<SpatialPsiDerivative>, EstimationError> {
6303    let Some((
6304        global_range,
6305        total_p,
6306        x_psi_local,
6307        s_psi_local_check,
6308        x_psi_psi_local,
6309        s_psi_psi_local,
6310        s_psi_components_local,
6311        s_psi_psi_components_local,
6312        implicit_operator,
6313    )) = try_build_spatial_term_log_kappa_derivative(data, resolvedspec, design, term_idx)?
6314    else {
6315        return Ok(None);
6316    };
6317    let Some(penalty_start) = smooth_term_penalty_index(resolvedspec, design, term_idx) else {
6318        return Ok(None);
6319    };
6320    if s_psi_components_local.is_empty() || s_psi_psi_components_local.is_empty() {
6321        return Ok(None);
6322    }
6323    if s_psi_components_local.len() != s_psi_psi_components_local.len() {
6324        return Ok(None);
6325    }
6326    let penalty_indices = (0..s_psi_components_local.len())
6327        .map(|j| penalty_start + j)
6328        .collect::<Vec<_>>();
6329    let penalty_index = penalty_indices[0];
6330    if s_psi_local_check.nrows() == 0 || s_psi_psi_local.nrows() == 0 {
6331        return Ok(None);
6332    }
6333    Ok(Some(SpatialPsiDerivative {
6334        penalty_index,
6335        penalty_indices,
6336        global_range,
6337        total_p,
6338        x_psi_local,
6339        s_psi_components_local,
6340        x_psi_psi_local,
6341        s_psi_psi_components_local,
6342        aniso_group_id: None,
6343        aniso_cross_designs: None,
6344        aniso_cross_penalty_provider: None,
6345        implicit_operator,
6346        implicit_axis: 0,
6347    }))
6348}
6349
6350pub(crate) fn try_build_spatial_log_kappa_derivativeinfo_list(
6351    data: ArrayView2<'_, f64>,
6352    resolvedspec: &TermCollectionSpec,
6353    design: &TermCollectionDesign,
6354    spatial_terms: &[usize],
6355) -> Result<Option<Vec<SpatialPsiDerivative>>, EstimationError> {
6356    let mut out = Vec::new();
6357    let mut aniso_gid = 0usize;
6358    for &term_idx in spatial_terms {
6359        if spatial_term_uses_per_axis_psi(resolvedspec, term_idx) {
6360            if let Some(entries) = try_build_spatial_term_log_kappa_aniso_derivativeinfos(
6361                data,
6362                resolvedspec,
6363                design,
6364                term_idx,
6365                aniso_gid,
6366            )? {
6367                aniso_gid += 1;
6368                out.extend(entries);
6369                continue;
6370            } else {
6371                return Ok(None);
6372            }
6373        }
6374        let Some(info) =
6375            try_build_spatial_term_log_kappa_derivativeinfo(data, resolvedspec, design, term_idx)?
6376        else {
6377            return Ok(None);
6378        };
6379        out.push(info);
6380    }
6381    Ok(Some(out))
6382}
6383
6384/// For an aniso term with d axes, produce d `SpatialPsiDerivative` entries.
6385fn try_build_spatial_term_log_kappa_aniso_derivativeinfos(
6386    data: ArrayView2<'_, f64>,
6387    resolvedspec: &TermCollectionSpec,
6388    design: &TermCollectionDesign,
6389    term_idx: usize,
6390    aniso_group_id: usize,
6391) -> Result<Option<Vec<SpatialPsiDerivative>>, EstimationError> {
6392    let Some(smooth_term) = design.smooth.terms.get(term_idx) else {
6393        return Ok(None);
6394    };
6395    let Some(termspec) = resolvedspec.smooth_terms.get(term_idx) else {
6396        return Ok(None);
6397    };
6398    let mut aniso_result = match &termspec.basis {
6399        SmoothBasisSpec::Sphere { .. } => return Ok(None),
6400        SmoothBasisSpec::Matern {
6401            feature_cols,
6402            spec,
6403            input_scales,
6404        } => {
6405            let mut x = select_columns(data, feature_cols).map_err(EstimationError::from)?;
6406            if let Some(s) = input_scales {
6407                apply_input_standardization(&mut x, s);
6408            }
6409            build_matern_basis_log_kappa_aniso_derivatives(x.view(), spec)
6410                .map_err(EstimationError::from)?
6411        }
6412        // Measure-jet: the grouped dial coordinates ride the same per-axis
6413        // carrier. The producer runs on the FROZEN spec (the driver runs
6414        // post-freeze), so per-trial rebuilds move only the dials; the
6415        // coordinate layout, zero design drift, and shared candidate
6416        // normalization are owned by `build_measure_jet_basis_psi_derivatives`.
6417        SmoothBasisSpec::MeasureJet {
6418            feature_cols,
6419            spec,
6420            input_scales,
6421        } => {
6422            let mut x = select_columns(data, feature_cols).map_err(EstimationError::from)?;
6423            if let Some(s) = input_scales {
6424                apply_input_standardization(&mut x, s);
6425            }
6426            build_measure_jet_basis_psi_derivatives(x.view(), spec)
6427                .map_err(EstimationError::from)?
6428        }
6429        _ => return Ok(None),
6430    };
6431    // Get number of axes from the shared operator when available; otherwise
6432    // fall back to the dense design list.
6433    let d = if let Some(ref op) = aniso_result.implicit_operator {
6434        op.n_axes()
6435    } else if !aniso_result.design_first.is_empty() {
6436        aniso_result.design_first.len()
6437    } else {
6438        0
6439    };
6440    if d == 0 {
6441        return Ok(None);
6442    }
6443    let Some(penalty_start) = smooth_term_penalty_index(resolvedspec, design, term_idx) else {
6444        return Ok(None);
6445    };
6446    let p_total = design.design.ncols();
6447    let smooth_start = p_total.saturating_sub(design.smooth.total_smooth_cols());
6448    let global_range = (smooth_start + smooth_term.coeff_range.start)
6449        ..(smooth_start + smooth_term.coeff_range.end);
6450    let num_penalties = aniso_result.penalties_first[0].len();
6451    let penalty_indices: Vec<usize> = (0..num_penalties).map(|j| penalty_start + j).collect();
6452    let penalties_cross_provider = aniso_result.penalties_cross_provider.clone();
6453
6454    // Dense first/diagonal-second matrices may be present even when the shared
6455    // operator is available. The operator remains the canonical source for
6456    // exact cross-axis second derivatives.
6457    let use_implicit_design = aniso_result.design_first.is_empty();
6458    let implicit_op_arc = aniso_result
6459        .implicit_operator
6460        .as_ref()
6461        .map(|op| std::sync::Arc::new(op.clone()));
6462
6463    let mut entries = Vec::with_capacity(d);
6464    for a in 0..d {
6465        let (x_psi_local, x_psi_psi_local) = if use_implicit_design {
6466            // Implicit path: design-derivative matvecs will be dispatched through
6467            // the ImplicitDerivativeOp inside HyperDesignDerivative, so we do NOT
6468            // need to materialize the dense (n x p) matrices here.  Store empty
6469            // placeholders — they are never read when the implicit operator is
6470            // present (spatial_log_kappa_hyper_dirs_frominfo_list uses from_implicit).
6471            (Array2::<f64>::zeros((0, 0)), Array2::<f64>::zeros((0, 0)))
6472        } else {
6473            // Move the dense (n × p) matrices out of aniso_result instead of
6474            // cloning. Each axis index `a` is read exactly once across the
6475            // loop, and aniso_result is dropped at function exit, so leaving
6476            // empty placeholders behind in those vec slots is safe.
6477            let x_first = std::mem::take(&mut aniso_result.design_first[a]);
6478            let x_second = std::mem::take(&mut aniso_result.design_second_diag[a]);
6479            if x_first.ncols() != smooth_term.coeff_range.len() {
6480                return Ok(None);
6481            }
6482            (x_first, x_second)
6483        };
6484        let s_psi_components = std::mem::take(&mut aniso_result.penalties_first[a]);
6485        let s_psi_psi_components = std::mem::take(&mut aniso_result.penalties_second_diag[a]);
6486        // Build cross-design entries for other axes b != a in this group.
6487        // These will be indexed by (b, cross_matrix) where b is the axis
6488        // offset within the d-entry block.
6489        // Cross-axis second derivatives are sourced from the shared operator,
6490        // so we only need placeholder entries to preserve the axis layout.
6491        let cross_designs = if implicit_op_arc.is_some() {
6492            let mut cd = Vec::with_capacity(d - 1);
6493            for b in 0..d {
6494                if b == a {
6495                    continue;
6496                }
6497                cd.push((b, Array2::<f64>::zeros((0, 0))));
6498            }
6499            cd
6500        } else if !aniso_result.design_second_cross.is_empty() {
6501            let mut cd = Vec::new();
6502            for (cross_idx, &(pa, pb)) in aniso_result.design_second_cross_pairs.iter().enumerate()
6503            {
6504                if pa == a {
6505                    cd.push((pb, aniso_result.design_second_cross[cross_idx].clone()));
6506                } else if pb == a {
6507                    cd.push((pa, aniso_result.design_second_cross[cross_idx].clone()));
6508                }
6509            }
6510            cd
6511        } else {
6512            Vec::new()
6513        };
6514        let cross_penalty_provider = if d > 1 {
6515            let penalties_cross_provider = penalties_cross_provider.clone();
6516            Some(std::sync::Arc::new(
6517                move |b_axis: usize| -> Result<Vec<Array2<f64>>, EstimationError> {
6518                    if b_axis == a {
6519                        return Ok(Vec::new());
6520                    }
6521                    let (axis_lo, axis_hi) = if a < b_axis { (a, b_axis) } else { (b_axis, a) };
6522                    if let Some(provider) = penalties_cross_provider.as_ref() {
6523                        provider
6524                            .evaluate(axis_lo, axis_hi)
6525                            .map_err(EstimationError::from)
6526                    } else {
6527                        // No provider: either the pair is unregistered, or it
6528                        // was registered without data (early-return raw-operator
6529                        // paths). Both cases contribute no cross penalties.
6530                        Ok(Vec::new())
6531                    }
6532                },
6533            )
6534                as std::sync::Arc<
6535                    dyn Fn(usize) -> Result<Vec<Array2<f64>>, EstimationError>
6536                        + Send
6537                        + Sync
6538                        + 'static,
6539                >)
6540        } else {
6541            None
6542        };
6543
6544        entries.push(SpatialPsiDerivative {
6545            penalty_index: penalty_indices[0],
6546            penalty_indices: penalty_indices.clone(),
6547            global_range: global_range.clone(),
6548            total_p: p_total,
6549            x_psi_local,
6550            s_psi_components_local: s_psi_components,
6551            x_psi_psi_local,
6552            s_psi_psi_components_local: s_psi_psi_components,
6553            aniso_group_id: Some(aniso_group_id),
6554            aniso_cross_designs: if cross_designs.is_empty() {
6555                None
6556            } else {
6557                Some(cross_designs)
6558            },
6559            aniso_cross_penalty_provider: cross_penalty_provider,
6560            implicit_operator: implicit_op_arc.clone(),
6561            implicit_axis: a,
6562        });
6563    }
6564    Ok(Some(entries))
6565}