Skip to main content

gam_models/fit_orchestration/drivers/
design_construction.rs

1// #1521: `build_term_collection_design` and its term-design subgraph were
2// relocated DOWN into `gam_terms::smooth` (see `gam_terms::smooth::term_design`).
3// The joint-build variants below STAY here: they return a `gam_solve`
4// `EstimationError` / call `freeze_term_collection_from_design`
5// (`spatial_optimization.rs`), so they belong to the gam-models orchestration
6// tier. They reach the relocated `build_term_collection_design_inner` /
7// `build_term_collection_design` via the module's `use gam_terms::smooth::*`.
8pub fn build_term_collection_designs_joint(
9    data: ArrayView2<'_, f64>,
10    specs: &[TermCollectionSpec],
11) -> Result<Vec<TermCollectionDesign>, BasisError> {
12    for spec in specs {
13        validate_term_collection_finite_inputs(data, spec)?;
14    }
15    let smooth_blocks = specs
16        .iter()
17        .map(|spec| spec.smooth_terms.clone())
18        .collect::<Vec<_>>();
19    let planned_blocks = plan_joint_spatial_centers_for_term_blocks(data, &smooth_blocks)?;
20    let mut out = Vec::with_capacity(specs.len());
21    for (spec, planned_terms) in specs.iter().zip(planned_blocks.into_iter()) {
22        let mut planned_spec = spec.clone();
23        planned_spec.smooth_terms = planned_terms;
24        out.push(build_term_collection_design_inner(data, &planned_spec)?);
25    }
26    Ok(out)
27}
28
29pub fn build_term_collection_designs_and_freeze_joint(
30    data: ArrayView2<'_, f64>,
31    specs: &[TermCollectionSpec],
32) -> Result<(Vec<TermCollectionDesign>, Vec<TermCollectionSpec>), EstimationError> {
33    let designs = build_term_collection_designs_joint(data, specs)?;
34    let mut resolved_specs = Vec::with_capacity(specs.len());
35    for (spec, design) in specs.iter().zip(designs.iter()) {
36        resolved_specs.push(freeze_term_collection_from_design(spec, design)?);
37    }
38    Ok((designs, resolved_specs))
39}
40
41pub fn fit_term_collection_forspec(
42    data: ArrayView2<'_, f64>,
43    y: ArrayView1<'_, f64>,
44    weights: ArrayView1<'_, f64>,
45    offset: ArrayView1<'_, f64>,
46    spec: &TermCollectionSpec,
47    family: LikelihoodSpec,
48    options: &FitOptions,
49) -> Result<FittedTermCollection, EstimationError> {
50    fit_term_collection_forspecwith_heuristic_lambdas(
51        data, y, weights, offset, spec, None, family, options,
52    )
53}
54
55pub fn fit_term_collection_with_coefficient_groups(
56    data: ArrayView2<'_, f64>,
57    y: ArrayView1<'_, f64>,
58    weights: ArrayView1<'_, f64>,
59    offset: ArrayView1<'_, f64>,
60    spec: &TermCollectionSpec,
61    groups: &[CoefficientGroupSpec],
62    family: LikelihoodSpec,
63    options: &FitOptions,
64) -> Result<FittedTermCollection, EstimationError> {
65    if groups.is_empty() {
66        return fit_term_collection_forspec(data, y, weights, offset, spec, family, options);
67    }
68    let design = build_term_collection_design(data, spec)?;
69    let base_fit_opts = adaptive_fit_options_base(options, &design);
70    let realized = design
71        .realize_coefficient_groups(groups, &base_fit_opts.rho_prior)
72        .map_err(EstimationError::BasisError)?;
73    let mut grouped_options = base_fit_opts.clone();
74    grouped_options.rho_prior = realized.rho_prior;
75    let fitted = FittedTermCollection {
76        fit: gam_solve::estimate::fit_gam_with_penalty_specs(
77            design.design.clone(),
78            y,
79            weights,
80            offset,
81            realized.penalty_specs,
82            realized.nullspace_dims,
83            family.clone(),
84            &grouped_options,
85        )?,
86        design,
87        adaptive_diagnostics: None,
88    };
89    enforce_term_constraint_feasibility(&fitted.design, &fitted.fit)?;
90    Ok(fitted)
91}
92
93pub fn fit_term_collection_with_penalty_block_gamma_prior_callback<F>(
94    data: ArrayView2<'_, f64>,
95    y: ArrayView1<'_, f64>,
96    weights: ArrayView1<'_, f64>,
97    offset: ArrayView1<'_, f64>,
98    spec: &TermCollectionSpec,
99    callback: F,
100    family: LikelihoodSpec,
101    options: &FitOptions,
102) -> Result<FittedTermCollection, EstimationError>
103where
104    F: FnMut(&PenaltyBlockGammaPriorMetadata<'_>) -> Option<(f64, f64)>,
105{
106    let design = build_term_collection_design(data, spec)?;
107    let mut fit_opts = adaptive_fit_options_base(options, &design);
108    fit_opts.rho_prior = realize_penalty_block_gamma_priors(&design, callback)
109        .map_err(EstimationError::BasisError)?;
110    let fitted = FittedTermCollection {
111        fit: fit_gamwith_heuristic_lambdas(
112            design.design.clone(),
113            y,
114            weights,
115            offset,
116            &design.penalties,
117            None,
118            family.clone(),
119            &fit_opts,
120        )?,
121        design,
122        adaptive_diagnostics: None,
123    };
124    enforce_term_constraint_feasibility(&fitted.design, &fitted.fit)?;
125    Ok(fitted)
126}
127
128pub fn fit_term_collection_with_penalty_block_gamma_priors(
129    data: ArrayView2<'_, f64>,
130    y: ArrayView1<'_, f64>,
131    weights: ArrayView1<'_, f64>,
132    offset: ArrayView1<'_, f64>,
133    spec: &TermCollectionSpec,
134    priors: &[(String, f64, f64)],
135    family: LikelihoodSpec,
136    options: &FitOptions,
137) -> Result<FittedTermCollection, EstimationError> {
138    let design = build_term_collection_design(data, spec)?;
139    let mut fit_opts = adaptive_fit_options_base(options, &design);
140    fit_opts.rho_prior = realize_keyed_penalty_block_gamma_priors(&design, priors)
141        .map_err(EstimationError::BasisError)?;
142    let fitted = FittedTermCollection {
143        fit: fit_gamwith_heuristic_lambdas(
144            design.design.clone(),
145            y,
146            weights,
147            offset,
148            &design.penalties,
149            None,
150            family.clone(),
151            &fit_opts,
152        )?,
153        design,
154        adaptive_diagnostics: None,
155    };
156    enforce_term_constraint_feasibility(&fitted.design, &fitted.fit)?;
157    Ok(fitted)
158}
159
160pub fn fit_term_collection_with_coefficient_groups_and_penalty_block_gamma_priors(
161    data: ArrayView2<'_, f64>,
162    y: ArrayView1<'_, f64>,
163    weights: ArrayView1<'_, f64>,
164    offset: ArrayView1<'_, f64>,
165    spec: &TermCollectionSpec,
166    groups: &[CoefficientGroupSpec],
167    priors: &[(String, f64, f64)],
168    family: LikelihoodSpec,
169    options: &FitOptions,
170) -> Result<FittedTermCollection, EstimationError> {
171    if groups.is_empty() {
172        return fit_term_collection_with_penalty_block_gamma_priors(
173            data, y, weights, offset, spec, priors, family, options,
174        );
175    }
176    if priors.is_empty() {
177        return fit_term_collection_with_coefficient_groups(
178            data, y, weights, offset, spec, groups, family, options,
179        );
180    }
181
182    let design = build_term_collection_design(data, spec)?;
183    let base_fit_opts = adaptive_fit_options_base(options, &design);
184    let base_rho_prior = realize_keyed_penalty_block_gamma_priors(&design, priors)
185        .map_err(EstimationError::BasisError)?;
186    let realized = design
187        .realize_coefficient_groups(groups, &base_rho_prior)
188        .map_err(EstimationError::BasisError)?;
189    let mut grouped_options = base_fit_opts.clone();
190    grouped_options.rho_prior = realized.rho_prior;
191    let fitted = FittedTermCollection {
192        fit: gam_solve::estimate::fit_gam_with_penalty_specs(
193            design.design.clone(),
194            y,
195            weights,
196            offset,
197            realized.penalty_specs,
198            realized.nullspace_dims,
199            family.clone(),
200            &grouped_options,
201        )?,
202        design,
203        adaptive_diagnostics: None,
204    };
205    enforce_term_constraint_feasibility(&fitted.design, &fitted.fit)?;
206    Ok(fitted)
207}
208
209fn fit_term_collection_forspecwith_heuristic_lambdas(
210    data: ArrayView2<'_, f64>,
211    y: ArrayView1<'_, f64>,
212    weights: ArrayView1<'_, f64>,
213    offset: ArrayView1<'_, f64>,
214    spec: &TermCollectionSpec,
215    heuristic_lambdas: Option<&[f64]>,
216    family: LikelihoodSpec,
217    options: &FitOptions,
218) -> Result<FittedTermCollection, EstimationError> {
219    let base_design = build_term_collection_design(data, spec)?;
220    fit_term_collection_on_realized_design(
221        y,
222        weights,
223        offset,
224        spec,
225        &base_design,
226        heuristic_lambdas,
227        family,
228        options,
229    )
230}
231
232fn has_bounded_linear_terms(spec: &TermCollectionSpec) -> bool {
233    spec.linear_terms.iter().any(|term| {
234        matches!(
235            term.coefficient_geometry,
236            LinearCoefficientGeometry::Bounded { .. }
237        )
238    })
239}
240
241fn fit_term_collection_on_realized_design(
242    y: ArrayView1<'_, f64>,
243    weights: ArrayView1<'_, f64>,
244    offset: ArrayView1<'_, f64>,
245    spec: &TermCollectionSpec,
246    design: &TermCollectionDesign,
247    heuristic_lambdas: Option<&[f64]>,
248    family: LikelihoodSpec,
249    options: &FitOptions,
250) -> Result<FittedTermCollection, EstimationError> {
251    if has_bounded_linear_terms(spec) {
252        return fit_bounded_term_collection_with_design(
253            y,
254            weights,
255            offset,
256            spec,
257            design,
258            heuristic_lambdas,
259            family,
260            options,
261        );
262    }
263    let mut base_fit_opts = adaptive_fit_options_base(options, design);
264    // Lift the symmetric log-λ cap off the smoothing coordinates of
265    // well-determined Gaussian-identity B-spline / thin-plate / tensor smooths so
266    // REML can drive λ to the value the data wants — including λ → ∞ when a
267    // term's signal lives in its penalty null space (#1271 single-penalty tp/ps,
268    // #1266 double-penalty selection). Length-safe: only fires when the inner ρ
269    // aligns 1:1 with the penalty blocks (see `relax_smoothing_rho_prior`).
270    base_fit_opts.rho_prior = relax_smoothing_rho_prior(options, design);
271    let fitted = FittedTermCollection {
272        fit: fit_gamwith_heuristic_lambdas(
273            design.design.clone(),
274            y,
275            weights,
276            offset,
277            &design.penalties,
278            heuristic_lambdas,
279            family.clone(),
280            &base_fit_opts,
281        )?,
282        design: design.clone(),
283        adaptive_diagnostics: None,
284    };
285    enforce_term_constraint_feasibility(&fitted.design, &fitted.fit)?;
286
287    let adaptive_opts = options.adaptive_regularization.clone().unwrap_or_default();
288    if !adaptive_opts.enabled {
289        return Ok(fitted);
290    }
291    let runtime_caches = extract_spatial_operator_runtime_caches(spec, &fitted.design)?;
292    if runtime_caches.is_empty() {
293        return Ok(fitted);
294    }
295    // Spatial-adaptive overlay always runs when the operator caches are
296    // non-empty. Catastrophic-overfit protection lives in the operator-log-λ
297    // box bound (Fix B at the BFGS bounds construction), which caps maximum
298    // unpenalization regardless of n. Production fits at n≈300K must run the
299    // overlay; the previous n-gate (n < max(4·p_total, 200)) silently skipped
300    // it for any small-n test, contradicting that contract.
301    fit_term_collectionwith_exact_spatial_adaptive_regularization(
302        fitted,
303        y,
304        weights,
305        offset,
306        family,
307        options,
308        &runtime_caches,
309    )
310}
311
312#[derive(Clone)]
313struct SpatialOperatorRuntimeCache {
314    termname: String,
315    feature_cols: Vec<usize>,
316    coeff_global_range: Range<usize>,
317    mass_penalty_global_idx: usize,
318    tension_penalty_global_idx: usize,
319    stiffness_penalty_global_idx: usize,
320    d0: Array2<f64>,
321    d1: Array2<f64>,
322    d2: Array2<f64>,
323    collocation_points: Array2<f64>,
324    dimension: usize,
325}
326
327#[derive(Clone)]
328struct SpatialAdaptiveWeights {
329    inv_magweight: Array1<f64>,
330    invgradweight: Array1<f64>,
331    inv_lapweight: Array1<f64>,
332}
333
334#[derive(Clone)]
335struct CharbonnierScalarBlockState {
336    signal: Array1<f64>,
337    radius: Array1<f64>,
338    epsilon: f64,
339}
340
341impl CharbonnierScalarBlockState {
342    fn from_signal(signal: Array1<f64>, epsilon: f64) -> Self {
343        let eps = epsilon.max(1e-12);
344        let radius = signal.mapv(|t| (t * t + eps * eps).sqrt());
345        Self {
346            signal,
347            radius,
348            epsilon: eps,
349        }
350    }
351
352    fn absolute_signal(&self) -> Array1<f64> {
353        self.signal.mapv(f64::abs)
354    }
355
356    fn penalty_value(&self) -> f64 {
357        self.radius.iter().map(|r| r - self.epsilon).sum::<f64>()
358    }
359
360    fn betagradient_coeff(&self) -> Array1<f64> {
361        Array1::from_iter(
362            self.signal
363                .iter()
364                .zip(self.radius.iter())
365                .map(|(t, r)| t / r),
366        )
367    }
368
369    fn betahessian_diag(&self) -> Array1<f64> {
370        let eps2 = self.epsilon * self.epsilon;
371        self.radius.mapv(|r| eps2 / r.powi(3))
372    }
373
374    fn log_epsilon_gradient_terms(&self) -> Array1<f64> {
375        let epsilon = self.epsilon;
376        let eps2 = epsilon * epsilon;
377        self.radius.mapv(|r| eps2 / r - epsilon)
378    }
379
380    fn log_epsilon_betagradient_coeff(&self) -> Array1<f64> {
381        let eps2 = self.epsilon * self.epsilon;
382        Array1::from_iter(
383            self.signal
384                .iter()
385                .zip(self.radius.iter())
386                .map(|(t, r)| -eps2 * t / r.powi(3)),
387        )
388    }
389
390    fn log_epsilon_hessian_terms(&self) -> Array1<f64> {
391        let epsilon = self.epsilon;
392        let eps2 = epsilon * epsilon;
393        let eps4 = eps2 * eps2;
394        self.radius
395            .mapv(|r| 2.0 * eps2 / r - eps4 / r.powi(3) - epsilon)
396    }
397
398    fn surrogateweights_posterior_snr(
399        &self,
400        variance: &Array1<f64>,
401        weight_floor: f64,
402        weight_ceiling: f64,
403    ) -> (Array1<f64>, Array1<f64>) {
404        // Posterior-SNR (credible-magnitude) reweighting of the scalar MM
405        // majorizer.
406        //
407        // The magnitude-only surrogate weight uses the *point-estimate* radius
408        //
409        //   r_k^mag = sqrt( t_k^2 + eps^2 ),   t_k = (D0 beta_hat)_k,
410        //   w_k     = 1 / r_k^mag.
411        //
412        // The weight multiplies the local quadratic surrogate penalty
413        // w_k (D0 beta)^2, so a *small* w_k leaves the response un-penalized
414        // (treated as a genuine feature) and a *large* w_k pulls it toward zero
415        // (enforces flatness). The failure of the point-estimate radius is that
416        // a response t_k which is large only because it is poorly determined
417        // gets a tiny weight and is left un-penalized — the weight chases noise
418        // in low-information regions.
419        //
420        // Resolution via the posterior second moment under the working-Laplace
421        // posterior beta ~ N(beta_hat, Sigma_beta), Sigma_beta = H^{-1}: the
422        // variance of the response is
423        //
424        //   Var( (D0 beta)_k ) = (D0 Sigma_beta D0^T)_kk >= 0,
425        //
426        // and the *credible* (noise-floor-corrected) squared magnitude is
427        //
428        //   t_k^credible^2 = max( t_k^2 - Var(...)_k , 0 ),
429        //   r_k^snr        = sqrt( t_k^credible^2 + eps^2 ),
430        //   w_k            = 1 / r_k^snr.
431        //
432        // The principled fix evaluates the MM weight at the *credible* (noise-
433        // floor-corrected) squared magnitude rather than the raw point estimate.
434        // Under the working-Laplace posterior `beta ~ N(beta_hat, Sigma_beta)`,
435        // `Sigma_beta = H^{-1}`, the response `t_k = (D0 beta)_k` has posterior
436        // mean `t_hat_k` and variance `V_k = (D0 Sigma_beta D0^T)_kk >= 0`. The
437        // expected squared response is `E[t_k^2] = t_hat_k^2 + V_k`, so the part
438        // of `t_hat_k^2` that exceeds the noise floor `V_k` is the credibly real
439        // squared magnitude
440        //
441        //   t_k^credible^2 = max( t_hat_k^2 - V_k , 0 ),
442        //   r_k^snr        = sqrt( t_k^credible^2 + eps^2 ),   w_k = 1 / r_k^snr.
443        //
444        // This is the correct realization of the intent. Where the point
445        // estimate is a *credible* edge (t_hat^2 >> V) the credible magnitude is
446        // ~|t_hat| and the weight is essentially `1/|t_hat|` (left un-penalized,
447        // edge preserved). Where the large point-estimate magnitude is *noise*
448        // (t_hat^2 <~ V) the credible magnitude collapses to 0 and the weight
449        // rises to `1/eps` (extra smoothing, noise suppressed). The weight is
450        // monotone non-decreasing in `V`, and is bounded above by `1/eps` — the
451        // *same* ceiling the magnitude-only weight `1/sqrt(t^2 + eps^2)` already
452        // attains at `t = 0` (and clamped by `weight_ceiling`), so it is not an
453        // unbounded blow-up: it only moves the noise-dominated rows to the flat-
454        // response weight they would have had with a credible estimate of zero
455        // curvature. The earlier delta-method form `f + ½ f'' V` was non-monotone
456        // (`f''` flips sign at `2t^2 = eps^2`) and unbounded in `V`, which left
457        // noisy rows under-penalized and was the source of the SNR regression.
458        // With `V == 0` everywhere this degrades exactly to `surrogateweights`
459        // (`1/sqrt(t^2 + eps^2)`), so any covariance-unavailable path is
460        // unchanged.
461        let eps2 = self.epsilon * self.epsilon;
462        let weight = Array1::from_iter(self.signal.iter().zip(variance.iter()).map(|(&t, &v)| {
463            let credible2 = (t * t - v.max(0.0)).max(0.0);
464            let r = (credible2 + eps2).sqrt();
465            (1.0 / r).clamp(weight_floor, weight_ceiling)
466        }));
467        let invweight = weight.mapv(|u| 1.0 / u);
468        (weight, invweight)
469    }
470
471    fn directionalhessian_diag(&self, direction_signal: &Array1<f64>) -> Array1<f64> {
472        // Scalar-image directional third derivative:
473        //
474        // If t(beta) = A beta and
475        //   H(beta) = A^T diag( eps^2 / (t_k(beta)^2 + eps^2)^(3/2) ) A,
476        // then for q = A u,
477        //
478        //   D(H)[u]
479        //   = A^T diag( -3 eps^2 t_k q_k / (t_k^2 + eps^2)^(5/2) ) A.
480        //
481        // This is one of the exact P_{beta,beta,beta}[u] terms needed by the
482        // Laplace hypergradient
483        //
484        //   d/dtheta log det H = tr(H^{-1} Hdot_theta),
485        //   Hdot_theta = J_{beta,beta,theta} + D_beta(H)[beta_theta].
486        let eps2 = self.epsilon * self.epsilon;
487        Array1::from_iter(
488            self.signal
489                .iter()
490                .zip(direction_signal.iter())
491                .zip(self.radius.iter())
492                .map(|((t, q), r)| -3.0 * eps2 * t * q / r.powi(5)),
493        )
494    }
495
496    /// Exact scalar-image fourth derivative contracted along two coefficient
497    /// directions: with `t(β)=Aβ`, `H(β)=Aᵀ diag(ψ''(t_k)) A`,
498    /// `ψ''(t)=ε²/r³`, the second directional derivative of `H` along
499    /// `(u, v)` (signals `q1=A u`, `q2=A v`) is
500    /// `Aᵀ diag( ψ''''(t_k) q1_k q2_k ) A`, with
501    /// `ψ''''(t) = -3 ε² / r⁵ + 15 ε² t² / r⁷`.
502    fn second_directionalhessian_diag(
503        &self,
504        direction1_signal: &Array1<f64>,
505        direction2_signal: &Array1<f64>,
506    ) -> Array1<f64> {
507        let eps2 = self.epsilon * self.epsilon;
508        Array1::from_iter(
509            self.signal
510                .iter()
511                .zip(direction1_signal.iter())
512                .zip(direction2_signal.iter())
513                .zip(self.radius.iter())
514                .map(|(((t, q1), q2), r)| {
515                    let r2 = r * r;
516                    let psi4 = -3.0 * eps2 / r.powi(5) + 15.0 * eps2 * t * t / (r.powi(5) * r2);
517                    psi4 * q1 * q2
518                }),
519        )
520    }
521
522    fn log_epsilon_betahessian_diag(&self) -> Array1<f64> {
523        let eps2 = self.epsilon * self.epsilon;
524        let eps4 = eps2 * eps2;
525        Array1::from_iter(
526            self.signal
527                .iter()
528                .zip(self.radius.iter())
529                .map(|(_, r)| 2.0 * eps2 / r.powi(3) - 3.0 * eps4 / r.powi(5)),
530        )
531    }
532
533    fn log_epsilon_beta_mixed_second_coeff(&self) -> Array1<f64> {
534        let eps2 = self.epsilon * self.epsilon;
535        Array1::from_iter(
536            self.signal
537                .iter()
538                .zip(self.radius.iter())
539                .map(|(t, r)| eps2 * t * (eps2 - 2.0 * t * t) / r.powi(5)),
540        )
541    }
542
543    fn log_epsilon_betahessian_second_diag(&self) -> Array1<f64> {
544        let eps2 = self.epsilon * self.epsilon;
545        let eps4 = eps2 * eps2;
546        let eps6 = eps4 * eps2;
547        Array1::from_iter(
548            self.radius.iter().map(|r| {
549                4.0 * eps2 / r.powi(3) - 18.0 * eps4 / r.powi(5) + 15.0 * eps6 / r.powi(7)
550            }),
551        )
552    }
553
554    fn log_epsilon_betahessian_directional_diag(
555        &self,
556        direction_signal: &Array1<f64>,
557    ) -> Array1<f64> {
558        let eps2 = self.epsilon * self.epsilon;
559        let eps4 = eps2 * eps2;
560        Array1::from_iter(
561            self.signal
562                .iter()
563                .zip(direction_signal.iter())
564                .zip(self.radius.iter())
565                .map(|((t, q), r)| (-6.0 * eps2 * t / r.powi(5) + 15.0 * eps4 * t / r.powi(7)) * q),
566        )
567    }
568}
569
570#[derive(Clone)]
571struct CharbonnierGroupedBlockState {
572    norm: Array1<f64>,
573    radius: Array1<f64>,
574    signal_blocks: Array2<f64>,
575    epsilon: f64,
576}
577
578impl CharbonnierGroupedBlockState {
579    fn from_signal_blocks(signal_blocks: Array2<f64>, epsilon: f64) -> Self {
580        let eps = epsilon.max(1e-12);
581        let norm = Array1::from_iter(
582            signal_blocks
583                .rows()
584                .into_iter()
585                .map(|row| row.iter().map(|v| v * v).sum::<f64>().sqrt()),
586        );
587        let radius = norm.mapv(|g| (g * g + eps * eps).sqrt());
588        Self {
589            norm,
590            radius,
591            signal_blocks,
592            epsilon: eps,
593        }
594    }
595
596    fn penalty_value(&self) -> f64 {
597        self.radius.iter().map(|r| r - self.epsilon).sum::<f64>()
598    }
599
600    fn norm_signal(&self) -> Array1<f64> {
601        self.norm.clone()
602    }
603
604    fn betagradient_blocks(&self) -> Array2<f64> {
605        let mut out = self.signal_blocks.clone();
606        for (k, mut row) in out.rows_mut().into_iter().enumerate() {
607            let scale = 1.0 / self.radius[k];
608            row.mapv_inplace(|v| v * scale);
609        }
610        out
611    }
612
613    fn betahessian_blocks(&self) -> Vec<Array2<f64>> {
614        let mut out = Vec::with_capacity(self.signal_blocks.nrows());
615        for (k, row) in self.signal_blocks.rows().into_iter().enumerate() {
616            let dim = row.len();
617            let mut block = Array2::<f64>::eye(dim);
618            block.mapv_inplace(|v| v / self.radius[k]);
619            for i in 0..dim {
620                for j in 0..dim {
621                    block[[i, j]] -= row[i] * row[j] / self.radius[k].powi(3);
622                }
623            }
624            out.push(block);
625        }
626        out
627    }
628
629    fn log_epsilon_gradient_terms(&self) -> Array1<f64> {
630        let epsilon = self.epsilon;
631        let eps2 = epsilon * epsilon;
632        self.radius.mapv(|r| eps2 / r - epsilon)
633    }
634
635    fn log_epsilon_betagradient_blocks(&self) -> Array2<f64> {
636        let mut out = self.signal_blocks.clone();
637        let eps2 = self.epsilon * self.epsilon;
638        for (k, mut row) in out.rows_mut().into_iter().enumerate() {
639            let scale = -eps2 / self.radius[k].powi(3);
640            row.mapv_inplace(|v| v * scale);
641        }
642        out
643    }
644
645    fn log_epsilon_hessian_terms(&self) -> Array1<f64> {
646        let epsilon = self.epsilon;
647        let eps2 = epsilon * epsilon;
648        let eps4 = eps2 * eps2;
649        self.radius
650            .mapv(|r| 2.0 * eps2 / r - eps4 / r.powi(3) - epsilon)
651    }
652
653    fn surrogateweights_posterior_snr(
654        &self,
655        variance: &Array1<f64>,
656        weight_floor: f64,
657        weight_ceiling: f64,
658    ) -> (Array1<f64>, Array1<f64>) {
659        // Grouped posterior-SNR (credible-magnitude) reweighting.
660        //
661        // The magnitude-only grouped surrogate weight uses the point-estimate
662        // block norm
663        //
664        //   g_k     = ||v_k||_2,   v_k = G_k beta_hat,
665        //   r_k^mag = sqrt( g_k^2 + eps^2 ),
666        //   w_k     = 1 / r_k^mag.
667        //
668        // The posterior covariance of the *block* response v_k = G_k beta under
669        // beta ~ N(beta_hat, Sigma_beta), Sigma_beta = H^{-1}, has total trace
670        //
671        //   Cov(v_k)     = G_k Sigma_beta G_k^T   (a block_dim x block_dim block),
672        //   variance[k]  = tr(Cov(v_k)) = sum_axis ( G_k[axis] Sigma_beta G_k[axis]^T ),
673        //
674        // i.e. the variance aggregated over the axis-block in the same way
675        // `norm` aggregates ||v_k||^2. As for the scalar block, we deflate the
676        // squared block norm by this noise floor to obtain the credible squared
677        // magnitude and shrink poorly-determined responses toward zero:
678        //
679        //   g_k^credible^2 = max( g_k^2 - tr(Cov(v_k)) , 0 ),
680        //   r_k^snr        = sqrt( g_k^credible^2 + eps^2 ),   w_k = 1 / r_k^snr.
681        //
682        // A block whose norm is credibly large (g_k^2 >> tr Cov) keeps a small
683        // weight (real feature, left un-penalized); a block whose norm is
684        // dominated by posterior variance has its credible norm collapse to 0,
685        // raising the weight to `1/eps` (noise suppressed). The weight is
686        // monotone non-decreasing in `tr Cov` and bounded above by `1/eps` — the
687        // same ceiling the magnitude-only weight already attains at `g = 0`
688        // (and clamped by `weight_ceiling`), so it is not an unbounded blow-up.
689        //
690        // This evaluates the grouped MM weight `f(v) = (||v||^2 + eps^2)^{-1/2}`
691        // at the credible block norm rather than at the raw point estimate. The
692        // expected squared block norm under `v_k ~ N(v_hat_k, C_k)` is
693        // `E[||v_k||^2] = ||v_hat_k||^2 + tr(C_k)`, so the credibly-real squared
694        // norm is `max(g_k^2 - tr(C_k), 0)`, identical in form to the scalar
695        // path (`block_dim == 1` recovers it exactly). The earlier delta-method
696        // correction `½ Σ ∂²f · C` was non-monotone (its sign flips with the
697        // Hessian of `f`) and unbounded in `tr C`, which under-penalized noisy
698        // blocks and was the source of the SNR regression. With `tr C == 0` it
699        // recovers `1/sqrt(g^2 + eps^2)`.
700        let eps2 = self.epsilon * self.epsilon;
701        let weight = Array1::from_iter(self.norm.iter().zip(variance.iter()).map(|(&g, &v)| {
702            let credible2 = (g * g - v.max(0.0)).max(0.0);
703            let r = (credible2 + eps2).sqrt();
704            (1.0 / r).clamp(weight_floor, weight_ceiling)
705        }));
706        let invweight = weight.mapv(|u| 1.0 / u);
707        (weight, invweight)
708    }
709
710    fn directionalhessian_blocks(&self, direction_blocks: &Array2<f64>) -> Vec<Array2<f64>> {
711        // Exact grouped directional third derivative for the slope penalty.
712        //
713        // For each collocation block k:
714        //   v_k = G_k beta,
715        //   q_k = G_k u,
716        //   r_k = sqrt(||v_k||^2 + eps^2),
717        //
718        // the exact Hessian block for psi(g; eps) = sqrt(g^2 + eps^2) - eps is
719        //   B_k,
720        //   B_k = (1 / r_k) I - v_k v_k^T / r_k^3.
721        //
722        // Differentiating B_k along u gives
723        //   M_k(u)
724        //   = -(v_k^T q_k / r_k^3) I
725        //     - (q_k v_k^T + v_k q_k^T) / r_k^3
726        //     + 3 (v_k^T q_k) v_k v_k^T / r_k^5.
727        //
728        // This expression must be symmetric because it is the directional
729        // derivative of the symmetric matrix
730        //
731        //   B_k = (1 / r_k) I - v_k v_k^T / r_k^3.
732        //
733        // The full directional penalty Hessian map is then
734        //   D(H_g)[u] = lambda_g * sum_k G_k^T M_k(u) G_k.
735        let mut out = Vec::with_capacity(self.signal_blocks.nrows());
736        for (k, (v, q)) in self
737            .signal_blocks
738            .rows()
739            .into_iter()
740            .zip(direction_blocks.rows().into_iter())
741            .enumerate()
742        {
743            let dim = v.len();
744            let dot = v.iter().zip(q.iter()).map(|(a, b)| a * b).sum::<f64>();
745            let r3 = self.radius[k].powi(3);
746            let r5 = self.radius[k].powi(5);
747            let mut block = Array2::<f64>::eye(dim);
748            block.mapv_inplace(|x| -dot * x / r3);
749            for i in 0..dim {
750                for j in 0..dim {
751                    block[[i, j]] -= (q[i] * v[j] + v[i] * q[j]) / r3;
752                    block[[i, j]] += 3.0 * dot * v[i] * v[j] / r5;
753                }
754            }
755            out.push(block);
756        }
757        out
758    }
759
760    /// Exact grouped second directional derivative of the slope/curvature block
761    /// Hessian `B_k = (1/r_k) I − v_k v_kᵀ / r_k³` along two coefficient
762    /// directions, with per-block signal images `a_k = G_k u`, `b_k = G_k w`.
763    ///
764    /// `B_k`'s first directional derivative along `a` is
765    ///   `M_k(a) = −(v·a/r³) I − (a vᵀ + v aᵀ)/r³ + 3 (v·a) v vᵀ/r⁵`
766    /// (see `directionalhessian_blocks`). Differentiating `M_k(a)` once more
767    /// along `b` (i.e. `v ← v + t b`) gives the symmetric block
768    ///   `N_k(a,b) = (−a·b/r³ + 3 (v·a)(v·b)/r⁵) I`
769    ///            `  − (a bᵀ + b aᵀ)/r³`
770    ///            `  + 3 (v·b)(a vᵀ + v aᵀ)/r⁵`
771    ///            `  + 3 (a·b) v vᵀ/r⁵`
772    ///            `  + 3 (v·a)(b vᵀ + v bᵀ)/r⁵`
773    ///            `  − 15 (v·a)(v·b) v vᵀ/r⁷`,
774    /// so `D²_β H_g[u,w] = λ_g Σ_k G_kᵀ N_k(a_k,b_k) G_k`. `N_k` is symmetric in
775    /// `a ↔ b`, matching `D²H[u,w] = D²H[w,u]`.
776    fn second_directionalhessian_blocks(
777        &self,
778        direction1_blocks: &Array2<f64>,
779        direction2_blocks: &Array2<f64>,
780    ) -> Vec<Array2<f64>> {
781        let mut out = Vec::with_capacity(self.signal_blocks.nrows());
782        for ((k, v), (a, b)) in self.signal_blocks.rows().into_iter().enumerate().zip(
783            direction1_blocks
784                .rows()
785                .into_iter()
786                .zip(direction2_blocks.rows().into_iter()),
787        ) {
788            let dim = v.len();
789            let dot = |x: ndarray::ArrayView1<'_, f64>, y: ndarray::ArrayView1<'_, f64>| {
790                x.iter().zip(y.iter()).map(|(p, q)| p * q).sum::<f64>()
791            };
792            let sa = dot(v, a);
793            let sb = dot(v, b);
794            let ab = dot(a, b);
795            let r = self.radius[k];
796            let r3 = r.powi(3);
797            let r5 = r.powi(5);
798            let r7 = r5 * r * r;
799            let diag = -ab / r3 + 3.0 * sa * sb / r5;
800            let mut block = Array2::<f64>::eye(dim);
801            block.mapv_inplace(|x| diag * x);
802            for i in 0..dim {
803                for j in 0..dim {
804                    block[[i, j]] -= (a[i] * b[j] + b[i] * a[j]) / r3;
805                    block[[i, j]] += 3.0 * sb * (a[i] * v[j] + v[i] * a[j]) / r5;
806                    block[[i, j]] += 3.0 * ab * v[i] * v[j] / r5;
807                    block[[i, j]] += 3.0 * sa * (b[i] * v[j] + v[i] * b[j]) / r5;
808                    block[[i, j]] -= 15.0 * sa * sb * v[i] * v[j] / r7;
809                }
810            }
811            out.push(block);
812        }
813        out
814    }
815
816    fn log_epsilon_betahessian_blocks(&self) -> Vec<Array2<f64>> {
817        let mut out = Vec::with_capacity(self.signal_blocks.nrows());
818        for (k, row) in self.signal_blocks.rows().into_iter().enumerate() {
819            let dim = row.len();
820            let r3 = self.radius[k].powi(3);
821            let r5 = self.radius[k].powi(5);
822            let mut block = Array2::<f64>::eye(dim);
823            let eps2 = self.epsilon * self.epsilon;
824            block.mapv_inplace(|v| -eps2 * v / r3);
825            for i in 0..dim {
826                for j in 0..dim {
827                    block[[i, j]] += 3.0 * eps2 * row[i] * row[j] / r5;
828                }
829            }
830            out.push(block);
831        }
832        out
833    }
834
835    fn log_epsilon_beta_mixed_second_blocks(&self) -> Array2<f64> {
836        let mut out = self.signal_blocks.clone();
837        let eps2 = self.epsilon * self.epsilon;
838        for (k, mut row) in out.rows_mut().into_iter().enumerate() {
839            let norm2 = self.norm[k] * self.norm[k];
840            let scale = eps2 * (eps2 - 2.0 * norm2) / self.radius[k].powi(5);
841            row.mapv_inplace(|v| v * scale);
842        }
843        out
844    }
845
846    fn log_epsilon_betahessian_second_blocks(&self) -> Vec<Array2<f64>> {
847        let mut out = Vec::with_capacity(self.signal_blocks.nrows());
848        let eps2 = self.epsilon * self.epsilon;
849        for (k, row) in self.signal_blocks.rows().into_iter().enumerate() {
850            let dim = row.len();
851            let norm2 = self.norm[k] * self.norm[k];
852            let r5 = self.radius[k].powi(5);
853            let r7 = self.radius[k].powi(7);
854            let mut block = Array2::<f64>::eye(dim);
855            block.mapv_inplace(|v| eps2 * (eps2 - 2.0 * norm2) * v / r5);
856            for i in 0..dim {
857                for j in 0..dim {
858                    block[[i, j]] += 3.0 * eps2 * (2.0 * norm2 - 3.0 * eps2) * row[i] * row[j] / r7;
859                }
860            }
861            out.push(block);
862        }
863        out
864    }
865
866    fn log_epsilon_betahessian_directional_blocks(
867        &self,
868        direction_blocks: &Array2<f64>,
869    ) -> Vec<Array2<f64>> {
870        let mut out = Vec::with_capacity(self.signal_blocks.nrows());
871        let eps2 = self.epsilon * self.epsilon;
872        for (k, (v, q)) in self
873            .signal_blocks
874            .rows()
875            .into_iter()
876            .zip(direction_blocks.rows().into_iter())
877            .enumerate()
878        {
879            let dim = v.len();
880            let dot = v.iter().zip(q.iter()).map(|(a, b)| a * b).sum::<f64>();
881            let r5 = self.radius[k].powi(5);
882            let r7 = self.radius[k].powi(7);
883            let mut block = Array2::<f64>::eye(dim);
884            block.mapv_inplace(|x| 3.0 * eps2 * dot * x / r5);
885            for i in 0..dim {
886                for j in 0..dim {
887                    block[[i, j]] += 3.0 * eps2 * (q[i] * v[j] + v[i] * q[j]) / r5;
888                    block[[i, j]] -= 15.0 * eps2 * dot * v[i] * v[j] / r7;
889                }
890            }
891            out.push(block);
892        }
893        out
894    }
895}
896
897fn scalar_operatorgradient(operator: &Array2<f64>, coeff: &Array1<f64>) -> Array1<f64> {
898    operator.t().dot(coeff)
899}
900
901fn scalar_operatorhessian(operator: &Array2<f64>, diag: &Array1<f64>) -> Array2<f64> {
902    let mut weighted = operator.clone();
903    for (k, &w) in diag.iter().enumerate() {
904        weighted.row_mut(k).mapv_inplace(|v| v * w);
905    }
906    let gram = operator.t().dot(&weighted);
907    (&gram + &gram.t().to_owned()) * 0.5
908}
909
910fn grouped_operatorgradient(
911    d1: &Array2<f64>,
912    dimension: usize,
913    blocks: &Array2<f64>,
914) -> Result<Array1<f64>, EstimationError> {
915    if blocks.ncols() != dimension {
916        crate::bail_invalid_estim!(
917            "grouped gradient block dimension mismatch: got {}, expected {dimension}",
918            blocks.ncols()
919        );
920    }
921    if d1.nrows() != blocks.nrows() * dimension {
922        crate::bail_invalid_estim!(
923            "grouped gradient row mismatch: D1 has {} rows, blocks imply {}",
924            d1.nrows(),
925            blocks.nrows() * dimension
926        );
927    }
928    let mut out = Array1::<f64>::zeros(d1.ncols());
929    for k in 0..blocks.nrows() {
930        let gk = d1
931            .slice(s![k * dimension..(k + 1) * dimension, ..])
932            .to_owned();
933        out += &gk.t().dot(&blocks.row(k));
934    }
935    Ok(out)
936}
937
938fn grouped_operatorhessian(
939    d1: &Array2<f64>,
940    dimension: usize,
941    blocks: &[Array2<f64>],
942) -> Result<Array2<f64>, EstimationError> {
943    if d1.nrows() != blocks.len() * dimension {
944        crate::bail_invalid_estim!(
945            "grouped Hessian row mismatch: D1 has {} rows, blocks imply {}",
946            d1.nrows(),
947            blocks.len() * dimension
948        );
949    }
950    let p = d1.ncols();
951    let mut out = Array2::<f64>::zeros((p, p));
952    for (k, block) in blocks.iter().enumerate() {
953        if block.nrows() != dimension || block.ncols() != dimension {
954            crate::bail_invalid_estim!(
955                "grouped Hessian block {k} has shape {}x{}, expected {}x{}",
956                block.nrows(),
957                block.ncols(),
958                dimension,
959                dimension
960            );
961        }
962        let gk = d1
963            .slice(s![k * dimension..(k + 1) * dimension, ..])
964            .to_owned();
965        out += &gk.t().dot(&block.dot(&gk));
966    }
967    Ok((&out + &out.t().to_owned()) * 0.5)
968}
969
970#[derive(Clone)]
971struct SpatialPenaltyExactState {
972    magnitude: CharbonnierScalarBlockState,
973    gradient: CharbonnierGroupedBlockState,
974    curvature: CharbonnierGroupedBlockState,
975}
976
977fn collocationgradient_blocks(
978    gradrows: &Array1<f64>,
979    dimension: usize,
980) -> Result<Array2<f64>, EstimationError> {
981    if dimension == 0 || !gradrows.len().is_multiple_of(dimension) {
982        crate::bail_invalid_estim!(
983            "invalid collocation gradient layout: rows={}, dimension={dimension}",
984            gradrows.len()
985        );
986    }
987    let p = gradrows.len() / dimension;
988    let mut out = Array2::<f64>::zeros((p, dimension));
989    for k in 0..p {
990        for axis in 0..dimension {
991            out[[k, axis]] = gradrows[k * dimension + axis];
992        }
993    }
994    Ok(out)
995}
996
997fn collocationhessian_blocks(
998    hessianrows: &Array1<f64>,
999    dimension: usize,
1000) -> Result<Array2<f64>, EstimationError> {
1001    let block_dim = dimension.checked_mul(dimension).ok_or_else(|| {
1002        EstimationError::InvalidInput("invalid collocation Hessian dimension overflow".to_string())
1003    })?;
1004    if block_dim == 0 || !hessianrows.len().is_multiple_of(block_dim) {
1005        crate::bail_invalid_estim!(
1006            "invalid collocation Hessian layout: rows={}, dimension={dimension}",
1007            hessianrows.len()
1008        );
1009    }
1010    let p = hessianrows.len() / block_dim;
1011    let mut out = Array2::<f64>::zeros((p, block_dim));
1012    for k in 0..p {
1013        for idx in 0..block_dim {
1014            out[[k, idx]] = hessianrows[k * block_dim + idx];
1015        }
1016    }
1017    Ok(out)
1018}
1019
1020impl SpatialPenaltyExactState {
1021    fn from_beta_local(
1022        beta_local: ArrayView1<'_, f64>,
1023        cache: &SpatialOperatorRuntimeCache,
1024        epsilons: [f64; 3],
1025    ) -> Result<Self, EstimationError> {
1026        // Exact collocation-state extraction for the three Charbonnier penalty blocks.
1027        //
1028        // For one spatial smooth term with coefficient vector beta_local, the exact
1029        // operator-decomposition penalty is built from three collocation images:
1030        //
1031        //   magnitude:  f = D0 beta_local
1032        //   slope:      v_k = G_k beta_local
1033        //   curvature:  H_k = D2_k beta_local
1034        //
1035        // where the gradient operator is stored in row-stacked form:
1036        //
1037        //   D1 beta_local in R^(P * d),
1038        //   row layout = (point 0, axis 0..d-1), (point 1, axis 0..d-1), ...
1039        //   D2 beta_local in R^(P * d * d),
1040        //   row layout = (point, Hessian axis_a, Hessian axis_b).
1041        //
1042        // so we first reshape that stacked vector into the grouped block array
1043        //
1044        //   [v_0^T
1045        //    ...
1046        //    v_(P-1)^T]  in R^(P x d).
1047        //
1048        // The three exact Charbonnier block states then carry:
1049        //   - the raw operator signals,
1050        //   - their radii sqrt(signal^2 + eps^2) or sqrt(||v_k||^2 + eps^2),
1051        //   - and all exact derivatives derived from those radii.
1052        //
1053        // This is the canonical translation from coefficient-space beta to the
1054        // penalty-side mathematical objects used throughout the implementation.
1055        let gradientrows = cache.d1.dot(&beta_local);
1056        let hessianrows = cache.d2.dot(&beta_local);
1057        Ok(Self {
1058            magnitude: CharbonnierScalarBlockState::from_signal(
1059                cache.d0.dot(&beta_local),
1060                epsilons[0],
1061            ),
1062            gradient: CharbonnierGroupedBlockState::from_signal_blocks(
1063                collocationgradient_blocks(&gradientrows, cache.dimension)?,
1064                epsilons[1],
1065            ),
1066            curvature: CharbonnierGroupedBlockState::from_signal_blocks(
1067                collocationhessian_blocks(&hessianrows, cache.dimension)?,
1068                epsilons[2],
1069            ),
1070        })
1071    }
1072
1073    fn absolute_collocation_magnitudes(&self) -> (Array1<f64>, Array1<f64>, Array1<f64>) {
1074        (
1075            self.magnitude.absolute_signal(),
1076            self.gradient.norm_signal(),
1077            self.curvature.norm_signal(),
1078        )
1079    }
1080}
1081
1082fn robust_epsilon_from_samples(values: &[f64], min_epsilon_cfg: f64) -> f64 {
1083    if values.is_empty() {
1084        return min_epsilon_cfg.max(1e-12);
1085    }
1086    let mut clean = values
1087        .iter()
1088        .copied()
1089        .filter(|v| v.is_finite() && *v >= 0.0)
1090        .collect::<Vec<_>>();
1091    if clean.is_empty() {
1092        return min_epsilon_cfg.max(1e-12);
1093    }
1094    clean.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
1095
1096    let n = clean.len();
1097    let median = quantile_from_sorted(&clean, 0.5);
1098    let q75 = quantile_from_sorted(&clean, 0.75);
1099    let q95 = quantile_from_sorted(&clean, 0.95);
1100
1101    let mut abs_dev = clean
1102        .iter()
1103        .map(|v| (v - median).abs())
1104        .filter(|v| v.is_finite())
1105        .collect::<Vec<_>>();
1106    abs_dev.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
1107    let mad = 1.4826 * quantile_from_sorted(&abs_dev, 0.5);
1108
1109    // Charbonnier/MM requires eps bounded away from zero:
1110    //   u(t0) = 1 / (2*sqrt(t0^2 + eps^2)) ~ 1/(2*eps) near t0=0.
1111    // Use robust pilot scale:
1112    //   s = max(median(z), 1.4826*MAD(z), Q75(z)).
1113    // If s is tiny (<= delta), fallback to:
1114    //   s <- max(Q95(z), RMS(z)).
1115    // If still tiny, fallback to absolute floor s_min.
1116    // Then eps = kappa * s.
1117    // Primary robust scale: s = max(median, 1.4826*MAD, Q75).
1118    let mut scale = median.max(mad).max(q75);
1119
1120    // Safety threshold delta and absolute floor s_min.
1121    let delta = (f64::EPSILON.sqrt() * q95.max(1.0))
1122        .max(min_epsilon_cfg)
1123        .max(1e-12);
1124    let s_min = min_epsilon_cfg.max(1e-12);
1125
1126    // If robust scale is tiny, use high-quantile / RMS fallback.
1127    if scale <= delta {
1128        let rms = (clean.iter().map(|v| v * v).sum::<f64>() / n as f64).sqrt();
1129        scale = q95.max(rms);
1130    }
1131    if scale <= delta {
1132        scale = s_min;
1133    }
1134
1135    // Start near the observed operator scale so the optimizer begins in a
1136    // neutral regime where both quadratic and linear behavior are reachable.
1137    let kappa = 1.0_f64;
1138    (kappa * scale).max(s_min)
1139}
1140
1141fn extract_spatial_operator_runtime_caches(
1142    spec: &TermCollectionSpec,
1143    design: &TermCollectionDesign,
1144) -> Result<Vec<SpatialOperatorRuntimeCache>, EstimationError> {
1145    let smooth_start = design
1146        .design
1147        .ncols()
1148        .saturating_sub(design.smooth.total_smooth_cols());
1149    let mut out = Vec::<SpatialOperatorRuntimeCache>::new();
1150    for (term_idx, (termspec, term_fit)) in spec
1151        .smooth_terms
1152        .iter()
1153        .zip(design.smooth.terms.iter())
1154        .enumerate()
1155    {
1156        let Some(global_base_idx) = smooth_term_penalty_index(spec, design, term_idx) else {
1157            continue;
1158        };
1159        let mut active_local_idx = 0usize;
1160        let mut mass_local_idx = None;
1161        let mut tension_local_idx = None;
1162        let mut stiffness_local_idx = None;
1163        let mut mass_norm = None;
1164        let mut tension_norm = None;
1165        let mut stiffness_norm = None;
1166        for info in &term_fit.penaltyinfo_local {
1167            if !info.active {
1168                continue;
1169            }
1170            match info.source {
1171                PenaltySource::OperatorMass => {
1172                    mass_local_idx = Some(active_local_idx);
1173                    mass_norm = Some(info.normalization_scale);
1174                }
1175                PenaltySource::OperatorTension => {
1176                    tension_local_idx = Some(active_local_idx);
1177                    tension_norm = Some(info.normalization_scale);
1178                }
1179                PenaltySource::OperatorStiffness => {
1180                    stiffness_local_idx = Some(active_local_idx);
1181                    stiffness_norm = Some(info.normalization_scale);
1182                }
1183                _ => {}
1184            }
1185            active_local_idx += 1;
1186        }
1187        // The Charbonnier adaptive overlay rebuilds the {mass, tension,
1188        // stiffness} D-operator triplet from explicit collocation derivatives
1189        // and reweights all three channels in tandem; the stiffness slot in
1190        // particular is the D2 second-derivative operator. A term that does
1191        // NOT ship an explicit Stiffness penalty (pure Duchon's RKHS-Primary-
1192        // curvature layout — `DuchonOperatorPenaltySpec::default()`) has no
1193        // matching shipped penalty for the Charbonnier D2 surrogate to reweight,
1194        // so applying the overlay would smuggle a fresh D2 collocation
1195        // operator into a basis whose curvature is the RKHS Primary Gram (a
1196        // different mathematical object). Without an explicit Stiffness
1197        // channel the term must be skipped — the runtime cache for the
1198        // adaptive overlay simply doesn't apply.
1199        let (
1200            Some(mass_local),
1201            Some(tension_local),
1202            Some(stiffness_local),
1203            Some(mass_scale),
1204            Some(tension_scale),
1205            Some(stiffness_scale),
1206        ) = (
1207            mass_local_idx,
1208            tension_local_idx,
1209            stiffness_local_idx,
1210            mass_norm,
1211            tension_norm,
1212            stiffness_norm,
1213        )
1214        else {
1215            continue;
1216        };
1217        let mass_global_idx = global_base_idx + mass_local;
1218        let tension_global_idx = global_base_idx + tension_local;
1219        let stiffness_global_idx = global_base_idx + stiffness_local;
1220
1221        let (feature_cols, mut d0, mut d1, mut d2, collocation_points, dim, center_mass_rows) =
1222            match (&termspec.basis, &term_fit.metadata) {
1223                (
1224                    SmoothBasisSpec::Matern { feature_cols, .. },
1225                    BasisMetadata::Matern {
1226                        centers,
1227                        length_scale,
1228                        nu,
1229                        include_intercept,
1230                        identifiability_transform,
1231                        aniso_log_scales,
1232                        input_scales,
1233                        ..
1234                    },
1235                ) => {
1236                    // Match the σ_geom-compensated effective length scale the
1237                    // design (and shipped penalties) use against the standardized
1238                    // centers; the raw metadata length_scale lives in original
1239                    // coordinates and would put this overlay on a different kernel
1240                    // range than the penalties it scales (#706).
1241                    let collocation_length_scale = match input_scales.as_deref() {
1242                        Some(scales) => {
1243                            compensate_length_scale_for_standardization(*length_scale, scales)
1244                        }
1245                        None => *length_scale,
1246                    };
1247                    let ops = build_matern_collocation_operator_matrices(
1248                        centers.view(),
1249                        None,
1250                        collocation_length_scale,
1251                        *nu,
1252                        *include_intercept,
1253                        identifiability_transform.as_ref().map(|z| z.view()),
1254                        aniso_log_scales.as_deref(),
1255                    )?;
1256                    (
1257                        feature_cols.clone(),
1258                        ops.d0,
1259                        ops.d1,
1260                        ops.d2,
1261                        ops.collocation_points,
1262                        centers.ncols(),
1263                        false,
1264                    )
1265                }
1266                (
1267                    SmoothBasisSpec::Duchon { feature_cols, .. },
1268                    BasisMetadata::Duchon {
1269                        centers,
1270                        length_scale,
1271                        power,
1272                        nullspace_order,
1273                        identifiability_transform,
1274                        input_scales,
1275                        aniso_log_scales,
1276                        operator_collocation_points: Some(collocation_points),
1277                        ..
1278                    },
1279                ) => {
1280                    let collocation_length_scale = match (length_scale, input_scales.as_deref()) {
1281                        (Some(ls), Some(scales)) => {
1282                            Some(compensate_length_scale_for_standardization(*ls, scales))
1283                        }
1284                        (Some(ls), None) => Some(*ls),
1285                        (None, _) => None,
1286                    };
1287                    let ops =
1288                        gam_terms::basis::build_duchon_collocation_operator_matriceswithworkspace(
1289                            centers.view(),
1290                            collocation_points.view(),
1291                            None,
1292                            collocation_length_scale,
1293                            *power,
1294                            *nullspace_order,
1295                            aniso_log_scales.as_deref(),
1296                            identifiability_transform.as_ref().map(|z| z.view()),
1297                            2,
1298                            &mut BasisWorkspace::default(),
1299                        )?;
1300                    (
1301                        feature_cols.clone(),
1302                        ops.d0,
1303                        ops.d1,
1304                        ops.d2,
1305                        ops.collocation_points,
1306                        centers.ncols(),
1307                        true,
1308                    )
1309                }
1310                _ => continue,
1311            };
1312        if center_mass_rows && d0.nrows() > 0 && d0.ncols() > 0 {
1313            let means = d0.sum_axis(Axis(0)).mapv(|v| v / d0.nrows() as f64);
1314            for mut row in d0.rows_mut() {
1315                row -= &means;
1316            }
1317        }
1318
1319        // Runtime operator caches must live on the same normalized penalty scale as the
1320        // shipped design penalties. The basis builders normalize S0=D0'D0, S1=D1'D1, and
1321        // S2=D2'D2 before exposing them as smoothing blocks, recording the corresponding
1322        // Frobenius norms in penaltyinfo_local.normalization_scale. If the exact adaptive
1323        // path uses raw collocation operators here, then its Charbonnier penalties live on a
1324        // different geometry from the ordinary Matérn/Duchon penalties:
1325        //
1326        //   raw quadratic limit:        beta' (D'D) beta
1327        //   shipped design penalty:     beta' (D'D / c) beta
1328        //
1329        // The correct operator-level normalization is therefore
1330        //
1331        //   D_norm = D / sqrt(c),
1332        //
1333        // so that D_norm' D_norm = (D'D)/c matches the design penalty exactly. Without this,
1334        // adaptive lambdas compensate for hidden operator-scale mismatches and are no longer
1335        // comparable to the baseline smoothing parameters.
1336        let mass_scale = mass_scale.max(1e-12).sqrt();
1337        let tension_scale = tension_scale.max(1e-12).sqrt();
1338        let stiffness_scale = stiffness_scale.max(1e-12).sqrt();
1339        d0.mapv_inplace(|v| v / mass_scale);
1340        d1.mapv_inplace(|v| v / tension_scale);
1341        d2.mapv_inplace(|v| v / stiffness_scale);
1342
1343        let coeff_global_range =
1344            (smooth_start + term_fit.coeff_range.start)..(smooth_start + term_fit.coeff_range.end);
1345        if d0.ncols() != coeff_global_range.len()
1346            || d1.ncols() != coeff_global_range.len()
1347            || d2.ncols() != coeff_global_range.len()
1348        {
1349            crate::bail_invalid_estim!(
1350                "spatial operator dimension mismatch for term '{}': D0 cols={}, D1 cols={}, D2 cols={}, coeffs={}",
1351                term_fit.name,
1352                d0.ncols(),
1353                d1.ncols(),
1354                d2.ncols(),
1355                coeff_global_range.len()
1356            );
1357        }
1358        out.push(SpatialOperatorRuntimeCache {
1359            termname: term_fit.name.clone(),
1360            feature_cols,
1361            coeff_global_range,
1362            mass_penalty_global_idx: mass_global_idx,
1363            tension_penalty_global_idx: tension_global_idx,
1364            stiffness_penalty_global_idx: stiffness_global_idx,
1365            d0,
1366            d1,
1367            d2,
1368            collocation_points,
1369            dimension: dim,
1370        });
1371    }
1372    Ok(out)
1373}
1374
1375/// Posterior variance of a scalar collocation operator response under the
1376/// working-Laplace posterior `beta ~ N(beta_hat, Sigma_local)`.
1377///
1378/// For operator row `D_k` (one row of `D0`) acting on the term-local coefficient
1379/// block, `Var((D beta)_k) = D_k Sigma_local D_k^T = (D Sigma_local D^T)_kk`.
1380/// We compute it without forming `D Sigma D^T` densely: for each row we evaluate
1381/// `s_k = Sigma_local D_k^T` (one matrix-vector product) and then `D_k . s_k`.
1382/// `Sigma_local` is the sub-block of the global conditional covariance
1383/// `Sigma_beta = H^{-1}` indexed by the term's `coeff_global_range`, i.e. the
1384/// covariance proxy is the already-materialized inner working-Laplace inverse;
1385/// no second factorization is formed.
1386fn scalar_operator_response_variance(
1387    operator: &Array2<f64>,
1388    cov_local: &Array2<f64>,
1389) -> Array1<f64> {
1390    Array1::from_iter(operator.rows().into_iter().map(|row| {
1391        let s = cov_local.dot(&row);
1392        row.dot(&s).max(0.0)
1393    }))
1394}
1395
1396/// Posterior second-moment variance aggregated over each grouped collocation
1397/// block (gradient/curvature). The grouped operator is stored row-stacked with
1398/// `block_dim` rows per collocation point (`d` axes for the gradient, `d*d` for
1399/// the Hessian). For block `k`,
1400///
1401///   v_k = G_k beta,   Cov(v_k) = G_k Sigma_local G_k^T   (block_dim x block_dim),
1402///   variance_k = tr(Cov(v_k)) = sum_axis ( G_k[axis] Sigma_local G_k[axis]^T ),
1403///
1404/// which matches how `CharbonnierGroupedBlockState::norm` aggregates
1405/// `||v_k||^2 = sum_axis (G_k[axis] beta)^2` across the axis-block.
1406fn grouped_operator_response_variance(
1407    operator: &Array2<f64>,
1408    block_dim: usize,
1409    cov_local: &Array2<f64>,
1410) -> Result<Array1<f64>, EstimationError> {
1411    if block_dim == 0 || !operator.nrows().is_multiple_of(block_dim) {
1412        crate::bail_invalid_estim!(
1413            "grouped variance row layout invalid: rows={}, block_dim={block_dim}",
1414            operator.nrows()
1415        );
1416    }
1417    let p = operator.nrows() / block_dim;
1418    let mut out = Array1::<f64>::zeros(p);
1419    for k in 0..p {
1420        let mut acc = 0.0;
1421        for axis in 0..block_dim {
1422            let row = operator.row(k * block_dim + axis);
1423            let s = cov_local.dot(&row);
1424            acc += row.dot(&s);
1425        }
1426        out[k] = acc.max(0.0);
1427    }
1428    Ok(out)
1429}
1430
1431fn compute_spatial_adaptiveweights_for_beta(
1432    beta: &Array1<f64>,
1433    caches: &[SpatialOperatorRuntimeCache],
1434    epsilon_0: f64,
1435    epsilon_g: f64,
1436    epsilon_c: f64,
1437    weight_floor: f64,
1438    weight_ceiling: f64,
1439    beta_covariance: Option<&Array2<f64>>,
1440) -> Result<Vec<SpatialAdaptiveWeights>, EstimationError> {
1441    // Charbonnier / pseudo-Huber MM derivation (per collocation scalar t):
1442    //   psi(t; eps) = sqrt(t^2 + eps^2) - eps
1443    // and for reference t0 the tangent majorizer in t^2 gives:
1444    //   psi(t) <= 0.5 * w(t0) * t^2 + const(t0),
1445    //   w(t0) = 1 / sqrt(t0^2 + eps^2).
1446    //
1447    // We apply this to:
1448    //   t = f_k = |f(z_k)|             (magnitude),
1449    //   t = g_k = ||nabla f(z_k)||_2   (gradient magnitude),
1450    //   t = c_k = ||D²f(z_k)||_F       (full Hessian curvature),
1451    // both computed from beta^(t-1).
1452    //
1453    // These w values define the quadratic surrogate penalties:
1454    //   K0 = D0_con^T W_0 D0_con,  W_0 = diag(w_0)
1455    //   K1 = D1_con^T W_g D1_con,  W_g = diag(w_g) \otimes I_d  (k,axis order)
1456    //   K2 = D2_con^T W_c D2_con,  W_c = diag(w_c) \otimes I_(d*d).
1457    //
1458    // We clamp w directly, then derive inv_w=1/w for diagnostics and row scaling.
1459    //
1460    // Posterior-SNR reweighting (magic by default): when the inner working-Laplace
1461    // conditional covariance `Sigma_beta = H^{-1}` is available we replace the
1462    // squared point-estimate radius `t_k^2 + eps^2` by the credible (noise-floor-
1463    // corrected) second moment `max(t_k^2 - Var((D beta)_k), 0) + eps^2`, with
1464    // `Var = (D Sigma_beta D^T)_kk`. This stops the weight from leaving derivatives
1465    // un-penalized just because they are large but poorly determined: such
1466    // responses are shrunk toward zero (large weight, strong smoothing), while
1467    // credibly large derivatives (real edges) keep their small weight. `Sigma_beta`
1468    // here is the already-formed inner Hessian inverse from the final exact-family
1469    // solve — no second factorization is built; we only reuse the materialized
1470    // covariance. When the covariance is unavailable (`None`) the variance is zero
1471    // and this degrades *exactly* to the old magnitude-only radius.
1472    caches
1473        .iter()
1474        .map(|cache| {
1475            let beta_local = beta.slice(s![cache.coeff_global_range.clone()]);
1476            let exact = SpatialPenaltyExactState::from_beta_local(
1477                beta_local,
1478                cache,
1479                [epsilon_0, epsilon_g, epsilon_c],
1480            )?;
1481            let cov_local = beta_covariance.map(|cov| {
1482                cov.slice(s![
1483                    cache.coeff_global_range.clone(),
1484                    cache.coeff_global_range.clone()
1485                ])
1486                .to_owned()
1487            });
1488            let dim = cache.dimension;
1489            let (var_0, var_g, var_c) = match cov_local.as_ref() {
1490                Some(cov) => (
1491                    scalar_operator_response_variance(&cache.d0, cov),
1492                    grouped_operator_response_variance(&cache.d1, dim, cov)?,
1493                    grouped_operator_response_variance(&cache.d2, dim * dim, cov)?,
1494                ),
1495                None => (
1496                    Array1::<f64>::zeros(exact.magnitude.signal.len()),
1497                    Array1::<f64>::zeros(exact.gradient.norm.len()),
1498                    Array1::<f64>::zeros(exact.curvature.norm.len()),
1499                ),
1500            };
1501            let (_, inv_0) = exact.magnitude.surrogateweights_posterior_snr(
1502                &var_0,
1503                weight_floor,
1504                weight_ceiling,
1505            );
1506            let (_, inv_g) =
1507                exact
1508                    .gradient
1509                    .surrogateweights_posterior_snr(&var_g, weight_floor, weight_ceiling);
1510            let (_, inv_c) = exact.curvature.surrogateweights_posterior_snr(
1511                &var_c,
1512                weight_floor,
1513                weight_ceiling,
1514            );
1515            Ok(SpatialAdaptiveWeights {
1516                inv_magweight: inv_0,
1517                invgradweight: inv_g,
1518                inv_lapweight: inv_c,
1519            })
1520        })
1521        .collect()
1522}
1523
1524fn compute_initial_epsilons(
1525    beta: &Array1<f64>,
1526    caches: &[SpatialOperatorRuntimeCache],
1527    min_epsilon: f64,
1528) -> Result<(f64, f64, f64), EstimationError> {
1529    let mut fvals = Vec::<f64>::new();
1530    let mut gvals = Vec::<f64>::new();
1531    let mut cvals = Vec::<f64>::new();
1532    for cache in caches {
1533        let beta_local = beta.slice(s![cache.coeff_global_range.clone()]);
1534        let exact = SpatialPenaltyExactState::from_beta_local(
1535            beta_local,
1536            cache,
1537            [min_epsilon, min_epsilon, min_epsilon],
1538        )?;
1539        let (f, g, c) = exact.absolute_collocation_magnitudes();
1540        fvals.extend(f.iter().copied());
1541        gvals.extend(g.iter().copied());
1542        cvals.extend(c.iter().copied());
1543    }
1544    // Robust epsilon initialization from pilot magnitudes:
1545    //   s = max(median(z), 1.4826*MAD(z), Q75(z)),
1546    //   if s is tiny then fallback to max(Q95(z), RMS(z)),
1547    //   if still tiny then use absolute floor min_epsilon.
1548    // Epsilon is then kappa * s.
1549    let eps_0 = robust_epsilon_from_samples(&fvals, min_epsilon);
1550    let eps_g = robust_epsilon_from_samples(&gvals, min_epsilon);
1551    let eps_c = robust_epsilon_from_samples(&cvals, min_epsilon);
1552    Ok((eps_0, eps_g, eps_c))
1553}
1554
1555fn exact_spatial_adaptive_penalty_index_set(
1556    caches: &[SpatialOperatorRuntimeCache],
1557) -> BTreeSet<usize> {
1558    let mut out = BTreeSet::new();
1559    for cache in caches {
1560        out.insert(cache.mass_penalty_global_idx);
1561        out.insert(cache.tension_penalty_global_idx);
1562        out.insert(cache.stiffness_penalty_global_idx);
1563    }
1564    out
1565}
1566
1567fn build_spatial_adaptive_hyperspecs(cache_count: usize) -> Vec<SpatialAdaptiveHyperSpec> {
1568    let mut out = Vec::with_capacity(cache_count * 3 + 3);
1569    for cache_index in 0..cache_count {
1570        out.push(SpatialAdaptiveHyperSpec {
1571            cache_index,
1572            kind: SpatialAdaptiveHyperKind::LogLambdaMagnitude,
1573        });
1574        out.push(SpatialAdaptiveHyperSpec {
1575            cache_index,
1576            kind: SpatialAdaptiveHyperKind::LogLambdaGradient,
1577        });
1578        out.push(SpatialAdaptiveHyperSpec {
1579            cache_index,
1580            kind: SpatialAdaptiveHyperKind::LogLambdaCurvature,
1581        });
1582    }
1583    out.push(SpatialAdaptiveHyperSpec {
1584        cache_index: 0,
1585        kind: SpatialAdaptiveHyperKind::LogEpsilonMagnitude,
1586    });
1587    out.push(SpatialAdaptiveHyperSpec {
1588        cache_index: 0,
1589        kind: SpatialAdaptiveHyperKind::LogEpsilonGradient,
1590    });
1591    out.push(SpatialAdaptiveHyperSpec {
1592        cache_index: 0,
1593        kind: SpatialAdaptiveHyperKind::LogEpsilonCurvature,
1594    });
1595    out
1596}
1597
1598fn penalty_matrixwith_local_block(
1599    total_dim: usize,
1600    coeff_range: Range<usize>,
1601    local: &Array2<f64>,
1602) -> Array2<f64> {
1603    let mut out = Array2::<f64>::zeros((total_dim, total_dim));
1604    out.slice_mut(s![coeff_range.clone(), coeff_range])
1605        .assign(local);
1606    out
1607}
1608
1609fn fit_term_collectionwith_exact_spatial_adaptive_regularization(
1610    baseline: FittedTermCollection,
1611    y: ArrayView1<'_, f64>,
1612    weights: ArrayView1<'_, f64>,
1613    offset: ArrayView1<'_, f64>,
1614    family: LikelihoodSpec,
1615    options: &FitOptions,
1616    runtime_caches: &[SpatialOperatorRuntimeCache],
1617) -> Result<FittedTermCollection, EstimationError> {
1618    // Exact adaptive-regularization hyperfit.
1619    //
1620    // This replaces the old MM-plus-approximate hyperfit with the
1621    // exact pseudo-Laplace objective agreed in the math notes:
1622    //
1623    //   L_tilde(theta)
1624    //   = J(beta_hat(theta); theta) + 0.5 log det H(beta_hat(theta), theta),
1625    //
1626    // where:
1627    //   - beta_hat(theta) is the exact inner mode of the true nonquadratic
1628    //     Charbonnier-penalized objective,
1629    //   - theta contains:
1630    //       * retained quadratic log-lambdas for non-adaptive penalties,
1631    //       * one log-lambda per adaptive operator block,
1632    //       * three global log-epsilons shared by every adaptive spatial term,
1633    //   - H is the exact beta-Hessian of the true objective at the mode.
1634    //
1635    // Implementation structure:
1636    //   1. keep ordinary quadratic penalties that are unrelated to adaptive
1637    //      spatial terms in the standard outer-rho path;
1638    //   2. move the adaptive Charbonnier penalties into a one-block exact-Newton
1639    //      custom family so the inner solve uses the real model rather than an
1640    //      MM surrogate;
1641    //   3. expose exact psi-gradients for adaptive log-lambda / log-epsilon
1642    //      coordinates through the custom-family pseudo-Laplace hook;
1643    //   4. refit once at the optimized hyperparameters with all penalties frozen
1644    //      inside the exact family, so covariance and final diagnostics are
1645    //      computed on the same exact surface.
1646    let adaptive_opts = options.adaptive_regularization.clone().unwrap_or_default();
1647    let adaptive_penalty_indices = exact_spatial_adaptive_penalty_index_set(runtime_caches);
1648    let p_total = baseline.design.design.ncols();
1649    struct RetainedPenaltySetup {
1650        global_idx: usize,
1651        global_penalty: Array2<f64>,
1652        nullspace_dim: usize,
1653        log_lambda: f64,
1654        col_range: Range<usize>,
1655        hessian_piece: Array2<f64>,
1656    }
1657    use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};
1658    let retained_setups = baseline
1659        .design
1660        .penalties
1661        .par_iter()
1662        .enumerate()
1663        .map(|(idx, bp)| {
1664            if adaptive_penalty_indices.contains(&idx) {
1665                return None;
1666            }
1667            let lambda = baseline.fit.lambdas[idx];
1668            Some(RetainedPenaltySetup {
1669                global_idx: idx,
1670                global_penalty: bp.to_global(p_total),
1671                nullspace_dim: baseline
1672                    .design
1673                    .nullspace_dims
1674                    .get(idx)
1675                    .copied()
1676                    .unwrap_or(0),
1677                log_lambda: lambda.max(1e-12).ln(),
1678                col_range: bp.col_range.clone(),
1679                hessian_piece: bp.local.mapv(|v| lambda * v),
1680            })
1681        })
1682        .collect::<Vec<_>>();
1683    let retained_count = retained_setups
1684        .iter()
1685        .filter(|setup| setup.is_some())
1686        .count();
1687    let mut retained_penalties = Vec::<Array2<f64>>::with_capacity(retained_count);
1688    let mut retained_nullspace_dims = Vec::<usize>::with_capacity(retained_count);
1689    let mut retained_log_lambdas = Vec::<f64>::with_capacity(retained_count);
1690    let mut retained_global_indices = Vec::<usize>::with_capacity(retained_count);
1691    let mut fixed_quadratichessian = Array2::<f64>::zeros((p_total, p_total));
1692    for setup in retained_setups.into_iter().flatten() {
1693        retained_penalties.push(setup.global_penalty);
1694        retained_nullspace_dims.push(setup.nullspace_dim);
1695        retained_log_lambdas.push(setup.log_lambda);
1696        retained_global_indices.push(setup.global_idx);
1697        fixed_quadratichessian
1698            .slice_mut(s![setup.col_range.clone(), setup.col_range])
1699            .scaled_add(1.0, &setup.hessian_piece);
1700    }
1701
1702    let (eps_0_init, eps_g_init, eps_c_init) = compute_initial_epsilons(
1703        &baseline.fit.beta,
1704        runtime_caches,
1705        adaptive_opts.min_epsilon,
1706    )?;
1707    let mut initial_theta =
1708        Array1::<f64>::zeros(retained_penalties.len() + runtime_caches.len() * 3 + 3);
1709    for (idx, value) in retained_log_lambdas.iter().enumerate() {
1710        initial_theta[idx] = *value;
1711    }
1712    let adaptive_log_lambda_components = runtime_caches
1713        .par_iter()
1714        .map(|cache| {
1715            [
1716                baseline.fit.lambdas[cache.mass_penalty_global_idx]
1717                    .max(1e-12)
1718                    .ln(),
1719                baseline.fit.lambdas[cache.tension_penalty_global_idx]
1720                    .max(1e-12)
1721                    .ln(),
1722                baseline.fit.lambdas[cache.stiffness_penalty_global_idx]
1723                    .max(1e-12)
1724                    .ln(),
1725            ]
1726        })
1727        .collect::<Vec<_>>();
1728    let mut at = retained_penalties.len();
1729    for logs in &adaptive_log_lambda_components {
1730        initial_theta[at] = logs[0];
1731        initial_theta[at + 1] = logs[1];
1732        initial_theta[at + 2] = logs[2];
1733        at += 3;
1734    }
1735    initial_theta[at] = eps_0_init.max(adaptive_opts.min_epsilon).ln();
1736    initial_theta[at + 1] = eps_g_init.max(adaptive_opts.min_epsilon).ln();
1737    initial_theta[at + 2] = eps_c_init.max(adaptive_opts.min_epsilon).ln();
1738
1739    let hyperspecs = build_spatial_adaptive_hyperspecs(runtime_caches.len());
1740    let zero_psi_op: std::sync::Arc<dyn gam_custom_family::CustomFamilyPsiDerivativeOperator> =
1741        std::sync::Arc::new(gam_custom_family::ZeroPsiDerivativeOperator::new(
1742            baseline.design.design.nrows(),
1743            baseline.design.design.ncols(),
1744        ));
1745    let derivative_blocks = vec![
1746        hyperspecs
1747            .par_iter()
1748            .map(|_| CustomFamilyBlockPsiDerivative {
1749                penalty_index: None,
1750                x_psi: Array2::<f64>::zeros((0, 0)),
1751                s_psi: Array2::<f64>::zeros((0, 0)),
1752                s_psi_components: None,
1753                s_psi_penalty_components: None,
1754                x_psi_psi: None,
1755                s_psi_psi: None,
1756                s_psi_psi_components: None,
1757                s_psi_psi_penalty_components: None,
1758                implicit_operator: Some(std::sync::Arc::clone(&zero_psi_op)),
1759                implicit_axis: 0,
1760                implicit_group_id: None,
1761            })
1762            .collect::<Vec<_>>(),
1763    ];
1764
1765    let mixture_link_state = options
1766        .mixture_link
1767        .clone()
1768        .as_ref()
1769        .map(state_fromspec)
1770        .transpose()
1771        .map_err(EstimationError::InvalidInput)?;
1772    let sas_link_state = options
1773        .sas_link
1774        .map(|spec| {
1775            if family.is_binomial_beta_logistic() {
1776                state_from_beta_logisticspec(spec)
1777            } else {
1778                state_from_sasspec(spec)
1779            }
1780        })
1781        .transpose()
1782        .map_err(EstimationError::InvalidInput)?;
1783    let latent_cloglog_state = options.latent_cloglog;
1784    let shared_y = Arc::new(y.to_owned());
1785    let sharedweights = Arc::new(weights.to_owned());
1786    let shared_design = baseline
1787        .design
1788        .design
1789        .try_to_dense_arc("spatial adaptive exact hyperfit design")
1790        .map_err(EstimationError::InvalidInput)?;
1791    let shared_offset = Arc::new(offset.to_owned());
1792    let shared_runtime_caches = Arc::new(runtime_caches.to_vec());
1793    let shared_hyperspecs = Arc::new(hyperspecs.clone());
1794    let zero_quadratic = Arc::new(Array2::<f64>::zeros((
1795        baseline.design.design.ncols(),
1796        baseline.design.design.ncols(),
1797    )));
1798    let base_family = SpatialAdaptiveExactFamily {
1799        family: family.clone(),
1800        latent_cloglog_state,
1801        mixture_link_state: mixture_link_state.clone(),
1802        sas_link_state,
1803        y: shared_y.clone(),
1804        weights: sharedweights.clone(),
1805        design: shared_design.clone(),
1806        offset: shared_offset.clone(),
1807        linear_constraints: baseline.design.linear_constraints.clone(),
1808        runtime_caches: shared_runtime_caches.clone(),
1809        adaptive_params: Vec::new(),
1810        fixed_quadratichessian: zero_quadratic.clone(),
1811        hyperspecs: shared_hyperspecs.clone(),
1812        exact_eval_cache: Arc::new(Mutex::new(None)),
1813    };
1814
1815    let rho_dim = retained_penalties.len();
1816    let operator_slots_end = rho_dim + runtime_caches.len() * 3;
1817    // Every slot's box is `initial_theta[idx] ± WINDOW` clamped into a
1818    // per-slot [floor, cap]. Retained-λ previously used a scale-blind
1819    // ±30 absolute interval, which on small-n / weakly-identified Duchon
1820    // fits let those lambdas wander to the exp(-30) floor and produce
1821    // near-interpolant solutions. Anchoring on baseline log-λ inherits the
1822    // baseline REML's scale calibration so the overlay can only refine
1823    // within an exp(±6) ≈ 400× band of the well-posed baseline regime,
1824    // matching the discipline already applied to operator and epsilon
1825    // slots.
1826    const UNIFIED_LOG_WINDOW: f64 = 6.0;
1827    const RETAINED_LAMBDA_LOG_LOWER_FLOOR: f64 = -30.0;
1828    const RETAINED_LAMBDA_LOG_UPPER_CAP: f64 = 30.0;
1829    const OPERATOR_LAMBDA_LOG_LOWER_FLOOR: f64 = -10.0;
1830    const OPERATOR_LAMBDA_LOG_UPPER_CAP: f64 = 30.0;
1831    let epsilon_floor_log = adaptive_opts.min_epsilon.max(1e-12).ln();
1832    let anchored_bound = |idx: usize, sign: f64| -> f64 {
1833        let raw = initial_theta[idx] + sign * UNIFIED_LOG_WINDOW;
1834        if idx < rho_dim {
1835            raw.clamp(
1836                RETAINED_LAMBDA_LOG_LOWER_FLOOR,
1837                RETAINED_LAMBDA_LOG_UPPER_CAP,
1838            )
1839        } else if idx < operator_slots_end {
1840            raw.clamp(
1841                OPERATOR_LAMBDA_LOG_LOWER_FLOOR,
1842                OPERATOR_LAMBDA_LOG_UPPER_CAP,
1843            )
1844        } else {
1845            raw.max(epsilon_floor_log)
1846        }
1847    };
1848    let eps_lower =
1849        Array1::from_iter((0..initial_theta.len()).map(|idx| anchored_bound(idx, -1.0)));
1850    let eps_upper = Array1::from_iter((0..initial_theta.len()).map(|idx| anchored_bound(idx, 1.0)));
1851    let blockspec = ParameterBlockSpec {
1852        name: "eta".to_string(),
1853        design: baseline.design.design.clone(),
1854        offset: offset.to_owned(),
1855        penalties: retained_penalties
1856            .iter()
1857            .cloned()
1858            .map(PenaltyMatrix::Dense)
1859            .collect(),
1860        nullspace_dims: retained_nullspace_dims.clone(),
1861        initial_log_lambdas: Array1::from_vec(retained_log_lambdas.clone()),
1862        initial_beta: Some(baseline.fit.beta.clone()),
1863        gauge_priority: 100,
1864        jacobian_callback: None,
1865        stacked_design: None,
1866        stacked_offset: None,
1867    };
1868    let screening_cap = Arc::new(AtomicUsize::new(0));
1869    let outer_opts = BlockwiseFitOptions {
1870        inner_max_cycles: options.max_iter,
1871        inner_tol: options.tol,
1872        outer_max_iter: options.max_iter,
1873        outer_tol: options.tol,
1874        compute_covariance: false,
1875        screening_max_inner_iterations: Some(Arc::clone(&screening_cap)),
1876        ..BlockwiseFitOptions::default()
1877    };
1878
1879    use gam_solve::rho_optimizer::OuterProblem;
1880    use gam_problem::{DeclaredHessianForm, Derivative, HessianResult, OuterEval};
1881
1882    struct SpatialAdaptiveOuterState {
1883        warm_cache: Option<CustomFamilyWarmStart>,
1884        last_eval: Option<(
1885            Array1<f64>,
1886            f64,
1887            Array1<f64>,
1888            HessianResult,
1889            CustomFamilyWarmStart,
1890        )>,
1891    }
1892
1893    let n_theta = initial_theta.len();
1894
1895    // Clamp theta to the asymmetric epsilon bounds that run_outer's symmetric
1896    // rho_bound cannot express directly.
1897    let theta_bounds = Some((eps_lower.clone(), eps_upper.clone()));
1898    let clamp_theta = {
1899        let lo = eps_lower;
1900        let hi = eps_upper;
1901        move |theta: &Array1<f64>| -> Array1<f64> {
1902            let mut clamped = theta.clone();
1903            for i in 0..clamped.len() {
1904                clamped[i] = clamped[i].clamp(lo[i], hi[i]);
1905            }
1906            clamped
1907        }
1908    };
1909
1910    let decode_theta = |theta: &Array1<f64>| -> (Array1<f64>, Vec<SpatialAdaptiveTermHyperParams>) {
1911        let rho = theta.slice(s![..rho_dim]).to_owned();
1912        let adaptive_lambda_start = rho_dim;
1913        let adaptive_lambda_end = adaptive_lambda_start + runtime_caches.len() * 3;
1914        let eps = [
1915            theta[adaptive_lambda_end].exp(),
1916            theta[adaptive_lambda_end + 1].exp(),
1917            theta[adaptive_lambda_end + 2].exp(),
1918        ];
1919        let adaptive_params = runtime_caches
1920            .iter()
1921            .enumerate()
1922            .map(|(cache_idx, _)| SpatialAdaptiveTermHyperParams {
1923                lambda: [
1924                    theta[adaptive_lambda_start + cache_idx * 3].exp(),
1925                    theta[adaptive_lambda_start + cache_idx * 3 + 1].exp(),
1926                    theta[adaptive_lambda_start + cache_idx * 3 + 2].exp(),
1927                ],
1928                epsilon: eps,
1929            })
1930            .collect::<Vec<_>>();
1931        (rho, adaptive_params)
1932    };
1933    let analytic_outer_hessian_available =
1934        gam_custom_family::joint_exact_analytic_outer_hessian_available()
1935            && base_family
1936                .exact_outer_derivative_order(std::slice::from_ref(&blockspec), &outer_opts)
1937                .has_hessian()
1938            && gam_custom_family::exact_newton_outer_geometry_supports_second_order_solver(
1939                &base_family,
1940            );
1941    let outer_max_iter = gam_custom_family::cost_gated_first_order_max_iter(
1942        options.max_iter,
1943        base_family.coefficient_gradient_cost(std::slice::from_ref(&blockspec)),
1944        analytic_outer_hessian_available,
1945    );
1946    if outer_max_iter < options.max_iter {
1947        log::info!(
1948            "[OUTER] exact spatial adaptive regularization: first-order work gate reduced outer_max_iter {} -> {}",
1949            options.max_iter,
1950            outer_max_iter,
1951        );
1952    }
1953    // Keep the exact outer Hessian whenever the adaptive family can provide it.
1954    // The Charbonnier pseudo-Laplace surface mixes ordinary log-lambda
1955    // coordinates with adaptive λ/ε coordinates; exact curvature is the best
1956    // route when available. If a family cannot provide exact curvature, this
1957    // builder declares only the true first-order capability.
1958    let problem = OuterProblem::new(n_theta)
1959        .with_gradient(Derivative::Analytic)
1960        .with_hessian(if analytic_outer_hessian_available {
1961            DeclaredHessianForm::Either
1962        } else {
1963            DeclaredHessianForm::Unavailable
1964        })
1965        .with_fallback_policy(gam_solve::rho_optimizer::FallbackPolicy::Disabled)
1966        .with_psi_dim(n_theta.saturating_sub(rho_dim))
1967        .with_tolerance(options.tol)
1968        .with_max_iter(outer_max_iter)
1969        .with_seed_config(gam_problem::SeedConfig::default())
1970        .with_screening_cap(Arc::clone(&screening_cap))
1971        .with_initial_rho(initial_theta.clone());
1972    let problem = if let Some((lo, hi)) = theta_bounds {
1973        problem.with_bounds(lo, hi)
1974    } else {
1975        problem
1976    };
1977
1978    let eval_outer = |st: &mut SpatialAdaptiveOuterState,
1979                      theta: &Array1<f64>,
1980                      order: gam_solve::rho_optimizer::OuterEvalOrder|
1981     -> Result<OuterEval, EstimationError> {
1982        let theta = clamp_theta(theta);
1983
1984        if let Some((cached_theta, cached_cost, cached_grad, cached_hess, cached_warm)) =
1985            &st.last_eval
1986            && cached_theta.len() == theta.len()
1987            && cached_theta
1988                .iter()
1989                .zip(theta.iter())
1990                .all(|(&a, &b)| (a - b).abs() <= 1e-12)
1991            && (!matches!(
1992                order,
1993                gam_solve::rho_optimizer::OuterEvalOrder::ValueGradientHessian
1994            ) || analytic_outer_hessian_available)
1995        {
1996            st.warm_cache = Some(cached_warm.clone());
1997            return Ok(OuterEval {
1998                cost: *cached_cost,
1999                gradient: cached_grad.clone(),
2000                hessian: if matches!(
2001                    order,
2002                    gam_solve::rho_optimizer::OuterEvalOrder::ValueGradientHessian
2003                ) && analytic_outer_hessian_available
2004                {
2005                    cached_hess.clone()
2006                } else {
2007                    HessianResult::Unavailable
2008                },
2009                inner_beta_hint: None,
2010            });
2011        }
2012
2013        let (rho, adaptive_params) = decode_theta(&theta);
2014        let family_eval = base_family.with_adaptive_params(adaptive_params, zero_quadratic.clone());
2015        let need_hessian = matches!(
2016            order,
2017            gam_solve::rho_optimizer::OuterEvalOrder::ValueGradientHessian
2018        ) && analytic_outer_hessian_available;
2019        let result = evaluate_custom_family_joint_hyper(
2020            &family_eval,
2021            std::slice::from_ref(&blockspec),
2022            &outer_opts,
2023            &rho,
2024            &derivative_blocks,
2025            st.warm_cache.as_ref(),
2026            if need_hessian {
2027                gam_solve::estimate::reml::reml_outer_engine::EvalMode::ValueGradientHessian
2028            } else {
2029                gam_solve::estimate::reml::reml_outer_engine::EvalMode::ValueAndGradient
2030            },
2031        )
2032        .map_err(|e| {
2033            EstimationError::RemlOptimizationFailed(format!("spatial adaptive eval failed: {e}"))
2034        })?;
2035        if !result.inner_converged {
2036            st.warm_cache = Some(result.warm_start.clone());
2037            return Err(EstimationError::RemlOptimizationFailed(
2038                "exact spatial adaptive inner solve did not converge".to_string(),
2039            ));
2040        }
2041        if !result.objective.is_finite() || result.gradient.iter().any(|v| !v.is_finite()) {
2042            return Err(EstimationError::RemlOptimizationFailed(
2043                "exact spatial adaptive objective returned non-finite values".to_string(),
2044            ));
2045        }
2046        let hessian_result = if need_hessian {
2047            if !result.outer_hessian.is_analytic() {
2048                return Err(EstimationError::RemlOptimizationFailed(
2049                    "exact spatial adaptive objective did not return an exact outer Hessian"
2050                        .to_string(),
2051                ));
2052            }
2053            match result.outer_hessian.dim() {
2054                Some(dim) if dim == theta.len() => {}
2055                Some(dim) => {
2056                    return Err(EstimationError::RemlOptimizationFailed(format!(
2057                        "exact spatial adaptive outer Hessian dimension mismatch: got {dim}, expected {}",
2058                        theta.len(),
2059                    )));
2060                }
2061                None => {
2062                    return Err(EstimationError::RemlOptimizationFailed(
2063                        "exact spatial adaptive objective did not report an outer Hessian dimension"
2064                            .to_string(),
2065                    ));
2066                }
2067            }
2068            st.last_eval = Some((
2069                theta.clone(),
2070                result.objective,
2071                result.gradient.clone(),
2072                result.outer_hessian.clone(),
2073                result.warm_start.clone(),
2074            ));
2075            result.outer_hessian
2076        } else {
2077            HessianResult::Unavailable
2078        };
2079        st.warm_cache = Some(result.warm_start);
2080        Ok(OuterEval {
2081            cost: result.objective,
2082            gradient: result.gradient,
2083            hessian: hessian_result,
2084            inner_beta_hint: None,
2085        })
2086    };
2087
2088    let mut obj = problem.build_objective_with_screening_proxy(
2089        SpatialAdaptiveOuterState {
2090            warm_cache: None,
2091            last_eval: None,
2092        },
2093        |st: &mut SpatialAdaptiveOuterState, theta: &Array1<f64>| {
2094            let theta = clamp_theta(theta);
2095            let (rho, adaptive_params) = decode_theta(&theta);
2096            let family_eval =
2097                base_family.with_adaptive_params(adaptive_params, zero_quadratic.clone());
2098            let result = evaluate_custom_family_joint_hyper(
2099                &family_eval,
2100                std::slice::from_ref(&blockspec),
2101                &outer_opts,
2102                &rho,
2103                &derivative_blocks,
2104                st.warm_cache.as_ref(),
2105                gam_solve::estimate::reml::reml_outer_engine::EvalMode::ValueOnly,
2106            )
2107            .map_err(|e| {
2108                EstimationError::RemlOptimizationFailed(format!(
2109                    "spatial adaptive cost eval failed: {e}"
2110                ))
2111            })?;
2112            if !result.inner_converged {
2113                st.warm_cache = Some(result.warm_start);
2114                return Err(EstimationError::RemlOptimizationFailed(
2115                    "exact spatial adaptive cost inner solve did not converge".to_string(),
2116                ));
2117            }
2118            st.warm_cache = Some(result.warm_start);
2119            Ok(result.objective)
2120        },
2121        |st: &mut SpatialAdaptiveOuterState, theta: &Array1<f64>| {
2122            eval_outer(
2123                st,
2124                theta,
2125                if analytic_outer_hessian_available {
2126                    gam_solve::rho_optimizer::OuterEvalOrder::ValueGradientHessian
2127                } else {
2128                    gam_solve::rho_optimizer::OuterEvalOrder::ValueAndGradient
2129                },
2130            )
2131        },
2132        |st: &mut SpatialAdaptiveOuterState,
2133         theta: &Array1<f64>,
2134         order: gam_solve::rho_optimizer::OuterEvalOrder| {
2135            eval_outer(st, theta, order)
2136        },
2137        Some(|st: &mut SpatialAdaptiveOuterState| {
2138            st.warm_cache = None;
2139            st.last_eval = None;
2140        }),
2141        Some(|st: &mut SpatialAdaptiveOuterState, theta: &Array1<f64>| {
2142            let theta = clamp_theta(theta);
2143            let (rho, adaptive_params) = decode_theta(&theta);
2144            let family_eval =
2145                base_family.with_adaptive_params(adaptive_params, zero_quadratic.clone());
2146            let result = evaluate_custom_family_joint_hyper_efs(
2147                &family_eval,
2148                std::slice::from_ref(&blockspec),
2149                &outer_opts,
2150                &rho,
2151                &derivative_blocks,
2152                st.warm_cache.as_ref(),
2153            )
2154            .map_err(|e| {
2155                EstimationError::RemlOptimizationFailed(format!(
2156                    "spatial adaptive EFS eval failed: {e}"
2157                ))
2158            })?;
2159            if !result.inner_converged {
2160                st.warm_cache = Some(result.warm_start);
2161                return Err(EstimationError::RemlOptimizationFailed(
2162                    "exact spatial adaptive EFS inner solve did not converge".to_string(),
2163                ));
2164            }
2165            st.warm_cache = Some(result.warm_start);
2166            Ok(result.efs_eval)
2167        }),
2168        // Seed-screening ranking proxy (#969). The regular cost closure
2169        // above hard-errors on a non-converged inner solve — correct for
2170        // line-search costs, but under the screening cap
2171        // (`screening_max_inner_iterations`, wired into `outer_opts`) the
2172        // inner solve is truncated BY DESIGN, so screening through that
2173        // closure rejects every seed and re-creates the all-seeds-rejected
2174        // front-door failure genus. Screening only RANKS candidates: the
2175        // penalized objective of the capped solve is a meaningful ranking
2176        // signal even unconverged (the same contract as the custom-family
2177        // labeled proxy), so accept it and let the cascade pick the best
2178        // seed; the selected seed is then fit with the full budget.
2179        |st: &mut SpatialAdaptiveOuterState, theta: &Array1<f64>| {
2180            let theta = clamp_theta(theta);
2181            let (rho, adaptive_params) = decode_theta(&theta);
2182            let family_eval =
2183                base_family.with_adaptive_params(adaptive_params, zero_quadratic.clone());
2184            let result = evaluate_custom_family_joint_hyper(
2185                &family_eval,
2186                std::slice::from_ref(&blockspec),
2187                &outer_opts,
2188                &rho,
2189                &derivative_blocks,
2190                st.warm_cache.as_ref(),
2191                gam_solve::estimate::reml::reml_outer_engine::EvalMode::ValueOnly,
2192            )
2193            .map_err(|e| {
2194                EstimationError::RemlOptimizationFailed(format!(
2195                    "spatial adaptive screening eval failed: {e}"
2196                ))
2197            })?;
2198            st.warm_cache = Some(result.warm_start);
2199            Ok(result.objective)
2200        },
2201    );
2202
2203    let outer_result = problem
2204        .run(&mut obj, "exact spatial adaptive regularization")
2205        .map_err(|e| {
2206            EstimationError::InvalidInput(format!(
2207                "exact spatial adaptive outer optimization failed: {e}"
2208            ))
2209        })?;
2210    if !outer_result.converged {
2211        // The strict absolute-floor gradient criterion (`‖g‖_proj ≤ options.tol`)
2212        // is too tight near the box-constrained boundary of the adaptive
2213        // Charbonnier pseudo-Laplace objective: as the optimizer pushes ε → ∞
2214        // (overlay-disabled corner), λ → λ_min, the Hessian's nearly-null
2215        // direction lets Cauchy/Newton accept ~e-3-magnitude probe steps that
2216        // give cost changes well below 6-digit precision, and the projected
2217        // gradient floors at numerical-noise-scale (≈ 5e-6 for n≈500, cost≈
2218        // 3e2 fits in double precision) rather than at 0. Accept the iterate
2219        // when the mgcv-style relative-to-cost criterion ‖g‖_proj ≤ τ·(1+|f|)
2220        // is satisfied — that is the textbook REML convergence rule and is
2221        // exactly what `opt::GradientTolerance::relative_to_cost(τ)` would
2222        // have enforced if this OuterProblem path had wired it through. The
2223        // strict absolute floor is retained as the primary check; the
2224        // rel-to-cost form only kicks in once the absolute one has timed out
2225        // at `max_iter`, so unconverged divergent runs (which have large |g|)
2226        // still surface as errors.
2227        let rel_to_cost_threshold = options.tol * (1.0_f64 + outer_result.final_value.abs());
2228        // Rel-to-cost acceptance requires an actual gradient measurement;
2229        // `None` (cache-hit short-circuit, gradient-free path) cannot satisfy
2230        // the mgcv-style criterion regardless of magnitude.
2231        if let Some(final_grad) = outer_result
2232            .final_grad_norm
2233            .filter(|v| v.is_finite() && *v <= rel_to_cost_threshold)
2234        {
2235            log::info!(
2236                "[spatial-adaptive] outer optimization hit max_iter={} but \
2237                 projected gradient norm {:.3e} ≤ τ·(1+|f|) = {:.3e} \
2238                 (τ={:.3e}, |f|={:.3e}); accepting iterate under the mgcv-style \
2239                 relative-to-cost REML convergence criterion.",
2240                outer_result.iterations,
2241                final_grad,
2242                rel_to_cost_threshold,
2243                options.tol,
2244                outer_result.final_value.abs(),
2245            );
2246        } else {
2247            crate::bail_invalid_estim!(
2248                "exact spatial adaptive outer optimization did not converge after {} iterations (final_objective={:.6e}, final_grad_norm={})",
2249                outer_result.iterations,
2250                outer_result.final_value,
2251                outer_result.final_grad_norm_report(),
2252            );
2253        }
2254    }
2255    let outer_iterations = outer_result.iterations;
2256    // `None` = no gradient measurement (cache-hit / gradient-free); the
2257    // authoritative convergence signal is `outer_converged`.
2258    let outer_grad_norm: Option<f64> = outer_result.final_grad_norm;
2259    let theta_star = outer_result.rho;
2260    let rho_star = theta_star.slice(s![..rho_dim]).to_owned();
2261    let adaptive_lambda_start = rho_dim;
2262    let adaptive_lambda_end = adaptive_lambda_start + runtime_caches.len() * 3;
2263    let eps_star = [
2264        theta_star[adaptive_lambda_end].exp(),
2265        theta_star[adaptive_lambda_end + 1].exp(),
2266        theta_star[adaptive_lambda_end + 2].exp(),
2267    ];
2268    let adaptive_params = runtime_caches
2269        .iter()
2270        .enumerate()
2271        .map(|(cache_idx, _)| SpatialAdaptiveTermHyperParams {
2272            lambda: [
2273                theta_star[adaptive_lambda_start + cache_idx * 3].exp(),
2274                theta_star[adaptive_lambda_start + cache_idx * 3 + 1].exp(),
2275                theta_star[adaptive_lambda_start + cache_idx * 3 + 2].exp(),
2276            ],
2277            epsilon: eps_star,
2278        })
2279        .collect::<Vec<_>>();
2280    let mut fixed_total = Array2::<f64>::zeros((
2281        baseline.design.design.ncols(),
2282        baseline.design.design.ncols(),
2283    ));
2284    for (idx, penalty) in retained_penalties.iter().enumerate() {
2285        fixed_total.scaled_add(rho_star[idx].exp(), penalty);
2286    }
2287    let final_family =
2288        base_family.with_adaptive_params(adaptive_params.clone(), Arc::new(fixed_total.clone()));
2289    let final_blockspec = ParameterBlockSpec {
2290        name: "eta".to_string(),
2291        design: baseline.design.design.clone(),
2292        offset: offset.to_owned(),
2293        penalties: vec![],
2294        nullspace_dims: vec![],
2295        initial_log_lambdas: Array1::zeros(0),
2296        initial_beta: Some(baseline.fit.beta.clone()),
2297        gauge_priority: 100,
2298        jacobian_callback: None,
2299        stacked_design: None,
2300        stacked_offset: None,
2301    };
2302    let final_fit = fit_custom_family(
2303        &final_family,
2304        &[final_blockspec],
2305        &BlockwiseFitOptions {
2306            inner_max_cycles: options.max_iter,
2307            inner_tol: options.tol,
2308            outer_max_iter: 1,
2309            outer_tol: options.tol,
2310            compute_covariance: true,
2311            ..BlockwiseFitOptions::default()
2312        },
2313    )
2314    .map_err(EstimationError::CustomFamily)?;
2315    let beta = final_fit.block_states[0].beta.clone();
2316    let final_eval = final_family
2317        .exact_evaluation(&beta)
2318        .map_err(EstimationError::InvalidInput)?;
2319    let penalized_hessian = final_eval
2320        .totalobjectivehessian(&final_family.design)
2321        .map_err(EstimationError::InvalidInput)?;
2322    let beta_covariance = final_fit.covariance_conditional.clone();
2323    let beta_standard_errors = beta_covariance
2324        .as_ref()
2325        .map(|cov| Array1::from_iter((0..cov.nrows()).map(|i| cov[[i, i]].max(0.0).sqrt())));
2326
2327    let mut full_lambdas = baseline.fit.lambdas.clone();
2328    for (idx, &global_idx) in retained_global_indices.iter().enumerate() {
2329        full_lambdas[global_idx] = rho_star[idx].exp();
2330    }
2331    for (cache_idx, cache) in runtime_caches.iter().enumerate() {
2332        full_lambdas[cache.mass_penalty_global_idx] = adaptive_params[cache_idx].lambda[0];
2333        full_lambdas[cache.tension_penalty_global_idx] = adaptive_params[cache_idx].lambda[1];
2334        full_lambdas[cache.stiffness_penalty_global_idx] = adaptive_params[cache_idx].lambda[2];
2335    }
2336
2337    let deviance = if family.is_gaussian_identity() {
2338        y.iter()
2339            .zip(final_eval.obs.mu.iter())
2340            .zip(weights.iter())
2341            .map(|((&yy, &mu), &w)| w.max(0.0) * (yy - mu) * (yy - mu))
2342            .sum()
2343    } else {
2344        -2.0 * final_eval.obs.log_likelihood
2345    };
2346    let mut local_penalty_blocks =
2347        Vec::<PenaltySpec>::with_capacity(baseline.design.penalties.len());
2348    for (global_idx, bp) in baseline.design.penalties.iter().enumerate() {
2349        if adaptive_penalty_indices.contains(&global_idx) {
2350            let cache = runtime_caches
2351                .iter()
2352                .find(|cache| {
2353                    cache.mass_penalty_global_idx == global_idx
2354                        || cache.tension_penalty_global_idx == global_idx
2355                        || cache.stiffness_penalty_global_idx == global_idx
2356                })
2357                .ok_or_else(|| {
2358                    EstimationError::InvalidInput(format!(
2359                        "missing runtime cache for adaptive penalty index {global_idx}"
2360                    ))
2361                })?;
2362            let cache_idx = runtime_caches
2363                .iter()
2364                .position(|c| {
2365                    c.mass_penalty_global_idx == global_idx
2366                        || c.tension_penalty_global_idx == global_idx
2367                        || c.stiffness_penalty_global_idx == global_idx
2368                })
2369                .ok_or_else(|| {
2370                    EstimationError::InvalidInput(format!(
2371                        "missing adaptive cache position for penalty index {global_idx}"
2372                    ))
2373                })?;
2374            let state = &final_eval.adaptive_states[cache_idx];
2375            let local = if cache.mass_penalty_global_idx == global_idx {
2376                scalar_operatorhessian(&cache.d0, &state.magnitude.betahessian_diag())
2377                    .mapv(|v| adaptive_params[cache_idx].lambda[0] * v)
2378            } else if cache.tension_penalty_global_idx == global_idx {
2379                grouped_operatorhessian(
2380                    &cache.d1,
2381                    cache.dimension,
2382                    &state.gradient.betahessian_blocks(),
2383                )?
2384                .mapv(|v| adaptive_params[cache_idx].lambda[1] * v)
2385            } else {
2386                grouped_operatorhessian(
2387                    &cache.d2,
2388                    cache.dimension * cache.dimension,
2389                    &state.curvature.betahessian_blocks(),
2390                )?
2391                .mapv(|v| adaptive_params[cache_idx].lambda[2] * v)
2392            };
2393            // Wrap the pre-scaled global penalty matrix as PenaltySpec::Dense.
2394            local_penalty_blocks.push(PenaltySpec::Dense(penalty_matrixwith_local_block(
2395                baseline.design.design.ncols(),
2396                cache.coeff_global_range.clone(),
2397                &local,
2398            )));
2399        } else {
2400            local_penalty_blocks.push(PenaltySpec::Dense(
2401                bp.to_global(p_total).mapv(|v| v * full_lambdas[global_idx]),
2402            ));
2403        }
2404    }
2405    let (edf_by_block, penalty_block_trace, edf_total) = if let Some(cov) = beta_covariance.as_ref()
2406    {
2407        exact_bounded_edf(
2408            &local_penalty_blocks,
2409            &Array1::from_elem(local_penalty_blocks.len(), 1.0),
2410            cov,
2411        )?
2412    } else {
2413        (
2414            vec![0.0; local_penalty_blocks.len()],
2415            vec![0.0; local_penalty_blocks.len()],
2416            0.0,
2417        )
2418    };
2419    let stable_penalty_term =
2420        2.0 * final_eval.adaptive_penalty_value + beta.dot(&fixed_total.dot(&beta));
2421    let standard_deviation = if family.is_gaussian_identity() {
2422        let denom = (y.len() as f64 - edf_total).max(1.0);
2423        (deviance / denom).sqrt()
2424    } else {
2425        1.0
2426    };
2427    let maps = compute_spatial_adaptiveweights_for_beta(
2428        &beta,
2429        runtime_caches,
2430        eps_star[0],
2431        eps_star[1],
2432        eps_star[2],
2433        adaptive_opts.weight_floor,
2434        adaptive_opts.weight_ceiling,
2435        // Working-Laplace conditional covariance Sigma_beta = H^{-1} from the
2436        // final exact-family solve, reused here as the posterior-SNR variance
2437        // source (no second factorization is formed).
2438        beta_covariance.as_ref(),
2439    )?
2440    .into_iter()
2441    .zip(runtime_caches.iter())
2442    .map(|(w, cache)| AdaptiveSpatialMap {
2443        termname: cache.termname.clone(),
2444        feature_cols: cache.feature_cols.clone(),
2445        collocation_points: cache.collocation_points.clone(),
2446        inv_magweight: w.inv_magweight,
2447        invgradweight: w.invgradweight,
2448        inv_lapweight: w.inv_lapweight,
2449    })
2450    .collect::<Vec<_>>();
2451    let fitted_link = if family.is_latent_cloglog() {
2452        FittedLinkState::LatentCLogLog {
2453            state: latent_cloglog_state
2454                .expect("BinomialLatentCLogLog requires an explicit latent-cloglog state"),
2455        }
2456    } else if family.is_binomial_mixture() {
2457        mixture_link_state
2458            .clone()
2459            .map(|state| FittedLinkState::Mixture {
2460                state,
2461                covariance: None,
2462            })
2463            .unwrap_or(FittedLinkState::Standard(None))
2464    } else if family.is_binomial_sas() {
2465        sas_link_state
2466            .map(|state| FittedLinkState::Sas {
2467                state,
2468                covariance: None,
2469            })
2470            .unwrap_or(FittedLinkState::Standard(None))
2471    } else if family.is_binomial_beta_logistic() {
2472        sas_link_state
2473            .map(|state| FittedLinkState::BetaLogistic {
2474                state,
2475                covariance: None,
2476            })
2477            .unwrap_or(FittedLinkState::Standard(None))
2478    } else {
2479        FittedLinkState::Standard(None)
2480    };
2481    let max_abs_eta = final_eval
2482        .obs
2483        .eta
2484        .iter()
2485        .fold(0.0_f64, |acc, &v| acc.max(v.abs()));
2486    let fitted = FittedTermCollection {
2487        fit: {
2488            let log_lambdas = full_lambdas.mapv(|v| v.max(1e-300).ln());
2489            let inf = FitInference {
2490                edf_by_block,
2491                penalty_block_trace,
2492                edf_total,
2493                smoothing_correction: None,
2494                // Boundary adapter: wrap the raw `Array2<f64>` Hessian as
2495                // `UnscaledPrecision` for the newtype storage.
2496                penalized_hessian: penalized_hessian.clone().into(),
2497                working_weights: final_eval.obs.fisherweight.clone(),
2498                working_response: {
2499                    let mut out = final_eval.obs.eta.clone();
2500                    for i in 0..out.len() {
2501                        let wi = final_eval.obs.fisherweight[i].max(1e-12);
2502                        out[i] += final_eval.obs.score[i] / wi;
2503                    }
2504                    out
2505                },
2506                reparam_qs: None,
2507                dispersion: gam_solve::estimate::Dispersion::Known(1.0),
2508                beta_covariance: beta_covariance
2509                    .clone()
2510                    .map(gam_problem::dispersion_cov::PhiScaledCovariance::from),
2511                beta_standard_errors,
2512                beta_covariance_corrected: None,
2513                beta_standard_errors_corrected: None,
2514                beta_covariance_frequentist: None,
2515                coefficient_influence: None,
2516                weighted_gram: None,
2517                bias_correction_beta: None,
2518            };
2519            let geometry = Some(gam_solve::estimate::FitGeometry {
2520                penalized_hessian: penalized_hessian.into(),
2521                working_weights: inf.working_weights.clone(),
2522                working_response: inf.working_response.clone(),
2523            });
2524            let covariance_conditional = beta_covariance;
2525            let pirls_status_val = if final_fit.outer_converged {
2526                gam_solve::pirls::PirlsStatus::Converged
2527            } else {
2528                gam_solve::pirls::PirlsStatus::StalledAtValidMinimum
2529            };
2530            UnifiedFitResult::try_from_parts(UnifiedFitResultParts {
2531                blocks: vec![gam_solve::estimate::FittedBlock {
2532                    beta: beta.clone(),
2533                    role: gam_problem::BlockRole::Mean,
2534                    edf: edf_total,
2535                    lambdas: full_lambdas.clone(),
2536                }],
2537                log_lambdas,
2538                lambdas: full_lambdas,
2539                likelihood_scale: family.default_scale_metadata(),
2540                likelihood_family: Some(family),
2541                log_likelihood_normalization:
2542                    gam_spec::LogLikelihoodNormalization::UserProvided,
2543                log_likelihood: final_eval.obs.log_likelihood,
2544                deviance,
2545                reml_score: final_fit.penalized_objective,
2546                stable_penalty_term,
2547                penalized_objective: final_fit.penalized_objective,
2548                used_device: false,
2549                outer_iterations,
2550                outer_converged: final_fit.outer_converged,
2551                outer_gradient_norm: outer_grad_norm,
2552                standard_deviation,
2553                covariance_conditional,
2554                covariance_corrected: None,
2555                inference: Some(inf),
2556                fitted_link,
2557                geometry,
2558                block_states: Vec::new(),
2559                pirls_status: pirls_status_val,
2560                max_abs_eta,
2561                constraint_kkt: None,
2562                artifacts: gam_solve::estimate::FitArtifacts {
2563                    pirls: None,
2564                    ..Default::default()
2565                },
2566                inner_cycles: 0,
2567            })?
2568        },
2569        design: baseline.design,
2570        adaptive_diagnostics: Some(AdaptiveRegularizationDiagnostics {
2571            epsilon_0: eps_star[0],
2572            epsilon_g: eps_star[1],
2573            epsilon_c: eps_star[2],
2574            epsilon_outer_iterations: outer_iterations,
2575            mm_iterations: 0,
2576            converged: final_fit.outer_converged,
2577            maps,
2578        }),
2579    };
2580    enforce_term_constraint_feasibility(&fitted.design, &fitted.fit)?;
2581    Ok(fitted)
2582}
2583
2584/// Relax the per-coordinate ρ-prior for terms running in Marra–Wood
2585/// double-penalty selection mode (#1266).
2586///
2587/// The default ρ-prior is a `Normal { mean: 0, sd: 3 }` cap on each log-λ — a
2588/// stabiliser that keeps ordinary smoothing parameters from drifting to
2589/// degenerate extremes (gam#893/#1196). For a smooth carrying a
2590/// `DoublePenaltyNullspace` block (`double_penalty = True`, the default `s(...)`
2591/// — analogous to mgcv `select = TRUE`) that cap is actively wrong: the whole
2592/// purpose of the second penalty is to let REML drive an *unsupported* term to
2593/// `EDF → 0`, which needs both the wiggliness and null-space log-λ to grow
2594/// large. The `ρ²/(2·9)` cap pulls them back toward 0, so REML settles at a
2595/// point that leaves the term under-shrunk — the smooth's EDF comes out ABOVE
2596/// the single-penalty (`double_penalty = False`) EDF instead of at or below it,
2597/// the exact contract violation in #1266. mgcv's `select = TRUE` applies no
2598/// such cap to the selection coordinates, and the lower-level term-collection
2599/// fits already converge correctly under a flat prior.
2600///
2601/// We therefore rewrite the prior to `Independent`, holding the base prior on
2602/// every ordinary coordinate but switching the coordinates of any
2603/// double-penalty term to `Flat`. Single-penalty terms are byte-for-byte
2604/// unchanged, and an already-`Flat`/already-`Independent` base prior, or a
2605/// design with no double-penalty block, is returned untouched.
2606///
2607/// The relaxed per-coordinate prior is FAMILY-AGNOSTIC: the cap-lifting of the
2608/// bending coordinate and the determinacy-gated null-space treatment apply
2609/// identically for Gaussian and non-Gaussian families. The response family / link
2610/// only matters for length-safety (it can append auxiliary trailing ρ
2611/// coordinates via dispersion / SAS / mixture / moving-κ machinery), which is
2612/// gated separately by `length_safe`; once that gate passes the inner ρ aligns
2613/// 1:1 with `penaltyinfo` regardless of family, so the same relaxation is valid
2614/// for a Tweedie / Gamma-log `ps` smooth as for a Gaussian one (#1426/#1477).
2615fn relax_smoothing_rho_prior(
2616    options: &FitOptions,
2617    design: &TermCollectionDesign,
2618) -> gam_spec::RhoPrior {
2619    use gam_terms::basis::BasisMetadata;
2620    let base = &options.rho_prior;
2621    // Only a single scalar prior that actually caps log-λ needs relaxing;
2622    // `Flat` already imposes no cap and `Independent` is assumed caller-built.
2623    if matches!(
2624        base,
2625        gam_spec::RhoPrior::Flat | gam_spec::RhoPrior::Independent(_)
2626    ) {
2627        return base.clone();
2628    }
2629    // LENGTH SAFETY (load-bearing). The per-coordinate `Independent` prior is
2630    // validated against the FULL outer ρ vector and a length disagreement
2631    // saturates the prior to `+∞`, breaking the fit. The ρ vector this prior is
2632    // attached to (the inner REML fit at a *fixed* realized design) aligns 1:1
2633    // with the penalty blocks in `design.penaltyinfo` ONLY when the fit
2634    // introduces no auxiliary trailing ρ coordinates. Such coordinates come from
2635    //   * non-Gaussian dispersion / non-identity link machinery,
2636    //   * SAS ε/δ and mixture-link parameters,
2637    //   * spatial κ length-scale optimisation that actually moves κ.
2638    // Gate to the link-aux-free case. Spatial κ optimisation (Matérn / Duchon /
2639    // sphere / curvature / measure-jet) genuinely appends a moving log-κ
2640    // coordinate AND needs the cap to stabilise it, so bail if any such term is
2641    // present. Thin-plate is the exception: its length-scale is a pure radial
2642    // SCALE that REML cannot identify (the κ optimiser converges to a no-op,
2643    // leaving `n_params = penalty-block count`), so it adds no trailing
2644    // coordinate and is safe to relax alongside the B-spline family. The response
2645    // family / link itself does NOT break length-safety (a non-Gaussian GAM with
2646    // no link-aux and no moving κ still has exactly `penaltyinfo.len()` inner ρ
2647    // coordinates), so the relaxed prior below is family-agnostic.
2648    let has_link_aux = options.sas_link.is_some()
2649        || options.optimize_sas
2650        || options.mixture_link.is_some()
2651        || options.optimize_mixture;
2652    let has_moving_kappa = design.smooth.terms.iter().any(|t| {
2653        matches!(
2654            t.metadata,
2655            BasisMetadata::Matern { .. }
2656                | BasisMetadata::Duchon { .. }
2657                | BasisMetadata::Sphere { .. }
2658                | BasisMetadata::SphereHarmonics { .. }
2659                | BasisMetadata::ConstantCurvature { .. }
2660                | BasisMetadata::MeasureJet { .. }
2661        )
2662    });
2663    // LENGTH SAFETY decides only whether the inner ρ aligns 1:1 with the penalty
2664    // blocks (so an `Independent` prior is valid): it is broken by SAS/mixture
2665    // link-shape coordinates and by a moving spatial κ, NOT by the response
2666    // family or link per se. A Gamma/log (or any other non-Gaussian) GAM with no
2667    // link-aux and no moving κ has exactly `penaltyinfo.len()` ρ coordinates, so
2668    // the `DoublePenaltyNullspace` selection prior below is length-safe there too.
2669    let length_safe = !has_link_aux && !has_moving_kappa;
2670    if !length_safe {
2671        return base.clone();
2672    }
2673    let coords = &design.penaltyinfo;
2674    if coords.is_empty() {
2675        return base.clone();
2676    }
2677    // WELL-IDENTIFICATION GATE (#1089). The ρ-prior is two things at once: a
2678    // #1266/#1271-harmful symmetric cap on each smoothing log-λ, AND a
2679    // #1089-load-bearing stabiliser that makes the outer REML loop terminate on
2680    // an *under-determined* design (gam#893/#1196/#1089: the n=30 five-`ps` wine
2681    // fit has p ≈ 51 > n, so without the cap's curvature the outer criterion is
2682    // flat/degenerate in ρ-space and the loop never certifies a stationary
2683    // point). Only lift the cap when the data comfortably over-determines the
2684    // model (`n ≥ 2·p`), so the unregularised REML problem is well-posed on its
2685    // own; otherwise keep the base prior. The #1266/#1271 cases (n ≈ 800,
2686    // p ≈ 20–40) clear this by ≥20×; the #1089 wine fit (n < p) keeps its cap.
2687    let n_obs = design.design.nrows();
2688    let p_total = design.design.ncols();
2689    // REGIME of the relaxed prior on the relaxable smooth coordinates.
2690    //
2691    // * WELL-DETERMINED (`n ≥ 2·p`): the unregularised REML problem is well
2692    //   posed on its own, so the relaxable coordinates are freed to `Flat`,
2693    //   which the runtime resolves to the firth one-sided barrier — byte-flat
2694    //   on the identified side (pure REML, exactly mgcv) and only a convex wall
2695    //   against the `λ → 0` degeneracy. This is the #1266/#1271 behaviour.
2696    //
2697    // * UNDER-DETERMINED (`n < 2·p`): the design does NOT over-determine the
2698    //   model (the n≈26 five-`ps` wine fit has p > n), so the firth barrier's
2699    //   zero curvature on the identified side leaves the outer REML criterion
2700    //   flat/degenerate in ρ-space and the loop hits `max_iter` at whatever
2701    //   (under-smoothed) λ it last held — EDF rails up to ≈n, the smooths
2702    //   interpolate the training rows, and held-out prediction explodes
2703    //   (#1392: held-out R² as low as −2.5e6 on `wine_gamair`). The previous
2704    //   stabiliser kept the FULL base prior here — a symmetric
2705    //   `Normal{mean:0, sd:3}` cap. Its `ρ²/(2·9)` curvature does terminate the
2706    //   loop, but it is centred at λ=1 with a tight `sd=3`: at the REML optimum
2707    //   `ρ* ≈ 8–15` (heavy smoothing, which an over-parameterised fit needs and
2708    //   which mgcv's pure REML reaches), the cap's `ρ*/9` gradient drags λ back
2709    //   down by `O(1)` in ρ, pinning the fit in the under-smoothed regime.
2710    //
2711    //   The fix keeps a stabiliser with strictly positive curvature (so the
2712    //   loop still certifies a stationary point — the #1089 requirement) but
2713    //   WIDENS it to `sd = RELAX_UNDERDETERMINED_RHO_SD` so its gradient drag at
2714    //   the heavily-smoothed optimum is negligible (`ρ*/sd² = O(1/100)`) and
2715    //   pure REML — not the prior — chooses λ. The wide symmetric Gaussian is
2716    //   weakly informative: ±2σ spans the whole feasible ρ range (`|ρ| ≤ 30`),
2717    //   so it adds termination curvature without biasing which λ REML lands on,
2718    //   restoring the mgcv-like heavy smoothing on the over-parameterised fit.
2719    let underdetermined = n_obs < 2 * p_total;
2720    // Relaxable terms: penalized smooths whose smoothing log-λ the symmetric cap
2721    // wrongly bounds when the term's signal lives in its penalty null space — a
2722    // straight line under a bending penalty drives λ → ∞ but the cap pulls it
2723    // back, leaving spurious wiggle. mgcv caps neither. This is exactly the
2724    // B-spline family (`ps`/`cr`/`cs`/`bs`, BSpline1D), thin-plate (`tp`), and
2725    // tensor-B-spline (`te`/`ti`) smooths — single- AND double-penalty (#1266 is
2726    // the double-penalty case, #1271 the single-penalty `tp`/`ps`). EVERY penalty
2727    // coordinate such a term owns (bending wiggliness AND any null-space
2728    // shrinkage) is freed to `Flat`, which the runtime resolves to the
2729    // firth-default one-sided barrier: no high-λ cap, but still a convex wall
2730    // against the `λ → 0` under-smoothing degeneracy.
2731    let relaxable_terms: std::collections::HashSet<&str> = design
2732        .smooth
2733        .terms
2734        .iter()
2735        .filter(|t| {
2736            matches!(
2737                t.metadata,
2738                BasisMetadata::BSpline1D { .. }
2739                    | BasisMetadata::ThinPlate { .. }
2740                    | BasisMetadata::TensorBSpline { .. }
2741            )
2742            // SHAPE-CONSTRAINED terms must KEEP the cap (#1380). A monotone /
2743            // convex / concave smooth carries linear-inequality constraints; at
2744            // the active boundary (e.g. a convex fit pinned at 2nd-diff = 0) the
2745            // active set collapses the penalized subspace onto the bending
2746            // penalty's own null space ({1, x}), where the smoothing log-λ is
2747            // UNIDENTIFIED. Lifting the cap to `Flat` there lets REML rail λ to
2748            // `RHO_BOUND` (zero curvature → the smooth collapses to a flat/linear
2749            // fit, R² ≈ 0 on data the constraint is correct for). The constraint
2750            // already regularizes the term, and the symmetric cap is the
2751            // #1089-style stabiliser that pins the unidentified λ — so a
2752            // shape-constrained term needs the cap KEPT, exactly the
2753            // under-determined case this gate protects. (Unconstrained #1266/#1271
2754            // selection terms still relax.)
2755            && matches!(t.shape, gam_terms::smooth::ShapeConstraint::None)
2756        })
2757        .map(|t| t.name.as_str())
2758        .collect();
2759    let any_relaxed = coords.iter().any(|info| {
2760        info.termname
2761            .as_deref()
2762            .is_some_and(|name| relaxable_terms.contains(name))
2763    });
2764    if !any_relaxed {
2765        return base.clone();
2766    }
2767    // Relaxed prior for a relaxable smooth coordinate, chosen by regime (see the
2768    // block above): the firth one-sided barrier (`Flat`) when the fit is
2769    // well-determined, a wide-but-curved symmetric Gaussian when it is
2770    // under-determined and the loop still needs termination curvature.
2771    let relaxed_prior = if underdetermined {
2772        gam_spec::RhoPrior::Normal {
2773            mean: 0.0,
2774            sd: RELAX_UNDERDETERMINED_RHO_SD,
2775        }
2776    } else {
2777        gam_spec::RhoPrior::Flat
2778    };
2779    // DOUBLE-PENALTY NULL-SPACE SELECTION (#1392, mgcv `select=TRUE`). A
2780    // double-penalty smooth carries a second `DoublePenaltyNullspace` ridge on
2781    // the term's penalty null space ({1, x} for a 1-D bend) whose only job is
2782    // selection: drive its λ UP (toward the prior's finite well-penalized mode
2783    // λ* = θ², not to ∞) to shrink the null-space (linear) component OUT when
2784    // the data does not support it, exactly as mgcv's `select=TRUE` adds a
2785    // null-space penalty. On an over-parameterized `p > n` fit
2786    // (`wine_gamair`: 5 `ps` smooths on ~26 rows) the symmetric relaxed prior
2787    // above leaves this ridge's outer score flat on the select-out side, so REML
2788    // stalls it at λ ≈ 0.11 — the null space is kept, the EDF rails up, and
2789    // held-out prediction collapses (#1392). The RANGE-space (`Primary`) bending
2790    // coordinate's smoothing selection must NOT be touched, so this select-out
2791    // bias is gated to `DoublePenaltyNullspace` coordinates only and is applied
2792    // ONLY in the under-determined regime — in the well-determined regime the
2793    // relaxable coordinates stay byte-flat (`Flat`) so a clean `n > p` fit is
2794    // unchanged (no regression on ordinary smooth recovery).
2795    //
2796    // The strong select-out PC prior is applied to the `DoublePenaltyNullspace`
2797    // coordinate ONLY in the UNDER-DETERMINED regime, where the outer score is
2798    // genuinely flat on the select-out side and REML needs the active push. In the
2799    // WELL-DETERMINED regime the null space gets the wide
2800    // `nullspace_degeneracy_prior` instead (see below) — an active select-out mode
2801    // there would over-shrink a genuinely-supported collinear null space (#1476).
2802    // The RANGE-space (`Primary`) bending coordinate is untouched (stays `Flat`
2803    // when well-determined), so ordinary single-smooth recovery is unchanged.
2804    //
2805    let nullspace_select_prior = gam_spec::RhoPrior::PenalizedComplexity {
2806        upper: NULLSPACE_SELECT_PC_UPPER,
2807        tail_prob: NULLSPACE_SELECT_PC_TAIL_PROB,
2808    };
2809    // WELL-DETERMINED NULL-SPACE DEGENERACY BREAKER (#1476). When the fit is
2810    // well-determined (`n ≥ 2·p`) the strong `nullspace_select_prior` above is the
2811    // WRONG tool for the Gaussian null-space coordinate: its finite well-penalized
2812    // mode at `λ* = θ² ≈ 8483` is an aggressive select-OUT pull that drags a
2813    // GENUINELY-SUPPORTED null space (a real linear/constant component) toward
2814    // collapse — the #1476 over-shrink. But leaving the coordinate fully `Flat`
2815    // (the previous well-determined behaviour) is the OTHER failure: under
2816    // concurvity (`s(x1)+s(x2)`, corr ≈ 0.9) the two smooths' null-space (linear)
2817    // directions are near-collinear, so the joint REML objective is essentially
2818    // FLAT along the "transfer the shared linear signal between the two smooths"
2819    // ridge; with zero curvature on that coordinate REML cannot certify an
2820    // interior stationary point and one smooth's `λ_nullspace` rails to the ρ
2821    // bound (≈1e13), annihilating its genuine linear signal to `EDF ≈ 0` while the
2822    // other absorbs it. The principled fix is NEITHER a select-out mode NOR a
2823    // flat coordinate: it is a WIDE, weakly-informative symmetric Gaussian that
2824    // contributes strictly-positive termination curvature `1/sd²` (breaking the
2825    // concurvity flat-ridge degeneracy so REML lands an interior allocation) while
2826    // its gradient `ρ/sd²` at any plausible optimum is negligible — so REML, not
2827    // the prior, chooses how the shared linear signal is split. This adds no
2828    // directional select-out bias, so it does NOT over-shrink a supported null
2829    // space (#1476); a genuinely-UNSUPPORTED null space is still selected out
2830    // because REML's own score drives its `λ` up and the weak symmetric pull
2831    // barely opposes it (#1266 irrelevant-covariate shrinkage, #1371 single-smooth
2832    // recovery preserved). The strong PC select-out remains in the
2833    // UNDER-DETERMINED regime, where the score IS flat on the select-out side and
2834    // REML needs the active push (#1392 wine `p > n`).
2835    let nullspace_degeneracy_prior = gam_spec::RhoPrior::Normal {
2836        mean: 0.0,
2837        sd: NULLSPACE_WELLDET_DEGENERACY_RHO_SD,
2838    };
2839    let per_coord = coords
2840        .iter()
2841        .map(|info| {
2842            let relax = info
2843                .termname
2844                .as_deref()
2845                .is_some_and(|name| relaxable_terms.contains(name));
2846            if !relax {
2847                return base.clone();
2848            }
2849            let is_nullspace =
2850                matches!(info.penalty.source, PenaltySource::DoublePenaltyNullspace);
2851            // The relaxed per-coordinate prior is FAMILY-AGNOSTIC: the choice
2852            // depends only on the coordinate's role (bending vs null-space
2853            // selection) and on whether the data over-determines the model, NOT
2854            // on the response family or link. (Length-safety — the only thing the
2855            // family/link can break via auxiliary ρ coordinates — is already
2856            // gated above by `length_safe`; reaching this point means the inner ρ
2857            // aligns 1:1 with `penaltyinfo` for Gaussian and non-Gaussian alike.)
2858            //
2859            // The previous code split here on `gaussian_identity` and pinned the
2860            // non-Gaussian null-space coordinate to the AGGRESSIVE PC select-out
2861            // prior in BOTH determinacy regimes. That select-out prior has a
2862            // finite well-penalized mode at λ* ≈ θ² ≈ 8483, which carves a SECOND,
2863            // deep basin into the 2-D (bending, null-space) outer REML surface at
2864            // large λ_null. On a well-determined non-Gaussian double-penalty `ps`
2865            // smooth the outer ARC then has two competing basins — the genuine
2866            // bending optimum and the prior-induced high-λ_null shelf — and the
2867            // expensive non-Gaussian multi-start lands the wrong one: the fit
2868            // ships a right-boundary blow-up (Tweedie `s(x)` pred ≈ 1.4–2.0× truth
2869            // at x=1 on data whose null space is unsupported) and, on the hard
2870            // seeds, a falsely-"converged" EDF-inflated under-smooth (#1477; the
2871            // same genus as the #1426 Gamma/log overfit). The Gaussian path does
2872            // NOT do this — #1476 deliberately switched its well-determined
2873            // null-space coordinate to the wide, weakly-informative degeneracy
2874            // prior precisely because the active select-out over-shrinks /
2875            // destabilises a well-determined fit. Non-Gaussian needs the identical
2876            // treatment, so the determinacy gate now applies to BOTH families:
2877            //
2878            //   * BENDING (range-space) coordinate → `relaxed_prior` (firth
2879            //     one-sided barrier when well-determined = pure REML = mgcv; wide
2880            //     #1089 `Normal` when under-determined).
2881            //   * NULL-SPACE selection coordinate → the AGGRESSIVE PC select-out
2882            //     ONLY when under-determined (`p > n`, #1392 wine: the outer score
2883            //     is flat on the select-out side and REML needs the active push);
2884            //     otherwise the gentle, wide degeneracy prior (#1476), which adds
2885            //     termination curvature without biasing which λ_null REML lands on
2886            //     — so a genuinely-unsupported null space is still selected out by
2887            //     REML's own score (the sin-data linear trend → λ_null large) and a
2888            //     genuinely-supported one is not over-shrunk.
2889            if is_nullspace {
2890                if underdetermined {
2891                    nullspace_select_prior.clone()
2892                } else {
2893                    nullspace_degeneracy_prior.clone()
2894                }
2895            } else {
2896                relaxed_prior.clone()
2897            }
2898        })
2899        .collect::<Vec<_>>();
2900    gam_spec::RhoPrior::Independent(per_coord)
2901}
2902
2903/// Standard deviation of the wide, weakly-informative symmetric `Normal` prior
2904/// placed on a relaxable smooth's log-λ coordinates when the fit is
2905/// under-determined (`n < 2·p`); see [`relax_smoothing_rho_prior`].
2906///
2907/// Chosen so that ±2σ spans the entire feasible ρ range (the outer optimiser
2908/// bounds `|ρ| ≤ 30`): the prior contributes strictly-positive termination
2909/// curvature `1/sd²` to the outer Hessian (the #1089 requirement that the REML
2910/// loop certify a stationary point on a `p > n` design) while its gradient drag
2911/// at the heavily-smoothed REML optimum is negligible, so pure REML — matching
2912/// mgcv — selects λ. Reducing it toward the old `sd = 3` re-introduces the
2913/// #1392 under-smoothing drag; widening it further weakens termination
2914/// curvature without further benefit.
2915const RELAX_UNDERDETERMINED_RHO_SD: f64 = 15.0;
2916
2917/// Distance-scale bound `upper` (`P(d > upper) = tail_prob` on the marginal-SD
2918/// scale `d = exp(-ρ/2)`) of the penalized-complexity prior placed on a
2919/// relaxable smooth's `DoublePenaltyNullspace` selection coordinate when the fit
2920/// is under-determined (`n < 2·p`); see [`relax_smoothing_rho_prior`].
2921///
2922/// The null-space ridge exists only to SELECT the linear/constant null-space
2923/// component out (mgcv `select=TRUE`): we want its `λ` driven UP (`d → 0`)
2924/// unless the data clearly buys the null-space wiggle. The PC prior is the
2925/// convex bowl `C(ρ) = ρ/2 + θ e^{-ρ/2}` with the steep exponential wall on the
2926/// `λ → 0` (null space kept, `d > upper`) side and a FINITE interior mode at
2927/// `ρ* = 2 ln θ` (`λ* = θ²`). A small `upper` puts that wall close in, so the
2928/// coordinate's λ is selected up toward the well-penalized mode; the data can
2929/// still keep the null space when it genuinely earns it (the over-smoothing side
2930/// of the bowl, gradient `→ +1/2` only in the far tail, pulls ρ back DOWN toward
2931/// λ* — there is no λ → ∞ runaway). `0.05` places the wall at a marginal-SD
2932/// scale two decades below unit, biasing toward select-out on the
2933/// over-parameterized `p > n` wine fit while staying weakly informative.
2934const NULLSPACE_SELECT_PC_UPPER: f64 = 0.05;
2935
2936/// Tail probability `α` (`P(d > upper) = α`) calibrating the rate
2937/// `θ = −ln(α)/upper` of the [`NULLSPACE_SELECT_PC_UPPER`] penalized-complexity
2938/// select-out prior. A small `α` makes the wall against the kept-null-space
2939/// (`λ → 0`) side steep; combined with the small `upper` it yields a strong
2940/// θ ≈ 92 so REML moves the under-determined null-space ridge off its stalled
2941/// λ ≈ 0.11 toward select-out. The PC bowl has a FINITE mode at `λ* = θ² ≈ 8483`
2942/// (`ρ* = 2 ln θ ≈ 9.05`), NOT a hard `λ → ∞` cap: beyond the mode the gradient
2943/// turns positive (approaching `+1/2` only as `ρ → +∞`) and, the objective being
2944/// minimized, pulls ρ back DOWN toward λ*. See [`relax_smoothing_rho_prior`].
2945const NULLSPACE_SELECT_PC_TAIL_PROB: f64 = 0.01;
2946
2947fn adaptive_fit_options_base(options: &FitOptions, design: &TermCollectionDesign) -> FitOptions {
2948    FitOptions {
2949        latent_cloglog: options.latent_cloglog,
2950        mixture_link: options.mixture_link.clone(),
2951        optimize_mixture: options.optimize_mixture,
2952        sas_link: options.sas_link,
2953        optimize_sas: options.optimize_sas,
2954        compute_inference: options.compute_inference,
2955        skip_rho_posterior_inference: options.skip_rho_posterior_inference,
2956        max_iter: options.max_iter,
2957        tol: options.tol,
2958        nullspace_dims: design.nullspace_dims.clone(),
2959        linear_constraints: design.linear_constraints.clone(),
2960        firth_bias_reduction: options.firth_bias_reduction,
2961        adaptive_regularization: None,
2962        penalty_shrinkage_floor: options.penalty_shrinkage_floor,
2963        // Propagate user-supplied rho_prior so the baseline/refit and the
2964        // joint optimizer minimize the same REML objective.
2965        rho_prior: options.rho_prior.clone(),
2966        kronecker_penalty_system: design.kronecker_penalty_system(),
2967        kronecker_factored: design
2968            .smooth
2969            .terms
2970            .iter()
2971            .find_map(|t| t.kronecker_factored.clone()),
2972        persist_warm_start_disk: options.persist_warm_start_disk,
2973    }
2974}
2975
2976fn superseded_fit_options(options: &FitOptions) -> FitOptions {
2977    let mut fit_options = options.clone();
2978    fit_options.skip_rho_posterior_inference = true;
2979    fit_options
2980}
2981
2982#[derive(Clone)]
2983struct BoundedLinearTermMeta {
2984    col_idx: usize,
2985    min: f64,
2986    max: f64,
2987    prior: BoundedCoefficientPriorSpec,
2988}
2989
2990/// β-dependent effective Jacobian for the bounded-linear fit block.
2991///
2992/// Each bounded coefficient enters the linear predictor non-linearly, as
2993/// `β = min + width·σ(θ)`, and is supplied to the solver through the family
2994/// adapter's offset rather than the linear design. To keep that contribution
2995/// out of the *linear* design the fit places a deliberately **zeroed**
2996/// placeholder column for every bounded term in the block design
2997/// (see `fit_bounded_term_collection_with_design`). The pre-fit
2998/// identifiability audit, however, assesses block rank by reading each block's
2999/// effective Jacobian — and a zeroed column reads as a structural rank
3000/// deficiency, so without this callback the audit refuses *every* bounded
3001/// model before fitting begins.
3002///
3003/// This callback reports the model's true Jacobian column for each bounded
3004/// term, `∂η_i/∂θ = (dβ/dθ)·x_i`, so the audit inspects the same geometry the
3005/// solver actually fits. Because `dβ/dθ = width·σ(θ)(1−σ(θ))` is strictly
3006/// positive for finite θ and `width > 0`, a bounded column is rank-deficient
3007/// in the audit exactly when its underlying covariate is genuinely collinear
3008/// with the rest of the design — never merely because the placeholder was
3009/// zeroed. The callback is consumed only by the identifiability audit /
3010/// canonicalisation; the inner PIRLS solve drives η through the
3011/// [`BoundedLinearFamily`] adapter, so reporting the non-zeroed Jacobian here
3012/// does not double-count the bounded contribution.
3013struct BoundedEffectiveJacobian {
3014    design: Array2<f64>,
3015    bounded_terms: Vec<BoundedLinearTermMeta>,
3016}
3017
3018impl BlockEffectiveJacobian for BoundedEffectiveJacobian {
3019    fn effective_jacobian_rows(
3020        &self,
3021        state: &FamilyLinearizationState<'_>,
3022        rows: std::ops::Range<usize>,
3023    ) -> Result<Array2<f64>, String> {
3024        let p = self.design.ncols();
3025        let n = self.design.nrows();
3026        let rows = rows.start.min(n)..rows.end.min(n);
3027        if !state.beta.is_empty() {
3028            if state.beta.len() != p {
3029                return Err(format!(
3030                    "BoundedEffectiveJacobian::effective_jacobian_at: beta length {} != design \
3031                     ncols {p}",
3032                    state.beta.len(),
3033                ));
3034            }
3035            if state.beta.iter().any(|v| v.is_nan()) {
3036                return Err(
3037                    "BoundedEffectiveJacobian::effective_jacobian_at: beta contains NaN"
3038                        .to_string(),
3039                );
3040            }
3041        }
3042        let mut jac = self
3043            .design
3044            .slice(ndarray::s![rows.start..rows.end, ..])
3045            .to_owned();
3046        for term in &self.bounded_terms {
3047            let theta = if state.beta.is_empty() {
3048                0.0
3049            } else {
3050                state.beta[term.col_idx]
3051            };
3052            let (_, _, db_dtheta, _, _) = bounded_latent_derivatives(theta, term.min, term.max);
3053            jac.column_mut(term.col_idx).mapv_inplace(|v| v * db_dtheta);
3054        }
3055        Ok(jac)
3056    }
3057}
3058
3059#[derive(Clone)]
3060struct BoundedLinearFamily {
3061    family: LikelihoodSpec,
3062    latent_cloglog_state: Option<LatentCLogLogState>,
3063    mixture_link_state: Option<MixtureLinkState>,
3064    sas_link_state: Option<SasLinkState>,
3065    y: Array1<f64>,
3066    weights: Array1<f64>,
3067    design: Array2<f64>,
3068    designzeroed: Array2<f64>,
3069    offset: Array1<f64>,
3070    bounded_terms: Vec<BoundedLinearTermMeta>,
3071}
3072
3073#[derive(Clone)]
3074struct StandardFamilyObservationState {
3075    eta: Array1<f64>,
3076    mu: Array1<f64>,
3077    score: Array1<f64>,
3078    fisherweight: Array1<f64>,
3079    neghessian_eta: Array1<f64>,
3080    neghessian_eta_derivative: Array1<f64>,
3081    log_likelihood: f64,
3082}
3083
3084fn bounded_logit(z: f64) -> f64 {
3085    let zc = z.clamp(1e-12, 1.0 - 1e-12);
3086    (zc / (1.0 - zc)).ln()
3087}
3088
3089fn stable_sigmoid(theta: f64) -> f64 {
3090    if theta >= 0.0 {
3091        let exp_neg = (-theta).exp();
3092        1.0 / (1.0 + exp_neg)
3093    } else {
3094        let exp_pos = theta.exp();
3095        exp_pos / (1.0 + exp_pos)
3096    }
3097}
3098
3099fn bounded_latent_to_user(theta: f64, min: f64, max: f64) -> (f64, f64, f64) {
3100    let z = stable_sigmoid(theta);
3101    let width = max - min;
3102    let beta = min + width * z;
3103    let db_dtheta = width * z * (1.0 - z);
3104    (beta, z, db_dtheta)
3105}
3106
3107/// Invert the bounded interval transform: given a user-scale coefficient
3108/// `beta` in the open interval `(min, max)`, return the latent coordinate
3109/// `theta` with `bounded_latent_to_user(theta, min, max).0 == beta`.
3110///
3111/// This is the exact inverse of the logistic interval map used by the bounded
3112/// custom family: `z = (beta - min)/(max - min)` (the normalized position in
3113/// the interval) and `theta = logit(z)`. The normalized position is clamped
3114/// strictly inside `(0, 1)` (mirroring `bounded_logit`) so a coefficient that
3115/// sits numerically at a boundary maps to a large-but-finite latent value
3116/// rather than `±∞`.
3117fn bounded_user_to_latent(beta: f64, min: f64, max: f64) -> f64 {
3118    let width = max - min;
3119    if width <= 0.0 || !width.is_finite() {
3120        return 0.0;
3121    }
3122    let z = (beta - min) / width;
3123    bounded_logit(z)
3124}
3125
3126/// One bounded coefficient column for posterior sampling: its position in the
3127/// (internal, conditioned) coefficient vector and the interval bounds expressed
3128/// on that same internal scale.
3129#[derive(Debug, Clone, Copy)]
3130pub struct BoundedSampleColumn {
3131    /// Column index into the internal (conditioned) coefficient vector.
3132    pub col_idx: usize,
3133    /// Lower interval bound on the internal scale.
3134    pub min: f64,
3135    /// Upper interval bound on the internal scale.
3136    pub max: f64,
3137}
3138
3139/// Exact posterior draws for a model with `bounded()` coefficients.
3140///
3141/// The bounded custom family fits each bounded coefficient as a smooth interval
3142/// transform `beta = min + (max - min)·sigmoid(theta)` of an unconstrained
3143/// latent `theta`. The Laplace approximation is *Gaussian on the latent scale*
3144/// — that is precisely the scale on which the fit treats the coefficient as an
3145/// unconstrained, locally-quadratic parameter. Sampling a Gaussian directly on
3146/// the user (bounded) scale is wrong twice over: it can place mass outside
3147/// `[min, max]`, and it discards the boundary-induced skew that the nonlinear
3148/// map produces. This routine instead draws `theta ~ N(theta_mode, H_latent^{-1})`
3149/// and pushes every draw through the *exact* interval map, so user-scale draws
3150/// always lie strictly inside the interval and carry the correct skew.
3151///
3152/// Coordinate bookkeeping. The caller supplies the user-scale mode `beta_user`
3153/// and the user-scale penalized Hessian `user_hessian` (both in *internal /
3154/// conditioned* coordinates — i.e. before `backtransform_*` to the original
3155/// data scale) together with the internal-scale bounds for each bounded column.
3156/// The user-scale Hessian relates to the latent-scale Hessian by the diagonal
3157/// delta-method Jacobian `J = diag(db/dtheta)`:
3158///   `H_user = J^{-1} H_latent J^{-1}`  ⇒  `H_latent = J H_user J`,
3159/// which is exactly the inverse of `transform_bounded_latent_precision_to_user_internal`.
3160/// Non-bounded columns have `J_ii = 1`, so they are sampled as the ordinary
3161/// Gaussian Laplace draw and returned unchanged.
3162///
3163/// Dispersion. `user_hessian` is the UNSCALED penalized Hessian `H_user`
3164/// (unit implicit dispersion). For a free-dispersion family the latent
3165/// posterior covariance is `φ̂·H_latent⁻¹`, so the caller passes
3166/// `sqrt_cov_scale = √φ̂` (the coefficient-covariance scale `√σ̂²` for a
3167/// profiled Gaussian, `1` for fixed-scale families like Binomial) and every
3168/// latent perturbation is multiplied by it. This makes the draw covariance
3169/// `sqrt_cov_scale² · H_latent⁻¹`, matching the fit's reported
3170/// `Vb = cov_scale·H_user⁻¹` exactly (gam#1514) — without it a Gaussian
3171/// bounded slope's draws were ~`1/σ̂` too wide.
3172///
3173/// Returns the draws as a `(n_draws, p)` matrix on the *internal* user scale
3174/// (still conditioned); the caller back-transforms to the original data scale
3175/// with the same conditioning it used for the point estimate.
3176pub fn sample_bounded_latent_posterior_internal(
3177    beta_user: &Array1<f64>,
3178    user_hessian: &Array2<f64>,
3179    bounded_columns: &[BoundedSampleColumn],
3180    n_draws: usize,
3181    sqrt_cov_scale: f64,
3182    base_seed: u64,
3183) -> Result<Array2<f64>, EstimationError> {
3184    let p = beta_user.len();
3185    if user_hessian.nrows() != p || user_hessian.ncols() != p {
3186        crate::bail_invalid_estim!(
3187            "bounded posterior sampling dimension mismatch: mode has {p} entries, user Hessian is {}x{}",
3188            user_hessian.nrows(),
3189            user_hessian.ncols()
3190        );
3191    }
3192
3193    // Latent mode and delta-method Jacobian, column by column.
3194    let mut theta_mode = beta_user.clone();
3195    let mut jac_diag = Array1::<f64>::ones(p);
3196    for bc in bounded_columns {
3197        if bc.col_idx >= p {
3198            crate::bail_invalid_estim!(
3199                "bounded posterior sampling: bounded column index {} out of range for {p} coefficients",
3200                bc.col_idx
3201            );
3202        }
3203        let theta_i = bounded_user_to_latent(beta_user[bc.col_idx], bc.min, bc.max);
3204        let (_, _, db_dtheta) = bounded_latent_to_user(theta_i, bc.min, bc.max);
3205        theta_mode[bc.col_idx] = theta_i;
3206        // Guard against a degenerate (numerically vanishing) Jacobian at a
3207        // coefficient pinned hard against a boundary: floor the slope so the
3208        // latent precision stays finite and the draw simply collapses onto the
3209        // boundary, which is the correct limiting posterior.
3210        jac_diag[bc.col_idx] = db_dtheta.max(1e-12);
3211    }
3212
3213    // H_latent = J H_user J  (J diagonal). This is the exact inverse of the
3214    // user-scale precision transform applied at fit time.
3215    let mut h_latent = user_hessian.clone();
3216    for i in 0..p {
3217        let ji = jac_diag[i];
3218        if ji != 1.0 {
3219            h_latent.row_mut(i).mapv_inplace(|v| v * ji);
3220            h_latent.column_mut(i).mapv_inplace(|v| v * ji);
3221        }
3222    }
3223
3224    // Draw theta ~ N(theta_mode, H_latent^{-1}) via the Cholesky of H_latent:
3225    // L Lᵀ = H_latent, solve Lᵀ δ = ε so Var(δ) = H_latent^{-1}.
3226    use gam_linalg::faer_ndarray::FaerCholesky as _;
3227    use rand::SeedableRng as _;
3228    let chol = h_latent.cholesky(faer::Side::Lower).map_err(|err| {
3229        EstimationError::InvalidInput(format!(
3230            "bounded posterior sampling: Cholesky of the latent penalized Hessian failed: {err:?}"
3231        ))
3232    })?;
3233    let l = chol.lower_triangular();
3234
3235    let mut draws = Array2::<f64>::zeros((n_draws, p));
3236    let mut eps = Array1::<f64>::zeros(p);
3237    let mut delta = Array1::<f64>::zeros(p);
3238    let mut rng = rand::rngs::StdRng::seed_from_u64(base_seed);
3239    for k in 0..n_draws {
3240        for e in eps.iter_mut() {
3241            *e = standard_normal_draw(&mut rng);
3242        }
3243        solve_lower_transpose_into(&l, &eps, &mut delta);
3244        for i in 0..p {
3245            // δ has covariance `H_latent⁻¹`; scaling by √cov_scale lifts it to
3246            // the dispersion-correct posterior covariance `cov_scale·H_latent⁻¹`.
3247            draws[(k, i)] = theta_mode[i] + sqrt_cov_scale * delta[i];
3248        }
3249        // Push bounded columns through the exact interval map so every draw is
3250        // strictly inside (min, max); leave unconstrained columns untouched.
3251        for bc in bounded_columns {
3252            let (beta_draw, _, _) = bounded_latent_to_user(draws[(k, bc.col_idx)], bc.min, bc.max);
3253            draws[(k, bc.col_idx)] = beta_draw;
3254        }
3255    }
3256
3257    Ok(draws)
3258}
3259
3260/// Box-Muller standard-normal draw (kept local so the bounded sampler does not
3261/// depend on the HMC module's RNG plumbing).
3262#[inline]
3263fn standard_normal_draw<R: rand::Rng + ?Sized>(rng: &mut R) -> f64 {
3264    use rand::RngExt as _;
3265    let u1 = rng.random::<f64>().max(1e-16);
3266    let u2 = rng.random::<f64>();
3267    (-2.0 * u1.ln()).sqrt() * (2.0 * std::f64::consts::PI * u2).cos()
3268}
3269
3270/// Solve `Lᵀ x = b` for a lower-triangular `L` (back substitution), writing the
3271/// result into `out`. Used to turn a standard-normal `b` into a draw with
3272/// covariance `(L Lᵀ)^{-1}`.
3273fn solve_lower_transpose_into(l: &Array2<f64>, b: &Array1<f64>, out: &mut Array1<f64>) {
3274    let p = l.nrows();
3275    for i in (0..p).rev() {
3276        let mut acc = b[i];
3277        for j in (i + 1)..p {
3278            acc -= l[(j, i)] * out[j];
3279        }
3280        let diag = l[(i, i)];
3281        out[i] = if diag.abs() > 0.0 { acc / diag } else { 0.0 };
3282    }
3283}
3284
3285fn bounded_latent_derivatives(theta: f64, min: f64, max: f64) -> (f64, f64, f64, f64, f64) {
3286    let z = stable_sigmoid(theta);
3287    let width = max - min;
3288    let s = z * (1.0 - z);
3289    let beta = min + width * z;
3290    let db_dtheta = width * s;
3291    let d2b_dtheta2 = width * s * (1.0 - 2.0 * z);
3292    let d3b_dtheta3 = width * s * (1.0 - 6.0 * z + 6.0 * z * z);
3293    (beta, z, db_dtheta, d2b_dtheta2, d3b_dtheta3)
3294}
3295
3296fn bounded_prior_terms(theta: f64, prior: &BoundedCoefficientPriorSpec) -> (f64, f64, f64, f64) {
3297    let (a, b) = match prior {
3298        // `None` means constrained MLE with no extra prior term on the bounded coefficient.
3299        BoundedCoefficientPriorSpec::None => return (0.0, 0.0, 0.0, 0.0),
3300        // Uniform on the normalized user-scale coefficient z in (0, 1). In latent space this is
3301        // exactly the Jacobian term for the logistic transform, up to an additive width constant.
3302        BoundedCoefficientPriorSpec::Uniform => (1.0, 1.0),
3303        BoundedCoefficientPriorSpec::Beta { a, b } => (*a, *b),
3304    };
3305    let z = stable_sigmoid(theta).clamp(1e-12, 1.0 - 1e-12);
3306    let logp = a * z.ln() + b * (1.0 - z).ln();
3307    let grad = a - (a + b) * z;
3308    let neghess = (a + b) * z * (1.0 - z);
3309    let neghess_derivative = (a + b) * z * (1.0 - z) * (1.0 - 2.0 * z);
3310    (logp, grad, neghess, neghess_derivative)
3311}
3312
3313/// Assemble the per-observation IRLS state for a standard exponential-family
3314/// GLM from the inverse-link jet (`d1 = dμ/dη`, `d2`, `d3`) and the
3315/// log-likelihood derivatives in `μ` (`lmu = ∂l/∂μ`, `lmumu`, `lmumumu`) plus
3316/// the mean-variance `var = V(μ)`. Mirrors the Binomial general arm EXACTLY
3317/// (#1615/#1616): expected Fisher weight `(dμ/dη)²/V(μ)`, observed
3318/// negative-Hessian in `η`, and its `η`-derivative. Canonical-link self-check:
3319/// for a canonical link the observed neg-Hessian equals the Fisher weight
3320/// (e.g. Poisson-log → both `= w·μ`), which this construction reproduces.
3321#[inline]
3322fn glm_eta_observation_state(
3323    w: f64,
3324    lmu: f64,
3325    lmumu: f64,
3326    lmumumu: f64,
3327    var: f64,
3328    d1: f64,
3329    d2: f64,
3330    d3: f64,
3331    mu_deriv_eps: f64,
3332) -> (f64, f64, f64, f64) {
3333    let score = w * lmu * d1;
3334    let fisherweight = (w * d1 * d1 / var).max(mu_deriv_eps);
3335    let neghessian = -w * (lmumu * d1 * d1 + lmu * d2);
3336    let neghessian_deriv = -w * (lmumumu * d1 * d1 * d1 + 3.0 * lmumu * d1 * d2 + lmu * d3);
3337    (score, fisherweight, neghessian, neghessian_deriv)
3338}
3339
3340fn evaluate_standard_familyobservations(
3341    family: LikelihoodSpec,
3342    latent_cloglog_state: Option<&LatentCLogLogState>,
3343    mixture_link_state: Option<&MixtureLinkState>,
3344    sas_link_state: Option<&SasLinkState>,
3345    y: &Array1<f64>,
3346    weights: &Array1<f64>,
3347    eta: &Array1<f64>,
3348) -> Result<StandardFamilyObservationState, EstimationError> {
3349    const PROB_EPS: f64 = 1e-10;
3350    const MU_DERIV_EPS: f64 = 1e-12;
3351    let n = y.len();
3352    if weights.len() != n || eta.len() != n {
3353        crate::bail_invalid_estim!("bounded family observation size mismatch");
3354    }
3355
3356    let mut mu = Array1::<f64>::zeros(n);
3357    let mut score = Array1::<f64>::zeros(n);
3358    let mut fisherweight = Array1::<f64>::zeros(n);
3359    let mut neghessian_eta = Array1::<f64>::zeros(n);
3360    let mut neghessian_eta_derivative = Array1::<f64>::zeros(n);
3361    let mut log_likelihood = 0.0;
3362
3363    for i in 0..n {
3364        let w = weights[i].max(0.0);
3365        let yi = y[i];
3366        let eta_i = eta[i];
3367        match (&family.response, &family.link) {
3368            (ResponseFamily::Gaussian, _) => {
3369                let resid = yi - eta_i;
3370                mu[i] = eta_i;
3371                score[i] = w * resid;
3372                fisherweight[i] = w.max(MU_DERIV_EPS);
3373                neghessian_eta[i] = w;
3374                neghessian_eta_derivative[i] = 0.0;
3375                log_likelihood += -0.5 * w * resid * resid;
3376            }
3377            (ResponseFamily::Binomial, InverseLink::Standard(StandardLink::Logit)) => {
3378                let jet = logit_inverse_link_jet5(eta_i);
3379                mu[i] = jet.mu;
3380                score[i] = w * (yi - jet.mu);
3381                fisherweight[i] = jet.d1.max(MU_DERIV_EPS);
3382                neghessian_eta[i] = jet.d1;
3383                neghessian_eta_derivative[i] = jet.d2;
3384                let logmu = -gam_linalg::utils::stable_softplus(-eta_i);
3385                let log_one_minusmu = -gam_linalg::utils::stable_softplus(eta_i);
3386                log_likelihood += w * (yi * logmu + (1.0 - yi) * log_one_minusmu);
3387            }
3388            (ResponseFamily::Binomial, _) => {
3389                let inverse_link = if let Some(state) = latent_cloglog_state {
3390                    Some(InverseLink::LatentCLogLog(*state))
3391                } else if let Some(state) = mixture_link_state {
3392                    Some(InverseLink::Mixture(state.clone()))
3393                } else {
3394                    sas_link_state.map(|state| {
3395                        if family.is_binomial_beta_logistic() {
3396                            InverseLink::BetaLogistic(*state)
3397                        } else {
3398                            InverseLink::Sas(*state)
3399                        }
3400                    })
3401                };
3402                let strategy_spec = LikelihoodSpec {
3403                    response: family.response.clone(),
3404                    link: inverse_link.clone().unwrap_or_else(|| family.link.clone()),
3405                };
3406                let jet = strategy_for_spec(&strategy_spec).inverse_link_jet(eta_i)?;
3407                let mu_i_raw = jet.mu;
3408                let dmu_deta_raw = jet.d1;
3409                let mu_i: f64 = mu_i_raw.clamp(PROB_EPS, 1.0 - PROB_EPS);
3410                let dmu_deta = dmu_deta_raw.max(MU_DERIV_EPS);
3411                let d2mu_deta2 = jet.d2;
3412                let d3mu_deta3 = jet.d3;
3413                let var = (mu_i * (1.0 - mu_i)).max(PROB_EPS);
3414                let lmu = (yi - mu_i) / var;
3415                let lmumu = -(yi / (mu_i * mu_i)) - ((1.0 - yi) / ((1.0 - mu_i) * (1.0 - mu_i)));
3416                let lmumumu = 2.0 * yi / (mu_i * mu_i * mu_i)
3417                    - 2.0 * (1.0 - yi) / ((1.0 - mu_i) * (1.0 - mu_i) * (1.0 - mu_i));
3418                mu[i] = mu_i;
3419                score[i] = w * lmu * dmu_deta;
3420                fisherweight[i] = (w * dmu_deta * dmu_deta / var).max(MU_DERIV_EPS);
3421                neghessian_eta[i] = -w * (lmumu * dmu_deta * dmu_deta + lmu * d2mu_deta2);
3422                neghessian_eta_derivative[i] = -w
3423                    * (lmumumu * dmu_deta * dmu_deta * dmu_deta
3424                        + 3.0 * lmumu * dmu_deta * d2mu_deta2
3425                        + lmu * d3mu_deta3);
3426                log_likelihood += w * (yi * mu_i.ln() + (1.0 - yi) * (1.0 - mu_i).ln());
3427            }
3428            (ResponseFamily::Poisson, _) => {
3429                // l(μ) = y·ln μ − μ (drop ln y! const) ⇒ lmu = y/μ − 1,
3430                // lmumu = −y/μ², lmumumu = 2y/μ³, V(μ) = μ. #1615/#1616.
3431                let strategy_spec = LikelihoodSpec {
3432                    response: family.response.clone(),
3433                    link: family.link.clone(),
3434                };
3435                let jet = strategy_for_spec(&strategy_spec).inverse_link_jet(eta_i)?;
3436                let mu_i = jet.mu.max(PROB_EPS);
3437                let d1 = jet.d1.max(MU_DERIV_EPS);
3438                let var = mu_i;
3439                let lmu = yi / mu_i - 1.0;
3440                let lmumu = -yi / (mu_i * mu_i);
3441                let lmumumu = 2.0 * yi / (mu_i * mu_i * mu_i);
3442                let (s, f, nh, nhd) = glm_eta_observation_state(
3443                    w, lmu, lmumu, lmumumu, var, d1, jet.d2, jet.d3, MU_DERIV_EPS,
3444                );
3445                mu[i] = mu_i;
3446                score[i] = s;
3447                fisherweight[i] = f;
3448                neghessian_eta[i] = nh;
3449                neghessian_eta_derivative[i] = nhd;
3450                log_likelihood += w * (yi * mu_i.ln() - mu_i);
3451            }
3452            (ResponseFamily::Tweedie { p }, _) => {
3453                // Fixed-p Tweedie quasi-likelihood: ∂Q/∂μ = (y−μ)/V, V(μ) = μ^p.
3454                // lmu = (y−μ)μ^{−p};  lmumu = −μ^{−p} − p(y−μ)μ^{−p−1};
3455                // lmumumu = 2p·μ^{−p−1} + p(p+1)(y−μ)μ^{−p−2};
3456                // Q = y·μ^{1−p}/(1−p) − μ^{2−p}/(2−p). #1615/#1616.
3457                let p = *p;
3458                let strategy_spec = LikelihoodSpec {
3459                    response: family.response.clone(),
3460                    link: family.link.clone(),
3461                };
3462                let jet = strategy_for_spec(&strategy_spec).inverse_link_jet(eta_i)?;
3463                let mu_i = jet.mu.max(PROB_EPS);
3464                let d1 = jet.d1.max(MU_DERIV_EPS);
3465                let var = mu_i.powf(p);
3466                let resid = yi - mu_i;
3467                let lmu = resid / var;
3468                let lmumu = -mu_i.powf(-p) - p * resid * mu_i.powf(-p - 1.0);
3469                let lmumumu =
3470                    2.0 * p * mu_i.powf(-p - 1.0) + p * (p + 1.0) * resid * mu_i.powf(-p - 2.0);
3471                let (s, f, nh, nhd) = glm_eta_observation_state(
3472                    w, lmu, lmumu, lmumumu, var, d1, jet.d2, jet.d3, MU_DERIV_EPS,
3473                );
3474                mu[i] = mu_i;
3475                score[i] = s;
3476                fisherweight[i] = f;
3477                neghessian_eta[i] = nh;
3478                neghessian_eta_derivative[i] = nhd;
3479                // Quasi-log-likelihood (p ≠ 1, 2 in the supported compound range).
3480                log_likelihood += w
3481                    * (yi * mu_i.powf(1.0 - p) / (1.0 - p) - mu_i.powf(2.0 - p) / (2.0 - p));
3482            }
3483            (ResponseFamily::NegativeBinomial { theta, .. }, _) => {
3484                // l(μ) = y·ln μ − (y+θ)·ln(μ+θ) (drop μ-independent terms) ⇒
3485                // lmu = y/μ − (y+θ)/(μ+θ), lmumu = −y/μ² + (y+θ)/(μ+θ)²,
3486                // lmumumu = 2y/μ³ − 2(y+θ)/(μ+θ)³, V(μ) = μ + μ²/θ. #1615/#1616.
3487                let theta = (*theta).max(PROB_EPS);
3488                let strategy_spec = LikelihoodSpec {
3489                    response: family.response.clone(),
3490                    link: family.link.clone(),
3491                };
3492                let jet = strategy_for_spec(&strategy_spec).inverse_link_jet(eta_i)?;
3493                let mu_i = jet.mu.max(PROB_EPS);
3494                let d1 = jet.d1.max(MU_DERIV_EPS);
3495                let mu_plus = mu_i + theta;
3496                let var = mu_i + mu_i * mu_i / theta;
3497                let lmu = yi / mu_i - (yi + theta) / mu_plus;
3498                let lmumu = -yi / (mu_i * mu_i) + (yi + theta) / (mu_plus * mu_plus);
3499                let lmumumu =
3500                    2.0 * yi / (mu_i * mu_i * mu_i) - 2.0 * (yi + theta) / (mu_plus * mu_plus * mu_plus);
3501                let (s, f, nh, nhd) = glm_eta_observation_state(
3502                    w, lmu, lmumu, lmumumu, var, d1, jet.d2, jet.d3, MU_DERIV_EPS,
3503                );
3504                mu[i] = mu_i;
3505                score[i] = s;
3506                fisherweight[i] = f;
3507                neghessian_eta[i] = nh;
3508                neghessian_eta_derivative[i] = nhd;
3509                log_likelihood += w * (yi * mu_i.ln() - (yi + theta) * mu_plus.ln());
3510            }
3511            (ResponseFamily::Beta { .. }, _) => {
3512                crate::bail_invalid_estim!(
3513                    "bounded linear terms are not supported for BetaLogit fits"
3514                );
3515            }
3516            (ResponseFamily::Gamma, _) => {
3517                // Unit-dispersion Gamma kernel l(μ) = −y/μ − ln μ ⇒
3518                // lmu = y/μ² − 1/μ, lmumu = −2y/μ³ + 1/μ²,
3519                // lmumumu = 6y/μ⁴ − 2/μ³, V(μ) = μ². #1615/#1616.
3520                let strategy_spec = LikelihoodSpec {
3521                    response: family.response.clone(),
3522                    link: family.link.clone(),
3523                };
3524                let jet = strategy_for_spec(&strategy_spec).inverse_link_jet(eta_i)?;
3525                let mu_i = jet.mu.max(PROB_EPS);
3526                let d1 = jet.d1.max(MU_DERIV_EPS);
3527                let var = mu_i * mu_i;
3528                let lmu = yi / (mu_i * mu_i) - 1.0 / mu_i;
3529                let lmumu = -2.0 * yi / (mu_i * mu_i * mu_i) + 1.0 / (mu_i * mu_i);
3530                let lmumumu =
3531                    6.0 * yi / (mu_i * mu_i * mu_i * mu_i) - 2.0 / (mu_i * mu_i * mu_i);
3532                let (s, f, nh, nhd) = glm_eta_observation_state(
3533                    w, lmu, lmumu, lmumumu, var, d1, jet.d2, jet.d3, MU_DERIV_EPS,
3534                );
3535                mu[i] = mu_i;
3536                score[i] = s;
3537                fisherweight[i] = f;
3538                neghessian_eta[i] = nh;
3539                neghessian_eta_derivative[i] = nhd;
3540                log_likelihood += w * (-(yi / mu_i) - mu_i.ln());
3541            }
3542            (ResponseFamily::RoystonParmar, _) => {
3543                crate::bail_invalid_estim!(
3544                    "bounded linear terms are not supported for survival model fits"
3545                );
3546            }
3547        }
3548    }
3549
3550    Ok(StandardFamilyObservationState {
3551        eta: eta.clone(),
3552        mu,
3553        score,
3554        fisherweight,
3555        neghessian_eta,
3556        neghessian_eta_derivative,
3557        log_likelihood,
3558    })
3559}
3560
3561#[derive(Clone, Copy, Debug, PartialEq, Eq)]
3562enum SpatialAdaptiveHyperKind {
3563    LogLambdaMagnitude,
3564    LogLambdaGradient,
3565    LogLambdaCurvature,
3566    LogEpsilonMagnitude,
3567    LogEpsilonGradient,
3568    LogEpsilonCurvature,
3569}
3570
3571impl SpatialAdaptiveHyperKind {
3572    fn component_index(self) -> usize {
3573        match self {
3574            SpatialAdaptiveHyperKind::LogLambdaMagnitude
3575            | SpatialAdaptiveHyperKind::LogEpsilonMagnitude => 0,
3576            SpatialAdaptiveHyperKind::LogLambdaGradient
3577            | SpatialAdaptiveHyperKind::LogEpsilonGradient => 1,
3578            SpatialAdaptiveHyperKind::LogLambdaCurvature
3579            | SpatialAdaptiveHyperKind::LogEpsilonCurvature => 2,
3580        }
3581    }
3582
3583    fn is_log_lambda(self) -> bool {
3584        matches!(
3585            self,
3586            SpatialAdaptiveHyperKind::LogLambdaMagnitude
3587                | SpatialAdaptiveHyperKind::LogLambdaGradient
3588                | SpatialAdaptiveHyperKind::LogLambdaCurvature
3589        )
3590    }
3591
3592    fn is_log_epsilon(self) -> bool {
3593        matches!(
3594            self,
3595            SpatialAdaptiveHyperKind::LogEpsilonMagnitude
3596                | SpatialAdaptiveHyperKind::LogEpsilonGradient
3597                | SpatialAdaptiveHyperKind::LogEpsilonCurvature
3598        )
3599    }
3600}
3601
3602#[derive(Clone, Copy, Debug)]
3603struct SpatialAdaptiveHyperSpec {
3604    cache_index: usize,
3605    kind: SpatialAdaptiveHyperKind,
3606}
3607
3608#[derive(Clone, Copy, Debug, PartialEq, Eq)]
3609enum SpatialAdaptiveExplicitSecondOrderKind {
3610    StructuralZero,
3611    LocalAlphaAlpha,
3612    LocalAlphaEta,
3613    SharedEtaEta,
3614}
3615
3616/// Penalty family selected within one adaptive smooth cache. The component index
3617/// (0/1/2) used throughout the runtime caches maps onto these three operators:
3618/// the scalar magnitude operator `d0`, the grouped gradient operator `d1`, and
3619/// the grouped curvature operator `d2`.
3620#[derive(Clone, Copy, Debug, PartialEq, Eq)]
3621enum AdaptiveComponent {
3622    Magnitude,
3623    Gradient,
3624    Curvature,
3625}
3626
3627impl AdaptiveComponent {
3628    fn from_index(index: usize) -> Result<Self, String> {
3629        match index {
3630            0 => Ok(AdaptiveComponent::Magnitude),
3631            1 => Ok(AdaptiveComponent::Gradient),
3632            2 => Ok(AdaptiveComponent::Curvature),
3633            other => Err(SmoothError::invalid_index(format!(
3634                "invalid adaptive component index {}",
3635                other
3636            ))
3637            .into()),
3638        }
3639    }
3640}
3641
3642/// Which hyper-derivative of the adaptive penalty's local pieces to assemble.
3643/// Each variant selects one accessor triple (objective scalar, beta-mixed
3644/// gradient, beta hessian) on the per-component exact state; the operator
3645/// embedding around those accessors is identical across variants.
3646#[derive(Clone, Copy, Debug, PartialEq, Eq)]
3647enum HyperDerivativeKind {
3648    /// First derivative in `log lambda` (rho): the bare penalty pieces.
3649    Rho,
3650    /// First derivative in `log epsilon`.
3651    LogEpsilonFirst,
3652    /// Second derivative in `log epsilon`.
3653    LogEpsilonSecond,
3654}
3655
3656/// Which directional-drift hyper-derivative of the adaptive penalty Hessian to
3657/// assemble: the bare rho drift, or the shared-`log epsilon` drift. Both share
3658/// the per-component direction projection, operator embedding, and global
3659/// embedding; only the directional state accessor differs.
3660#[derive(Clone, Copy, Debug, PartialEq, Eq)]
3661enum HyperDriftKind {
3662    Rho,
3663    LogEpsilon,
3664}
3665
3666impl SpatialAdaptiveHyperSpec {
3667    fn component_index(self) -> usize {
3668        self.kind.component_index()
3669    }
3670
3671    fn explicit_second_order_kind(self, other: Self) -> SpatialAdaptiveExplicitSecondOrderKind {
3672        if self.component_index() != other.component_index() {
3673            return SpatialAdaptiveExplicitSecondOrderKind::StructuralZero;
3674        }
3675        match (
3676            self.kind.is_log_lambda(),
3677            other.kind.is_log_lambda(),
3678            self.kind.is_log_epsilon(),
3679            other.kind.is_log_epsilon(),
3680        ) {
3681            (true, true, false, false) if self.cache_index == other.cache_index => {
3682                SpatialAdaptiveExplicitSecondOrderKind::LocalAlphaAlpha
3683            }
3684            (true, false, false, true) | (false, true, true, false) => {
3685                SpatialAdaptiveExplicitSecondOrderKind::LocalAlphaEta
3686            }
3687            (false, false, true, true) => SpatialAdaptiveExplicitSecondOrderKind::SharedEtaEta,
3688            _ => SpatialAdaptiveExplicitSecondOrderKind::StructuralZero,
3689        }
3690    }
3691}
3692
3693#[derive(Clone, Debug)]
3694struct SpatialAdaptiveTermHyperParams {
3695    lambda: [f64; 3],
3696    epsilon: [f64; 3],
3697}
3698
3699#[derive(Clone)]
3700struct SpatialAdaptiveExactEvaluation {
3701    obs: StandardFamilyObservationState,
3702    adaptive_states: Vec<SpatialPenaltyExactState>,
3703    adaptive_penalty_value: f64,
3704    adaptive_penaltygradient: Array1<f64>,
3705    adaptive_penaltyhessian: Array2<f64>,
3706    fixed_quadraticvalue: f64,
3707    fixed_quadraticgradient: Array1<f64>,
3708    fixed_quadratichessian: Array2<f64>,
3709}
3710
3711#[derive(Clone)]
3712struct CachedSpatialAdaptiveExactEvaluation {
3713    beta: Array1<f64>,
3714    eval: Arc<SpatialAdaptiveExactEvaluation>,
3715}
3716
3717impl SpatialAdaptiveExactEvaluation {
3718    fn total_penalty_value(&self) -> f64 {
3719        self.adaptive_penalty_value + self.fixed_quadraticvalue
3720    }
3721
3722    fn total_penaltygradient(&self) -> Array1<f64> {
3723        &self.adaptive_penaltygradient + &self.fixed_quadraticgradient
3724    }
3725
3726    fn total_penaltyhessian(&self) -> Array2<f64> {
3727        &self.adaptive_penaltyhessian + &self.fixed_quadratichessian
3728    }
3729
3730    fn totalobjectivehessian(&self, design: &Array2<f64>) -> Result<Array2<f64>, String> {
3731        let mut out = xt_diag_x_dense(design.view(), self.obs.neghessian_eta.view())?;
3732        out += &self.total_penaltyhessian();
3733        Ok(out)
3734    }
3735}
3736
3737#[derive(Clone)]
3738struct SpatialAdaptiveExactFamily {
3739    family: LikelihoodSpec,
3740    latent_cloglog_state: Option<LatentCLogLogState>,
3741    mixture_link_state: Option<MixtureLinkState>,
3742    sas_link_state: Option<SasLinkState>,
3743    y: Arc<Array1<f64>>,
3744    weights: Arc<Array1<f64>>,
3745    design: Arc<Array2<f64>>,
3746    offset: Arc<Array1<f64>>,
3747    linear_constraints: Option<LinearInequalityConstraints>,
3748    runtime_caches: Arc<Vec<SpatialOperatorRuntimeCache>>,
3749    adaptive_params: Vec<SpatialAdaptiveTermHyperParams>,
3750    fixed_quadratichessian: Arc<Array2<f64>>,
3751    hyperspecs: Arc<Vec<SpatialAdaptiveHyperSpec>>,
3752    exact_eval_cache: Arc<Mutex<Option<CachedSpatialAdaptiveExactEvaluation>>>,
3753}
3754
3755impl SpatialAdaptiveExactFamily {
3756    fn with_adaptive_params(
3757        &self,
3758        adaptive_params: Vec<SpatialAdaptiveTermHyperParams>,
3759        fixed_quadratichessian: Arc<Array2<f64>>,
3760    ) -> Self {
3761        Self {
3762            family: self.family.clone(),
3763            latent_cloglog_state: self.latent_cloglog_state,
3764            mixture_link_state: self.mixture_link_state.clone(),
3765            sas_link_state: self.sas_link_state,
3766            y: self.y.clone(),
3767            weights: self.weights.clone(),
3768            design: self.design.clone(),
3769            offset: self.offset.clone(),
3770            linear_constraints: self.linear_constraints.clone(),
3771            runtime_caches: self.runtime_caches.clone(),
3772            adaptive_params,
3773            fixed_quadratichessian,
3774            hyperspecs: self.hyperspecs.clone(),
3775            exact_eval_cache: Arc::new(Mutex::new(None)),
3776        }
3777    }
3778
3779    fn total_eta(&self, beta: &Array1<f64>) -> Array1<f64> {
3780        gam_linalg::faer_ndarray::fast_av(self.design.as_ref(), beta) + self.offset.as_ref()
3781    }
3782
3783    fn fixed_quadratic_terms(&self, beta: &Array1<f64>) -> (f64, Array1<f64>) {
3784        let grad = self.fixed_quadratichessian.dot(beta);
3785        let value = 0.5 * beta.dot(&grad);
3786        (value, grad)
3787    }
3788
3789    fn adaptive_penalty_value_only(&self, beta: &Array1<f64>) -> Result<f64, String> {
3790        let mut penalty_value = 0.0;
3791        for (cache_idx, cache) in self.runtime_caches.iter().enumerate() {
3792            let params = self.adaptive_params.get(cache_idx).ok_or_else(|| {
3793                format!(
3794                    "missing adaptive parameter block for cache {}",
3795                    cache.termname
3796                )
3797            })?;
3798            let beta_local = beta.slice(s![cache.coeff_global_range.clone()]);
3799            let state =
3800                SpatialPenaltyExactState::from_beta_local(beta_local, cache, params.epsilon)
3801                    .map_err(|e| e.to_string())?;
3802            penalty_value += params.lambda[0] * state.magnitude.penalty_value();
3803            penalty_value += params.lambda[1] * state.gradient.penalty_value();
3804            penalty_value += params.lambda[2] * state.curvature.penalty_value();
3805        }
3806        Ok(penalty_value)
3807    }
3808
3809    fn zero_hyper_parts(&self) -> (Array1<f64>, Array2<f64>) {
3810        let total_dim = self.design.ncols();
3811        (
3812            Array1::<f64>::zeros(total_dim),
3813            Array2::<f64>::zeros((total_dim, total_dim)),
3814        )
3815    }
3816
3817    fn embed_local_hyper_parts(
3818        &self,
3819        coeff_range: &Range<usize>,
3820        local_grad: &Array1<f64>,
3821        local_hess: &Array2<f64>,
3822    ) -> (Array1<f64>, Array2<f64>) {
3823        let (mut beta_mixed, mut betahessian) = self.zero_hyper_parts();
3824        beta_mixed
3825            .slice_mut(s![coeff_range.clone()])
3826            .assign(local_grad);
3827        betahessian
3828            .slice_mut(s![coeff_range.clone(), coeff_range.clone()])
3829            .assign(local_hess);
3830        (beta_mixed, betahessian)
3831    }
3832
3833    fn embed_local_hyper_hessian(
3834        &self,
3835        coeff_range: &Range<usize>,
3836        local_hess: &Array2<f64>,
3837    ) -> Array2<f64> {
3838        let total_dim = self.design.ncols();
3839        let mut out = Array2::<f64>::zeros((total_dim, total_dim));
3840        out.slice_mut(s![coeff_range.clone(), coeff_range.clone()])
3841            .assign(local_hess);
3842        out
3843    }
3844
3845    /// Unified per-block hyper-derivative assembly. Owns the shared cache /
3846    /// hyperparameter / exact-state lookup, the component -> operator selection
3847    /// (scalar magnitude `d0`, grouped gradient `d1`, grouped curvature `d2`),
3848    /// and the global embedding via [`Self::embed_local_hyper_parts`]. The only
3849    /// piece that varies with `derivative` is the per-component accessor triple
3850    /// (objective scalar, beta-mixed gradient, beta hessian) read off the exact
3851    /// state. Returns `(objective, beta_mixed, betahessian)`, each already
3852    /// scaled by the component's penalty weight `lambda`.
3853    fn adaptive_block_eval(
3854        &self,
3855        eval: &SpatialAdaptiveExactEvaluation,
3856        cache_idx: usize,
3857        component: AdaptiveComponent,
3858        derivative: HyperDerivativeKind,
3859    ) -> Result<(f64, Array1<f64>, Array2<f64>), String> {
3860        let cache = self
3861            .runtime_caches
3862            .get(cache_idx)
3863            .ok_or_else(|| format!("adaptive cache index {} out of bounds", cache_idx))?;
3864        let params = self
3865            .adaptive_params
3866            .get(cache_idx)
3867            .ok_or_else(|| format!("adaptive hyperparameter block {} out of bounds", cache_idx))?;
3868        let state = eval
3869            .adaptive_states
3870            .get(cache_idx)
3871            .ok_or_else(|| format!("adaptive exact state index {} out of bounds", cache_idx))?;
3872
3873        let (objective_local, beta_mixed_local, betahessian_local) = match component {
3874            AdaptiveComponent::Magnitude => {
3875                let lambda = params.lambda[0];
3876                let mag = &state.magnitude;
3877                let (objective, gradient_coeff, hessian_diag) = match derivative {
3878                    HyperDerivativeKind::Rho => (
3879                        mag.penalty_value(),
3880                        mag.betagradient_coeff(),
3881                        mag.betahessian_diag(),
3882                    ),
3883                    HyperDerivativeKind::LogEpsilonFirst => (
3884                        mag.log_epsilon_gradient_terms().sum(),
3885                        mag.log_epsilon_betagradient_coeff(),
3886                        mag.log_epsilon_betahessian_diag(),
3887                    ),
3888                    HyperDerivativeKind::LogEpsilonSecond => (
3889                        mag.log_epsilon_hessian_terms().sum(),
3890                        mag.log_epsilon_beta_mixed_second_coeff(),
3891                        mag.log_epsilon_betahessian_second_diag(),
3892                    ),
3893                };
3894                (
3895                    lambda * objective,
3896                    lambda * scalar_operatorgradient(&cache.d0, &gradient_coeff),
3897                    lambda * scalar_operatorhessian(&cache.d0, &hessian_diag),
3898                )
3899            }
3900            AdaptiveComponent::Gradient => {
3901                let lambda = params.lambda[1];
3902                let grad = &state.gradient;
3903                let (objective, gradient_blocks, hessian_blocks) = match derivative {
3904                    HyperDerivativeKind::Rho => (
3905                        grad.penalty_value(),
3906                        grad.betagradient_blocks(),
3907                        grad.betahessian_blocks(),
3908                    ),
3909                    HyperDerivativeKind::LogEpsilonFirst => (
3910                        grad.log_epsilon_gradient_terms().sum(),
3911                        grad.log_epsilon_betagradient_blocks(),
3912                        grad.log_epsilon_betahessian_blocks(),
3913                    ),
3914                    HyperDerivativeKind::LogEpsilonSecond => (
3915                        grad.log_epsilon_hessian_terms().sum(),
3916                        grad.log_epsilon_beta_mixed_second_blocks(),
3917                        grad.log_epsilon_betahessian_second_blocks(),
3918                    ),
3919                };
3920                (
3921                    lambda * objective,
3922                    lambda
3923                        * grouped_operatorgradient(&cache.d1, cache.dimension, &gradient_blocks)
3924                            .map_err(|e| e.to_string())?,
3925                    lambda
3926                        * grouped_operatorhessian(&cache.d1, cache.dimension, &hessian_blocks)
3927                            .map_err(|e| e.to_string())?,
3928                )
3929            }
3930            AdaptiveComponent::Curvature => {
3931                let lambda = params.lambda[2];
3932                let group = cache.dimension * cache.dimension;
3933                let curv = &state.curvature;
3934                let (objective, gradient_blocks, hessian_blocks) = match derivative {
3935                    HyperDerivativeKind::Rho => (
3936                        curv.penalty_value(),
3937                        curv.betagradient_blocks(),
3938                        curv.betahessian_blocks(),
3939                    ),
3940                    HyperDerivativeKind::LogEpsilonFirst => (
3941                        curv.log_epsilon_gradient_terms().sum(),
3942                        curv.log_epsilon_betagradient_blocks(),
3943                        curv.log_epsilon_betahessian_blocks(),
3944                    ),
3945                    HyperDerivativeKind::LogEpsilonSecond => (
3946                        curv.log_epsilon_hessian_terms().sum(),
3947                        curv.log_epsilon_beta_mixed_second_blocks(),
3948                        curv.log_epsilon_betahessian_second_blocks(),
3949                    ),
3950                };
3951                (
3952                    lambda * objective,
3953                    lambda
3954                        * grouped_operatorgradient(&cache.d2, group, &gradient_blocks)
3955                            .map_err(|e| e.to_string())?,
3956                    lambda
3957                        * grouped_operatorhessian(&cache.d2, group, &hessian_blocks)
3958                            .map_err(|e| e.to_string())?,
3959                )
3960            }
3961        };
3962
3963        let (beta_mixed, betahessian) = self.embed_local_hyper_parts(
3964            &cache.coeff_global_range,
3965            &beta_mixed_local,
3966            &betahessian_local,
3967        );
3968        Ok((objective_local, beta_mixed, betahessian))
3969    }
3970
3971    fn adaptive_shared_log_epsilon_parts(
3972        &self,
3973        eval: &SpatialAdaptiveExactEvaluation,
3974        component: usize,
3975    ) -> Result<(f64, Array1<f64>, Array2<f64>), String> {
3976        // Exact shared-log-epsilon first-order pieces:
3977        //
3978        //   J_{eta_p}         = sum_m lambda_{m,p} U_{m,p,eta},
3979        //   J_{beta,eta_p}    = sum_m lambda_{m,p} U_{m,p,beta eta},
3980        //   J_{beta,beta,eta} = sum_m lambda_{m,p} U_{m,p,beta beta eta}.
3981        self.adaptive_shared_block_eval(eval, component, HyperDerivativeKind::LogEpsilonFirst)
3982    }
3983
3984    fn adaptive_shared_log_epsilon_second_parts(
3985        &self,
3986        eval: &SpatialAdaptiveExactEvaluation,
3987        component: usize,
3988    ) -> Result<(f64, Array1<f64>, Array2<f64>), String> {
3989        // Exact shared-log-epsilon second-order pieces:
3990        //
3991        //   J_{eta_p,eta_p}            = sum_m lambda_{m,p} U_{m,p,eta eta},
3992        //   J_{beta,eta_p,eta_p}       = sum_m lambda_{m,p} U_{m,p,beta eta eta},
3993        //   J_{beta,beta,eta_p,eta_p}  = sum_m lambda_{m,p} U_{m,p,beta beta eta eta}.
3994        self.adaptive_shared_block_eval(eval, component, HyperDerivativeKind::LogEpsilonSecond)
3995    }
3996
3997    /// Sum a per-block hyper-derivative across every adaptive term for one shared
3998    /// `log epsilon` coordinate (selected by `component`). The three log-epsilon
3999    /// coordinates are shared globally by penalty type, so each contributes the
4000    /// matching component's block from every cache.
4001    fn adaptive_shared_block_eval(
4002        &self,
4003        eval: &SpatialAdaptiveExactEvaluation,
4004        component: usize,
4005        derivative: HyperDerivativeKind,
4006    ) -> Result<(f64, Array1<f64>, Array2<f64>), String> {
4007        let component = AdaptiveComponent::from_index(component)?;
4008        let (mut score, mut hessian) = self.zero_hyper_parts();
4009        let mut objective = 0.0;
4010        for cache_idx in 0..self.runtime_caches.len() {
4011            let (local_objective, local_score, local_hessian) =
4012                self.adaptive_block_eval(eval, cache_idx, component, derivative)?;
4013            objective += local_objective;
4014            score += &local_score;
4015            hessian += &local_hessian;
4016        }
4017        Ok((objective, score, hessian))
4018    }
4019
4020    fn adaptive_shared_log_epsilon_drift(
4021        &self,
4022        eval: &SpatialAdaptiveExactEvaluation,
4023        component: usize,
4024        direction: &Array1<f64>,
4025    ) -> Result<Array2<f64>, String> {
4026        // Exact shared-log-epsilon Hessian drift:
4027        //
4028        //   T_{eta_p}[u] = sum_m lambda_{m,p} D_beta(U_{m,p,beta beta eta})[u].
4029        let component = AdaptiveComponent::from_index(component)?;
4030        let total_dim = self.design.ncols();
4031        let mut total = Array2::<f64>::zeros((total_dim, total_dim));
4032        for cache_idx in 0..self.runtime_caches.len() {
4033            total += &self.adaptive_block_drift_eval(
4034                eval,
4035                cache_idx,
4036                component,
4037                HyperDriftKind::LogEpsilon,
4038                direction,
4039            )?;
4040        }
4041        Ok(total)
4042    }
4043
4044    fn adaptive_explicit_second_order_parts(
4045        &self,
4046        eval: &SpatialAdaptiveExactEvaluation,
4047        left: SpatialAdaptiveHyperSpec,
4048        right: SpatialAdaptiveHyperSpec,
4049    ) -> Result<(f64, Array1<f64>, Array2<f64>), String> {
4050        // Structural sparsity from the adaptive penalty algebra:
4051        //
4052        //   - alpha_{m,p} / alpha_{n,r} is nonzero only when (m,p) = (n,r),
4053        //   - alpha_{m,p} / eta_r is nonzero only when p = r,
4054        //   - eta_p / eta_r is nonzero only when p = r,
4055        //
4056        // with eta_p contributions summed over all adaptive terms m because the
4057        // three log-epsilon coordinates are shared globally by penalty type.
4058        match left.explicit_second_order_kind(right) {
4059            SpatialAdaptiveExplicitSecondOrderKind::StructuralZero => {
4060                let (score, hessian) = self.zero_hyper_parts();
4061                Ok((0.0, score, hessian))
4062            }
4063            SpatialAdaptiveExplicitSecondOrderKind::LocalAlphaAlpha => self.adaptive_block_eval(
4064                eval,
4065                left.cache_index,
4066                AdaptiveComponent::from_index(left.component_index())?,
4067                HyperDerivativeKind::Rho,
4068            ),
4069            SpatialAdaptiveExplicitSecondOrderKind::LocalAlphaEta => {
4070                let local_alpha = if left.kind.is_log_lambda() {
4071                    left
4072                } else {
4073                    right
4074                };
4075                self.adaptive_block_eval(
4076                    eval,
4077                    local_alpha.cache_index,
4078                    AdaptiveComponent::from_index(local_alpha.component_index())?,
4079                    HyperDerivativeKind::LogEpsilonFirst,
4080                )
4081            }
4082            SpatialAdaptiveExplicitSecondOrderKind::SharedEtaEta => {
4083                self.adaptive_shared_log_epsilon_second_parts(eval, left.component_index())
4084            }
4085        }
4086    }
4087
4088    /// Unified per-block directional-drift assembly. Owns the shared cache /
4089    /// hyperparameter / exact-state lookup, the per-component direction
4090    /// projection through the collocation operators, the operator embedding, and
4091    /// the global embedding via [`Self::embed_local_hyper_hessian`]. The only
4092    /// piece that varies with `drift` is the directional state accessor:
4093    /// [`HyperDriftKind::Rho`] takes the bare directional Hessian drift, while
4094    /// [`HyperDriftKind::LogEpsilon`] takes its `log epsilon` derivative.
4095    fn adaptive_block_drift_eval(
4096        &self,
4097        eval: &SpatialAdaptiveExactEvaluation,
4098        cache_idx: usize,
4099        component: AdaptiveComponent,
4100        drift: HyperDriftKind,
4101        direction: &Array1<f64>,
4102    ) -> Result<Array2<f64>, String> {
4103        let cache = self
4104            .runtime_caches
4105            .get(cache_idx)
4106            .ok_or_else(|| format!("adaptive cache index {} out of bounds", cache_idx))?;
4107        let params = self
4108            .adaptive_params
4109            .get(cache_idx)
4110            .ok_or_else(|| format!("adaptive hyperparameter block {} out of bounds", cache_idx))?;
4111        let state = eval
4112            .adaptive_states
4113            .get(cache_idx)
4114            .ok_or_else(|| format!("adaptive exact state index {} out of bounds", cache_idx))?;
4115        let direction_local = direction.slice(s![cache.coeff_global_range.clone()]);
4116
4117        let local_hessian = match component {
4118            AdaptiveComponent::Magnitude => {
4119                let d0_u = cache.d0.dot(&direction_local);
4120                let mag = &state.magnitude;
4121                let diag = match drift {
4122                    HyperDriftKind::Rho => mag.directionalhessian_diag(&d0_u),
4123                    HyperDriftKind::LogEpsilon => {
4124                        mag.log_epsilon_betahessian_directional_diag(&d0_u)
4125                    }
4126                };
4127                params.lambda[0] * scalar_operatorhessian(&cache.d0, &diag)
4128            }
4129            AdaptiveComponent::Gradient => {
4130                let d1_u = cache.d1.dot(&direction_local);
4131                let direction_blocks = collocationgradient_blocks(&d1_u, cache.dimension)
4132                    .map_err(|e| e.to_string())?;
4133                let grad = &state.gradient;
4134                let blocks = match drift {
4135                    HyperDriftKind::Rho => grad.directionalhessian_blocks(&direction_blocks),
4136                    HyperDriftKind::LogEpsilon => {
4137                        grad.log_epsilon_betahessian_directional_blocks(&direction_blocks)
4138                    }
4139                };
4140                params.lambda[1]
4141                    * grouped_operatorhessian(&cache.d1, cache.dimension, &blocks)
4142                        .map_err(|e| e.to_string())?
4143            }
4144            AdaptiveComponent::Curvature => {
4145                let group = cache.dimension * cache.dimension;
4146                let d2_u = cache.d2.dot(&direction_local);
4147                let direction_blocks =
4148                    collocationhessian_blocks(&d2_u, cache.dimension).map_err(|e| e.to_string())?;
4149                let curv = &state.curvature;
4150                let blocks = match drift {
4151                    HyperDriftKind::Rho => curv.directionalhessian_blocks(&direction_blocks),
4152                    HyperDriftKind::LogEpsilon => {
4153                        curv.log_epsilon_betahessian_directional_blocks(&direction_blocks)
4154                    }
4155                };
4156                params.lambda[2]
4157                    * grouped_operatorhessian(&cache.d2, group, &blocks)
4158                        .map_err(|e| e.to_string())?
4159            }
4160        };
4161
4162        Ok(self.embed_local_hyper_hessian(&cache.coeff_global_range, &local_hessian))
4163    }
4164
4165    fn adaptive_hyper_parts(
4166        &self,
4167        eval: &SpatialAdaptiveExactEvaluation,
4168        hyper: SpatialAdaptiveHyperSpec,
4169    ) -> Result<(f64, Array1<f64>, Array2<f64>), String> {
4170        match hyper.kind {
4171            // Per-term `log lambda` (rho) hyper-derivative: the bare penalty
4172            // pieces for this cache's selected component.
4173            SpatialAdaptiveHyperKind::LogLambdaMagnitude
4174            | SpatialAdaptiveHyperKind::LogLambdaGradient
4175            | SpatialAdaptiveHyperKind::LogLambdaCurvature => self.adaptive_block_eval(
4176                eval,
4177                hyper.cache_index,
4178                AdaptiveComponent::from_index(hyper.component_index())?,
4179                HyperDerivativeKind::Rho,
4180            ),
4181            // Shared `log epsilon` hyper-derivative: summed across all terms.
4182            SpatialAdaptiveHyperKind::LogEpsilonMagnitude
4183            | SpatialAdaptiveHyperKind::LogEpsilonGradient
4184            | SpatialAdaptiveHyperKind::LogEpsilonCurvature => {
4185                self.adaptive_shared_log_epsilon_parts(eval, hyper.component_index())
4186            }
4187        }
4188    }
4189
4190    fn exact_evaluation_uncached(
4191        &self,
4192        beta: &Array1<f64>,
4193    ) -> Result<SpatialAdaptiveExactEvaluation, String> {
4194        let eta = self.total_eta(beta);
4195        let obs = evaluate_standard_familyobservations(
4196            self.family.clone(),
4197            self.latent_cloglog_state.as_ref(),
4198            self.mixture_link_state.as_ref(),
4199            self.sas_link_state.as_ref(),
4200            &self.y,
4201            &self.weights,
4202            &eta,
4203        )
4204        .map_err(|e| e.to_string())?;
4205        let p = beta.len();
4206        let mut penalty_value = 0.0;
4207        let mut penaltygradient = Array1::<f64>::zeros(p);
4208        let mut penaltyhessian = Array2::<f64>::zeros((p, p));
4209        let mut adaptive_states = Vec::with_capacity(self.runtime_caches.len());
4210
4211        for (cache_idx, cache) in self.runtime_caches.iter().enumerate() {
4212            let params = self.adaptive_params.get(cache_idx).ok_or_else(|| {
4213                format!(
4214                    "missing adaptive parameter block for cache {}",
4215                    cache.termname
4216                )
4217            })?;
4218            let beta_local = beta.slice(s![cache.coeff_global_range.clone()]);
4219            let state =
4220                SpatialPenaltyExactState::from_beta_local(beta_local, cache, params.epsilon)
4221                    .map_err(|e| e.to_string())?;
4222
4223            let g0 = scalar_operatorgradient(&cache.d0, &state.magnitude.betagradient_coeff());
4224            let gg = grouped_operatorgradient(
4225                &cache.d1,
4226                cache.dimension,
4227                &state.gradient.betagradient_blocks(),
4228            )
4229            .map_err(|e| e.to_string())?;
4230            let gc = grouped_operatorgradient(
4231                &cache.d2,
4232                cache.dimension * cache.dimension,
4233                &state.curvature.betagradient_blocks(),
4234            )
4235            .map_err(|e| e.to_string())?;
4236            let h0 = scalar_operatorhessian(&cache.d0, &state.magnitude.betahessian_diag());
4237            let hg = grouped_operatorhessian(
4238                &cache.d1,
4239                cache.dimension,
4240                &state.gradient.betahessian_blocks(),
4241            )
4242            .map_err(|e| e.to_string())?;
4243            let hc = grouped_operatorhessian(
4244                &cache.d2,
4245                cache.dimension * cache.dimension,
4246                &state.curvature.betahessian_blocks(),
4247            )
4248            .map_err(|e| e.to_string())?;
4249
4250            let lambda0 = params.lambda[0];
4251            let lambdag = params.lambda[1];
4252            let lambdac = params.lambda[2];
4253
4254            penalty_value += lambda0 * state.magnitude.penalty_value();
4255            penalty_value += lambdag * state.gradient.penalty_value();
4256            penalty_value += lambdac * state.curvature.penalty_value();
4257
4258            let range = cache.coeff_global_range.clone();
4259            {
4260                let mut grad_local = penaltygradient.slice_mut(s![range.clone()]);
4261                grad_local += &(g0.mapv(|v| lambda0 * v));
4262                grad_local += &(gg.mapv(|v| lambdag * v));
4263                grad_local += &(gc.mapv(|v| lambdac * v));
4264            }
4265            {
4266                let mut h_local = penaltyhessian.slice_mut(s![range.clone(), range]);
4267                h_local += &h0.mapv(|v| lambda0 * v);
4268                h_local += &hg.mapv(|v| lambdag * v);
4269                h_local += &hc.mapv(|v| lambdac * v);
4270            }
4271
4272            adaptive_states.push(state);
4273        }
4274
4275        let (fixed_quadraticvalue, fixed_quadraticgradient) = self.fixed_quadratic_terms(beta);
4276        Ok(SpatialAdaptiveExactEvaluation {
4277            obs,
4278            adaptive_states,
4279            adaptive_penalty_value: penalty_value,
4280            adaptive_penaltygradient: penaltygradient,
4281            adaptive_penaltyhessian: penaltyhessian,
4282            fixed_quadraticvalue,
4283            fixed_quadraticgradient,
4284            fixed_quadratichessian: self.fixed_quadratichessian.as_ref().clone(),
4285        })
4286    }
4287
4288    fn exact_evaluation(
4289        &self,
4290        beta: &Array1<f64>,
4291    ) -> Result<Arc<SpatialAdaptiveExactEvaluation>, String> {
4292        {
4293            let cache = self
4294                .exact_eval_cache
4295                .lock()
4296                .map_err(|_| "spatial adaptive exact-evaluation cache lock poisoned".to_string())?;
4297            if let Some(cached) = cache.as_ref()
4298                && cached.beta.len() == beta.len()
4299                && cached
4300                    .beta
4301                    .iter()
4302                    .zip(beta.iter())
4303                    .all(|(&left, &right)| left == right)
4304            {
4305                return Ok(Arc::clone(&cached.eval));
4306            }
4307        }
4308
4309        let eval = Arc::new(self.exact_evaluation_uncached(beta)?);
4310        let mut cache = self
4311            .exact_eval_cache
4312            .lock()
4313            .map_err(|_| "spatial adaptive exact-evaluation cache lock poisoned".to_string())?;
4314        *cache = Some(CachedSpatialAdaptiveExactEvaluation {
4315            beta: beta.clone(),
4316            eval: Arc::clone(&eval),
4317        });
4318        Ok(eval)
4319    }
4320
4321    fn exacthessian_directional_derivative_from_evaluation(
4322        &self,
4323        beta: &Array1<f64>,
4324        eval: &SpatialAdaptiveExactEvaluation,
4325        direction: &Array1<f64>,
4326    ) -> Result<Array2<f64>, String> {
4327        assert_eq!(
4328            beta.len(),
4329            direction.len(),
4330            "beta/direction length mismatch",
4331        );
4332        let d_eta = gam_linalg::faer_ndarray::fast_av(self.design.as_ref(), direction);
4333        let mut total = xt_diag_x_dense(
4334            self.design.view(),
4335            (&eval.obs.neghessian_eta_derivative * &d_eta).view(),
4336        )?;
4337        for (cache_idx, cache) in self.runtime_caches.iter().enumerate() {
4338            let params = self.adaptive_params.get(cache_idx).ok_or_else(|| {
4339                format!(
4340                    "missing adaptive parameter block for cache {}",
4341                    cache.termname
4342                )
4343            })?;
4344            let state = eval
4345                .adaptive_states
4346                .get(cache_idx)
4347                .ok_or_else(|| format!("missing adaptive state for cache {}", cache.termname))?;
4348            let direction_local = direction.slice(s![cache.coeff_global_range.clone()]);
4349            let d0_u = cache.d0.dot(&direction_local);
4350            let d1_u = cache.d1.dot(&direction_local);
4351            let d2_u = cache.d2.dot(&direction_local);
4352            let h0 =
4353                scalar_operatorhessian(&cache.d0, &state.magnitude.directionalhessian_diag(&d0_u))
4354                    .mapv(|v| params.lambda[0] * v);
4355            let hg = grouped_operatorhessian(
4356                &cache.d1,
4357                cache.dimension,
4358                &state.gradient.directionalhessian_blocks(
4359                    &collocationgradient_blocks(&d1_u, cache.dimension)
4360                        .map_err(|e| e.to_string())?,
4361                ),
4362            )
4363            .map_err(|e| e.to_string())?
4364            .mapv(|v| params.lambda[1] * v);
4365            let hc = grouped_operatorhessian(
4366                &cache.d2,
4367                cache.dimension * cache.dimension,
4368                &state.curvature.directionalhessian_blocks(
4369                    &collocationhessian_blocks(&d2_u, cache.dimension)
4370                        .map_err(|e| e.to_string())?,
4371                ),
4372            )
4373            .map_err(|e| e.to_string())?
4374            .mapv(|v| params.lambda[2] * v);
4375            let range = cache.coeff_global_range.clone();
4376            let mut local = total.slice_mut(s![range.clone(), range]);
4377            local += &h0;
4378            local += &hg;
4379            local += &hc;
4380        }
4381        Ok(total)
4382    }
4383
4384    /// Exact second directional derivative `D²_β H[u, v]` of the joint
4385    /// (likelihood + adaptive Charbonnier penalty) Hessian, needed so the outer
4386    /// LAML's joint-Jeffreys curvature drift `D_β H_Φ[β̇]` is exact rather than
4387    /// silently dropped (which leaves the outer hypergradient inconsistent with
4388    /// the `½log|H+H_Φ|` objective it folds `H_Φ` into).
4389    ///
4390    /// The data block contributes `Xᵀ diag(ℓ'''(η_i) (Xu)_i (Xv)_i) X`, where
4391    /// `ℓ'''` is the third derivative of the per-observation log-likelihood in
4392    /// `η`. The observation state exposes the working weight `w=−ℓ''` and its
4393    /// first `η`-derivative `w'` (`neghessian_eta_derivative`) but not `w''`, so
4394    /// the exact data term is available only on the **constant-weight** path
4395    /// (`w' ≡ 0`, e.g. Gaussian identity), where `w'' ≡ 0` and the data block
4396    /// second derivative vanishes. On a varying-weight family we return `None`
4397    /// (the safe, pre-existing behavior: the drift degrades to zero rather than
4398    /// to a wrong value) until the observation contract carries `w''`.
4399    ///
4400    /// The penalty block is always exact: with `λ_m G_mᵀ B_m(G_m β) G_m` the
4401    /// per-component penalty Hessian, `D²_β` is `λ_m Σ_k G_mᵀ N_m,k G_m` using the
4402    /// scalar (`second_directionalhessian_diag`) / grouped
4403    /// (`second_directionalhessian_blocks`) fourth-derivative contractions.
4404    fn exacthessian_second_directional_derivative_from_evaluation(
4405        &self,
4406        eval: &SpatialAdaptiveExactEvaluation,
4407        direction_u: &Array1<f64>,
4408        direction_v: &Array1<f64>,
4409    ) -> Result<Option<Array2<f64>>, String> {
4410        let p = self.design.ncols();
4411        // Data block: exact only when the working weight is constant in η.
4412        if eval.obs.neghessian_eta_derivative.iter().any(|&w| w != 0.0) {
4413            return Ok(None);
4414        }
4415        let mut total = Array2::<f64>::zeros((p, p));
4416        for (cache_idx, cache) in self.runtime_caches.iter().enumerate() {
4417            let params = self.adaptive_params.get(cache_idx).ok_or_else(|| {
4418                format!(
4419                    "missing adaptive parameter block for cache {}",
4420                    cache.termname
4421                )
4422            })?;
4423            let state = eval
4424                .adaptive_states
4425                .get(cache_idx)
4426                .ok_or_else(|| format!("missing adaptive state for cache {}", cache.termname))?;
4427            let u_local = direction_u.slice(s![cache.coeff_global_range.clone()]);
4428            let v_local = direction_v.slice(s![cache.coeff_global_range.clone()]);
4429
4430            // Magnitude (scalar d0).
4431            let q0_u = cache.d0.dot(&u_local);
4432            let q0_v = cache.d0.dot(&v_local);
4433            let h0 = scalar_operatorhessian(
4434                &cache.d0,
4435                &state.magnitude.second_directionalhessian_diag(&q0_u, &q0_v),
4436            )
4437            .mapv(|x| params.lambda[0] * x);
4438
4439            // Gradient (grouped d1, block dim = dimension).
4440            let a1 = collocationgradient_blocks(&cache.d1.dot(&u_local), cache.dimension)
4441                .map_err(|e| e.to_string())?;
4442            let b1 = collocationgradient_blocks(&cache.d1.dot(&v_local), cache.dimension)
4443                .map_err(|e| e.to_string())?;
4444            let hg = grouped_operatorhessian(
4445                &cache.d1,
4446                cache.dimension,
4447                &state.gradient.second_directionalhessian_blocks(&a1, &b1),
4448            )
4449            .map_err(|e| e.to_string())?
4450            .mapv(|x| params.lambda[1] * x);
4451
4452            // Curvature (grouped d2, block dim = dimension²).
4453            let a2 = collocationhessian_blocks(&cache.d2.dot(&u_local), cache.dimension)
4454                .map_err(|e| e.to_string())?;
4455            let b2 = collocationhessian_blocks(&cache.d2.dot(&v_local), cache.dimension)
4456                .map_err(|e| e.to_string())?;
4457            let hc = grouped_operatorhessian(
4458                &cache.d2,
4459                cache.dimension * cache.dimension,
4460                &state.curvature.second_directionalhessian_blocks(&a2, &b2),
4461            )
4462            .map_err(|e| e.to_string())?
4463            .mapv(|x| params.lambda[2] * x);
4464
4465            let range = cache.coeff_global_range.clone();
4466            let mut local = total.slice_mut(s![range.clone(), range]);
4467            local += &h0;
4468            local += &hg;
4469            local += &hc;
4470        }
4471        Ok(Some(total))
4472    }
4473}
4474
4475impl CustomFamily for SpatialAdaptiveExactFamily {
4476    // Preserve the pre-gam#1395 behavior: the trait default flipped to OFF (the
4477    // flat-prior exact-Newton objective carries no Jeffreys term), so families
4478    // that historically armed the term by default opt back in explicitly.
4479    fn joint_jeffreys_term_required(&self) -> bool {
4480        true
4481    }
4482
4483    fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
4484        let beta = &expect_single_block_state(block_states, "spatial adaptive exact family")?.beta;
4485        let eval = self.exact_evaluation(beta)?;
4486        let mut gradient = fast_atv(&self.design, &eval.obs.score);
4487        gradient -= &eval.total_penaltygradient();
4488        let mut hessian = xt_diag_x_dense(self.design.view(), eval.obs.neghessian_eta.view())?;
4489        hessian += &eval.total_penaltyhessian();
4490        Ok(FamilyEvaluation {
4491            log_likelihood: eval.obs.log_likelihood - eval.total_penalty_value(),
4492            blockworking_sets: vec![BlockWorkingSet::ExactNewton {
4493                gradient,
4494                hessian: SymmetricMatrix::Dense(hessian),
4495            }],
4496        })
4497    }
4498
4499    fn log_likelihood_only(&self, block_states: &[ParameterBlockState]) -> Result<f64, String> {
4500        let state = expect_single_block_state(block_states, "spatial adaptive exact family")?;
4501        let beta = &state.beta;
4502        let obs = evaluate_standard_familyobservations(
4503            self.family.clone(),
4504            self.latent_cloglog_state.as_ref(),
4505            self.mixture_link_state.as_ref(),
4506            self.sas_link_state.as_ref(),
4507            &self.y,
4508            &self.weights,
4509            &state.eta,
4510        )
4511        .map_err(|e| e.to_string())?;
4512        let adaptive_penalty = self.adaptive_penalty_value_only(beta)?;
4513        let (fixed_quadratic, _) = self.fixed_quadratic_terms(beta);
4514        Ok(obs.log_likelihood - adaptive_penalty - fixed_quadratic)
4515    }
4516
4517    fn exact_newton_outerobjective(&self) -> ExactNewtonOuterObjective {
4518        ExactNewtonOuterObjective::StrictPseudoLaplace
4519    }
4520
4521    fn exact_newton_joint_hessian(
4522        &self,
4523        block_states: &[ParameterBlockState],
4524    ) -> Result<Option<Array2<f64>>, String> {
4525        let beta = &expect_single_block_state(block_states, "spatial adaptive exact family")?.beta;
4526        let eval = self.exact_evaluation(beta)?;
4527        Ok(Some(eval.totalobjectivehessian(&self.design)?))
4528    }
4529
4530    fn exact_newton_hessian_directional_derivative(
4531        &self,
4532        block_states: &[ParameterBlockState],
4533        block_idx: usize,
4534        d_beta: &Array1<f64>,
4535    ) -> Result<Option<Array2<f64>>, String> {
4536        expect_block_idx_zero(block_idx, "spatial adaptive exact family", "")?;
4537        self.exact_newton_joint_hessian_directional_derivative(block_states, d_beta)
4538    }
4539
4540    fn exact_newton_joint_hessian_directional_derivative(
4541        &self,
4542        block_states: &[ParameterBlockState],
4543        d_beta_flat: &Array1<f64>,
4544    ) -> Result<Option<Array2<f64>>, String> {
4545        let beta = &expect_single_block_state(block_states, "spatial adaptive exact family")?.beta;
4546        if d_beta_flat.len() != beta.len() {
4547            return Err(SmoothError::dimension_mismatch(format!(
4548                "spatial adaptive exact family direction length mismatch: got {}, expected {}",
4549                d_beta_flat.len(),
4550                beta.len()
4551            ))
4552            .into());
4553        }
4554        let eval = self.exact_evaluation(beta)?;
4555        Ok(Some(
4556            self.exacthessian_directional_derivative_from_evaluation(beta, &eval, d_beta_flat)?,
4557        ))
4558    }
4559
4560    fn exact_newton_joint_hessiansecond_directional_derivative(
4561        &self,
4562        block_states: &[ParameterBlockState],
4563        d_beta_u_flat: &Array1<f64>,
4564        d_betav_flat: &Array1<f64>,
4565    ) -> Result<Option<Array2<f64>>, String> {
4566        let beta = &expect_single_block_state(block_states, "spatial adaptive exact family")?.beta;
4567        if d_beta_u_flat.len() != beta.len() || d_betav_flat.len() != beta.len() {
4568            return Err(SmoothError::dimension_mismatch(format!(
4569                "spatial adaptive exact family second-direction length mismatch: got ({}, {}), expected {}",
4570                d_beta_u_flat.len(),
4571                d_betav_flat.len(),
4572                beta.len()
4573            ))
4574            .into());
4575        }
4576        let eval = self.exact_evaluation(beta)?;
4577        self.exacthessian_second_directional_derivative_from_evaluation(
4578            &eval,
4579            d_beta_u_flat,
4580            d_betav_flat,
4581        )
4582    }
4583
4584    fn block_linear_constraints(
4585        &self,
4586        block_states: &[ParameterBlockState],
4587        block_idx: usize,
4588        block_spec: &ParameterBlockSpec,
4589    ) -> Result<Option<LinearInequalityConstraints>, String> {
4590        assert!(!block_states.is_empty(), "block_states must be non-empty");
4591        assert!(
4592            !block_spec.name.is_empty(),
4593            "block spec name must be non-empty",
4594        );
4595        expect_block_idx_zero(block_idx, "spatial adaptive exact family", "")?;
4596        Ok(self.linear_constraints.clone())
4597    }
4598
4599    fn exact_newton_joint_psi_terms(
4600        &self,
4601        block_states: &[ParameterBlockState],
4602        specs: &[ParameterBlockSpec],
4603        derivative_blocks: &[Vec<CustomFamilyBlockPsiDerivative>],
4604        psi_index: usize,
4605    ) -> Result<Option<ExactNewtonJointPsiTerms>, String> {
4606        if block_states.len() != 1 || specs.len() != 1 || derivative_blocks.len() != 1 {
4607            return Err(SmoothError::dimension_mismatch(format!(
4608                "spatial adaptive exact family expects one block/state/spec/psi payload, got states={} specs={} deriv_blocks={}",
4609                block_states.len(),
4610                specs.len(),
4611                derivative_blocks.len()
4612            ))
4613            .into());
4614        }
4615        derivative_blocks[0]
4616            .get(psi_index)
4617            .ok_or_else(|| format!("adaptive psi index {} out of bounds", psi_index))?;
4618        let hyper = self
4619            .hyperspecs
4620            .get(psi_index)
4621            .ok_or_else(|| format!("adaptive psi index {} out of bounds", psi_index))?;
4622        let beta = &block_states[0].beta;
4623        let eval = self.exact_evaluation(beta)?;
4624        let (direct, beta_mixed, betahessian_explicit) =
4625            self.adaptive_hyper_parts(&eval, *hyper)?;
4626
4627        // Exact pseudo-Laplace psi-gradient.
4628        //
4629        // For one hyperparameter coordinate a we use the exact formula
4630        //
4631        //   d/da L_tilde
4632        //   = J_a + 0.5 tr(H^{-1} Hdot_a),
4633        //
4634        // with
4635        //
4636        //   H u_a   = J_{beta,a},
4637        //   beta_a  = -u_a,
4638        //   Hdot_a  = J_{beta,beta,a} + D_beta(H)[beta_a]
4639        //           = J_{beta,beta,a} - D_beta(H)[u_a].
4640        //
4641        // Here:
4642        //   - `direct` is J_a,
4643        //   - `beta_mixed` is J_{beta,a},
4644        //   - `betahessian_explicit` is J_{beta,beta,a},
4645        //   - `exacthessian_directional_derivative_from_evaluation(..., u)` returns
4646        //     D_beta(H)[u] for the exact likelihood-plus-Charbonnier model.
4647        Ok(Some(ExactNewtonJointPsiTerms {
4648            objective_psi: direct,
4649            score_psi: beta_mixed,
4650            hessian_psi: betahessian_explicit,
4651            hessian_psi_operator: None,
4652        }))
4653    }
4654
4655    fn exact_newton_joint_psisecond_order_terms(
4656        &self,
4657        block_states: &[ParameterBlockState],
4658        specs: &[ParameterBlockSpec],
4659        derivative_blocks: &[Vec<CustomFamilyBlockPsiDerivative>],
4660        psi_i: usize,
4661        psi_j: usize,
4662    ) -> Result<Option<gam_problem::ExactNewtonJointPsiSecondOrderTerms>, String> {
4663        if block_states.len() != 1 || specs.len() != 1 || derivative_blocks.len() != 1 {
4664            return Err(SmoothError::dimension_mismatch(format!(
4665                "spatial adaptive exact family expects one block/state/spec/psi payload, got states={} specs={} deriv_blocks={}",
4666                block_states.len(),
4667                specs.len(),
4668                derivative_blocks.len()
4669            ))
4670            .into());
4671        }
4672        derivative_blocks[0]
4673            .get(psi_i)
4674            .ok_or_else(|| format!("adaptive psi index {} out of bounds", psi_i))?;
4675        derivative_blocks[0]
4676            .get(psi_j)
4677            .ok_or_else(|| format!("adaptive psi index {} out of bounds", psi_j))?;
4678        let hyper_i = self
4679            .hyperspecs
4680            .get(psi_i)
4681            .ok_or_else(|| format!("adaptive psi index {} out of bounds", psi_i))?;
4682        let hyper_j = self
4683            .hyperspecs
4684            .get(psi_j)
4685            .ok_or_else(|| format!("adaptive psi index {} out of bounds", psi_j))?;
4686        let beta = &block_states[0].beta;
4687        let eval = self.exact_evaluation(beta)?;
4688        let (objective_psi_psi, score_psi_psi, hessian_psi_psi) =
4689            self.adaptive_explicit_second_order_parts(&eval, *hyper_i, *hyper_j)?;
4690
4691        Ok(Some(
4692            gam_problem::ExactNewtonJointPsiSecondOrderTerms {
4693                objective_psi_psi,
4694                score_psi_psi,
4695                hessian_psi_psi,
4696                hessian_psi_psi_operator: None,
4697            },
4698        ))
4699    }
4700
4701    fn exact_newton_joint_psihessian_directional_derivative(
4702        &self,
4703        block_states: &[ParameterBlockState],
4704        specs: &[ParameterBlockSpec],
4705        derivative_blocks: &[Vec<CustomFamilyBlockPsiDerivative>],
4706        psi_index: usize,
4707        direction: &Array1<f64>,
4708    ) -> Result<Option<Array2<f64>>, String> {
4709        if block_states.len() != 1 || specs.len() != 1 || derivative_blocks.len() != 1 {
4710            return Err(SmoothError::dimension_mismatch(format!(
4711                "spatial adaptive exact family expects one block/state/spec/psi payload, got states={} specs={} deriv_blocks={}",
4712                block_states.len(),
4713                specs.len(),
4714                derivative_blocks.len()
4715            ))
4716            .into());
4717        }
4718        let beta = &block_states[0].beta;
4719        if direction.len() != beta.len() {
4720            return Err(SmoothError::dimension_mismatch(format!(
4721                "spatial adaptive exact family direction length mismatch: got {}, expected {}",
4722                direction.len(),
4723                beta.len()
4724            ))
4725            .into());
4726        }
4727        derivative_blocks[0]
4728            .get(psi_index)
4729            .ok_or_else(|| format!("adaptive psi index {} out of bounds", psi_index))?;
4730        let hyper = self
4731            .hyperspecs
4732            .get(psi_index)
4733            .ok_or_else(|| format!("adaptive psi index {} out of bounds", psi_index))?;
4734        let eval = self.exact_evaluation(beta)?;
4735        let drift = match hyper.kind {
4736            SpatialAdaptiveHyperKind::LogLambdaMagnitude
4737            | SpatialAdaptiveHyperKind::LogLambdaGradient
4738            | SpatialAdaptiveHyperKind::LogLambdaCurvature => self.adaptive_block_drift_eval(
4739                &eval,
4740                hyper.cache_index,
4741                AdaptiveComponent::from_index(hyper.kind.component_index())?,
4742                HyperDriftKind::Rho,
4743                direction,
4744            )?,
4745            SpatialAdaptiveHyperKind::LogEpsilonMagnitude
4746            | SpatialAdaptiveHyperKind::LogEpsilonGradient
4747            | SpatialAdaptiveHyperKind::LogEpsilonCurvature => self
4748                .adaptive_shared_log_epsilon_drift(
4749                    &eval,
4750                    hyper.kind.component_index(),
4751                    direction,
4752                )?,
4753        };
4754        Ok(Some(drift))
4755    }
4756}
4757
4758fn expect_single_block_state<'a>(
4759    block_states: &'a [ParameterBlockState],
4760    family_name: &str,
4761) -> Result<&'a ParameterBlockState, String> {
4762    crate::block_layout::block_count::validate_block_count::<SmoothError>(
4763        family_name,
4764        1,
4765        block_states.len(),
4766    )?;
4767    Ok(&block_states[0])
4768}
4769
4770fn expect_block_idx_zero(block_idx: usize, family_name: &str, context: &str) -> Result<(), String> {
4771    if block_idx != 0 {
4772        return Err(SmoothError::invalid_index(format!(
4773            "{family_name} expects block_idx 0{context}, got {block_idx}"
4774        ))
4775        .into());
4776    }
4777    Ok::<(), _>(())
4778}
4779
4780impl BoundedLinearFamily {
4781    fn bounded_term_derivative_data(
4782        &self,
4783        latent_beta: &Array1<f64>,
4784    ) -> (
4785        Array1<f64>,
4786        Array1<f64>,
4787        Array1<f64>,
4788        Array1<f64>,
4789        Array1<f64>,
4790    ) {
4791        let p = latent_beta.len();
4792        let mut beta_user = latent_beta.clone();
4793        let mut jac_diag = Array1::<f64>::ones(p);
4794        let mut second_diag = Array1::<f64>::zeros(p);
4795        let mut third_diag = Array1::<f64>::zeros(p);
4796        let mut priorthird = Array1::<f64>::zeros(p);
4797        for term in &self.bounded_terms {
4798            let (beta, _, db_dtheta, d2b_dtheta2, d3b_dtheta3) =
4799                bounded_latent_derivatives(latent_beta[term.col_idx], term.min, term.max);
4800            beta_user[term.col_idx] = beta;
4801            jac_diag[term.col_idx] = db_dtheta;
4802            second_diag[term.col_idx] = d2b_dtheta2;
4803            third_diag[term.col_idx] = d3b_dtheta3;
4804            let (_, _, _, prior_neghess_derivative) =
4805                bounded_prior_terms(latent_beta[term.col_idx], &term.prior);
4806            priorthird[term.col_idx] = prior_neghess_derivative;
4807        }
4808        (beta_user, jac_diag, second_diag, third_diag, priorthird)
4809    }
4810
4811    fn user_beta_and_jacobian(&self, latent_beta: &Array1<f64>) -> (Array1<f64>, Array1<f64>) {
4812        let (beta_user, jac_diag, _, _, _) = self.bounded_term_derivative_data(latent_beta);
4813        (beta_user, jac_diag)
4814    }
4815
4816    fn nonlinear_offset_from_latent(&self, latent_beta: &Array1<f64>) -> Array1<f64> {
4817        let mut offset = self.offset.clone();
4818        for term in &self.bounded_terms {
4819            let (beta, _, _) =
4820                bounded_latent_to_user(latent_beta[term.col_idx], term.min, term.max);
4821            offset.scaled_add(beta, &self.design.column(term.col_idx));
4822        }
4823        offset
4824    }
4825
4826    fn effective_design_for_latent(&self, jac_diag: &Array1<f64>) -> Array2<f64> {
4827        let mut x_eff = self.design.clone();
4828        for term in &self.bounded_terms {
4829            x_eff
4830                .column_mut(term.col_idx)
4831                .mapv_inplace(|v| v * jac_diag[term.col_idx]);
4832        }
4833        x_eff
4834    }
4835
4836    fn exacthessian_andgradient(
4837        &self,
4838        latent_beta: &Array1<f64>,
4839    ) -> Result<
4840        (
4841            StandardFamilyObservationState,
4842            Array2<f64>,
4843            Array1<f64>,
4844            f64,
4845            Array1<f64>,
4846            Array1<f64>,
4847            Array1<f64>,
4848        ),
4849        String,
4850    > {
4851        let (_, jac_diag, second_diag, third_diag, priorthird) =
4852            self.bounded_term_derivative_data(latent_beta);
4853        let x_eff = self.effective_design_for_latent(&jac_diag);
4854        let eta =
4855            self.designzeroed.dot(latent_beta) + self.nonlinear_offset_from_latent(latent_beta);
4856        let obs = evaluate_standard_familyobservations(
4857            self.family.clone(),
4858            self.latent_cloglog_state.as_ref(),
4859            self.mixture_link_state.as_ref(),
4860            self.sas_link_state.as_ref(),
4861            &self.y,
4862            &self.weights,
4863            &eta,
4864        )
4865        .map_err(|e| e.to_string())?;
4866
4867        let mut priorgrad = Array1::<f64>::zeros(latent_beta.len());
4868        let mut prior_neghess = Array2::<f64>::zeros((latent_beta.len(), latent_beta.len()));
4869        let mut prior_loglik = 0.0;
4870        for term in &self.bounded_terms {
4871            let (logp, grad, neghess, _) =
4872                bounded_prior_terms(latent_beta[term.col_idx], &term.prior);
4873            prior_loglik += logp;
4874            priorgrad[term.col_idx] += grad;
4875            prior_neghess[[term.col_idx, term.col_idx]] += neghess;
4876        }
4877
4878        let mut hessian = xt_diag_x_dense(x_eff.view(), obs.neghessian_eta.view())?;
4879        let mut gradient = fast_atv(&x_eff, &obs.score);
4880        for term in &self.bounded_terms {
4881            let score_beta = self.design.column(term.col_idx).dot(&obs.score);
4882            hessian[[term.col_idx, term.col_idx]] -= score_beta * second_diag[term.col_idx];
4883        }
4884        hessian += &prior_neghess;
4885        gradient += &priorgrad;
4886
4887        Ok((
4888            obs,
4889            hessian,
4890            gradient,
4891            prior_loglik,
4892            second_diag,
4893            third_diag,
4894            priorthird,
4895        ))
4896    }
4897
4898    fn evaluation_from_latent(
4899        &self,
4900        latent_beta: &Array1<f64>,
4901    ) -> Result<
4902        (
4903            StandardFamilyObservationState,
4904            Array2<f64>,
4905            Array1<f64>,
4906            f64,
4907        ),
4908        String,
4909    > {
4910        let (obs, hessian, gradient, prior_loglik, _, _, _) =
4911            self.exacthessian_andgradient(latent_beta)?;
4912        Ok((obs, hessian, gradient, prior_loglik))
4913    }
4914}
4915
4916impl CustomFamily for BoundedLinearFamily {
4917    // Preserve the pre-gam#1395 behavior: the trait default flipped to OFF (the
4918    // flat-prior exact-Newton objective carries no Jeffreys term), so families
4919    // that historically armed the term by default opt back in explicitly.
4920    fn joint_jeffreys_term_required(&self) -> bool {
4921        true
4922    }
4923
4924    fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
4925        let latent_beta = &expect_single_block_state(block_states, "bounded linear family")?.beta;
4926        let (obs, hessian, gradient, prior_loglik) = self.evaluation_from_latent(latent_beta)?;
4927        Ok(FamilyEvaluation {
4928            log_likelihood: obs.log_likelihood + prior_loglik,
4929            blockworking_sets: vec![BlockWorkingSet::ExactNewton {
4930                gradient,
4931                hessian: SymmetricMatrix::Dense(hessian),
4932            }],
4933        })
4934    }
4935
4936    fn exact_newton_joint_hessian(
4937        &self,
4938        block_states: &[ParameterBlockState],
4939    ) -> Result<Option<Array2<f64>>, String> {
4940        let latent_beta = &expect_single_block_state(block_states, "bounded linear family")?.beta;
4941        let (_, hessian, _, _) = self.evaluation_from_latent(latent_beta)?;
4942        Ok(Some(hessian))
4943    }
4944
4945    fn exact_newton_hessian_directional_derivative(
4946        &self,
4947        block_states: &[ParameterBlockState],
4948        block_idx: usize,
4949        d_beta: &Array1<f64>,
4950    ) -> Result<Option<Array2<f64>>, String> {
4951        expect_block_idx_zero(block_idx, "bounded linear family", "")?;
4952        self.exact_newton_joint_hessian_directional_derivative(block_states, d_beta)
4953    }
4954
4955    fn exact_newton_joint_hessian_directional_derivative(
4956        &self,
4957        block_states: &[ParameterBlockState],
4958        d_beta_flat: &Array1<f64>,
4959    ) -> Result<Option<Array2<f64>>, String> {
4960        let latent_beta = &expect_single_block_state(block_states, "bounded linear family")?.beta;
4961        if d_beta_flat.len() != latent_beta.len() {
4962            return Err(SmoothError::dimension_mismatch(format!(
4963                "bounded linear family directional derivative length mismatch: got {}, expected {}",
4964                d_beta_flat.len(),
4965                latent_beta.len()
4966            ))
4967            .into());
4968        }
4969
4970        let (obs, _, _, _, second_diag, third_diag, priorthird) =
4971            self.exacthessian_andgradient(latent_beta)?;
4972
4973        let (_, jac_diag, _, _, _) = self.bounded_term_derivative_data(latent_beta);
4974        let x_eff = self.effective_design_for_latent(&jac_diag);
4975        let deta = x_eff.dot(d_beta_flat);
4976        let d_neghess_eta = &obs.neghessian_eta_derivative * &deta;
4977
4978        let mut dx_eff = Array2::<f64>::zeros(x_eff.raw_dim());
4979        for term in &self.bounded_terms {
4980            let scale = second_diag[term.col_idx] * d_beta_flat[term.col_idx];
4981            if scale != 0.0 {
4982                let mut col = dx_eff.column_mut(term.col_idx);
4983                col.assign(&self.design.column(term.col_idx));
4984                col.mapv_inplace(|v| v * scale);
4985            }
4986        }
4987
4988        let mut dhessian = xt_diag_x_dense(x_eff.view(), d_neghess_eta.view())?;
4989        let mut wxdx = Array2::<f64>::zeros((x_eff.ncols(), x_eff.ncols()));
4990        for i in 0..x_eff.nrows() {
4991            let wi = obs.neghessian_eta[i];
4992            if wi == 0.0 {
4993                continue;
4994            }
4995            for a in 0..x_eff.ncols() {
4996                let xa = x_eff[[i, a]];
4997                for b in 0..x_eff.ncols() {
4998                    wxdx[[a, b]] += wi * (dx_eff[[i, a]] * x_eff[[i, b]] + xa * dx_eff[[i, b]]);
4999                }
5000            }
5001        }
5002        dhessian += &wxdx;
5003
5004        let d_score = -&obs.neghessian_eta * &deta;
5005        for term in &self.bounded_terms {
5006            let score_beta = self.design.column(term.col_idx).dot(&obs.score);
5007            let d_score_beta = self.design.column(term.col_idx).dot(&d_score);
5008            dhessian[[term.col_idx, term.col_idx]] -= d_score_beta * second_diag[term.col_idx]
5009                + score_beta * third_diag[term.col_idx] * d_beta_flat[term.col_idx];
5010            dhessian[[term.col_idx, term.col_idx]] +=
5011                priorthird[term.col_idx] * d_beta_flat[term.col_idx];
5012        }
5013
5014        Ok(Some(dhessian))
5015    }
5016
5017    fn block_geometry(
5018        &self,
5019        block_states: &[ParameterBlockState],
5020        spec: &ParameterBlockSpec,
5021    ) -> Result<(DesignMatrix, Array1<f64>), String> {
5022        if block_states.is_empty() {
5023            return Ok((
5024                DesignMatrix::Dense(gam_linalg::matrix::DenseDesignMatrix::from(
5025                    self.designzeroed.clone(),
5026                )),
5027                self.offset.clone(),
5028            ));
5029        }
5030        let offset = self.nonlinear_offset_from_latent(
5031            &expect_single_block_state(block_states, "bounded linear family")?.beta,
5032        );
5033        let x = if spec.design.ncols() == self.designzeroed.ncols() {
5034            self.designzeroed.clone()
5035        } else {
5036            return Err(SmoothError::dimension_mismatch(
5037                "bounded linear family design column mismatch",
5038            )
5039            .into());
5040        };
5041        Ok((
5042            DesignMatrix::Dense(gam_linalg::matrix::DenseDesignMatrix::from(x)),
5043            offset,
5044        ))
5045    }
5046
5047    fn block_geometry_is_dynamic(&self) -> bool {
5048        true
5049    }
5050
5051    fn block_geometry_directional_derivative(
5052        &self,
5053        block_states: &[ParameterBlockState],
5054        block_idx: usize,
5055        spec: &ParameterBlockSpec,
5056        d_beta: &Array1<f64>,
5057    ) -> Result<Option<BlockGeometryDirectionalDerivative>, String> {
5058        expect_block_idx_zero(
5059            block_idx,
5060            "bounded linear family",
5061            " for geometry derivative",
5062        )?;
5063        expect_single_block_state(block_states, "bounded linear family")?;
5064        if d_beta.len() != spec.design.ncols() {
5065            return Err(SmoothError::dimension_mismatch(format!(
5066                "bounded linear family geometry derivative direction mismatch: got {}, expected {}",
5067                d_beta.len(),
5068                spec.design.ncols()
5069            ))
5070            .into());
5071        }
5072        let (_, jac_diag, _, _, _) = self.bounded_term_derivative_data(&block_states[0].beta);
5073        let mut d_offset = Array1::<f64>::zeros(self.offset.len());
5074        let has_drift = self
5075            .bounded_terms
5076            .iter()
5077            .any(|term| jac_diag[term.col_idx] != 0.0 && d_beta[term.col_idx] != 0.0);
5078        if !has_drift {
5079            return Ok(Some(BlockGeometryDirectionalDerivative {
5080                d_design: None,
5081                d_offset,
5082            }));
5083        }
5084        for term in &self.bounded_terms {
5085            let col = term.col_idx;
5086            let drift = jac_diag[col] * d_beta[col];
5087            if drift != 0.0 {
5088                d_offset.scaled_add(drift, &self.design.column(col));
5089            }
5090        }
5091        Ok(Some(BlockGeometryDirectionalDerivative {
5092            d_design: None,
5093            d_offset,
5094        }))
5095    }
5096}
5097
5098#[inline]
5099fn dense_diag_gram_chunkrows(p: usize) -> usize {
5100    const MIN_ROWS: usize = 512;
5101    const MAX_ROWS: usize = 2048;
5102    const TARGET_BYTES: usize = 2 * 1024 * 1024;
5103    let bytes_per_row = p.max(1) * std::mem::size_of::<f64>();
5104    (TARGET_BYTES / bytes_per_row).clamp(MIN_ROWS, MAX_ROWS)
5105}
5106
5107fn xt_diag_x_dense(x: ArrayView2<'_, f64>, w: ArrayView1<'_, f64>) -> Result<Array2<f64>, String> {
5108    if x.nrows() != w.len() {
5109        return Err(SmoothError::dimension_mismatch("xt_diag_x_dense row mismatch").into());
5110    }
5111    let (n, p) = x.dim();
5112    if n == 0 || p == 0 {
5113        return Ok(Array2::<f64>::zeros((p, p)));
5114    }
5115
5116    const STREAMING_BYTES_THRESHOLD: usize = 8 * 1024 * 1024;
5117    let dense_work_bytes = n
5118        .checked_mul(p)
5119        .and_then(|cells| cells.checked_mul(std::mem::size_of::<f64>()))
5120        .unwrap_or(usize::MAX);
5121    if dense_work_bytes <= STREAMING_BYTES_THRESHOLD {
5122        let mut weighted = x.to_owned();
5123        ndarray::Zip::from(weighted.rows_mut())
5124            .and(w)
5125            .par_for_each(|mut row, wi| row *= *wi);
5126        return Ok(fast_atb(&x, &weighted));
5127    }
5128
5129    let chunkrows = dense_diag_gram_chunkrows(p).min(n);
5130    let mut weighted_chunk = Array2::<f64>::zeros((chunkrows, p));
5131    let mut out = Array2::<f64>::zeros((p, p));
5132    for row_start in (0..n).step_by(chunkrows) {
5133        let rows = (n - row_start).min(chunkrows);
5134        let x_chunk = x.slice(s![row_start..row_start + rows, ..]);
5135        {
5136            let mut chunk = weighted_chunk.slice_mut(s![0..rows, ..]);
5137            for local_row in 0..rows {
5138                let scale = w[row_start + local_row];
5139                if scale == 0.0 {
5140                    chunk.row_mut(local_row).fill(0.0);
5141                    continue;
5142                }
5143                for col in 0..p {
5144                    chunk[[local_row, col]] = x_chunk[[local_row, col]] * scale;
5145                }
5146            }
5147        }
5148        out += &fast_atb(&x_chunk, &weighted_chunk.slice(s![0..rows, ..]));
5149    }
5150    Ok(out)
5151}
5152
5153fn trace_of_dense_product(a: &Array2<f64>, b: &Array2<f64>) -> Result<f64, String> {
5154    if a.nrows() != a.ncols() || b.nrows() != b.ncols() || a.nrows() != b.nrows() {
5155        return Err(
5156            SmoothError::dimension_mismatch("trace_of_dense_product dimension mismatch").into(),
5157        );
5158    }
5159    let mut trace = 0.0;
5160    for i in 0..a.nrows() {
5161        for j in 0..a.ncols() {
5162            trace += a[[i, j]] * b[[j, i]];
5163        }
5164    }
5165    Ok(trace)
5166}
5167
5168fn exact_bounded_edf(
5169    penalties: &[PenaltySpec],
5170    lambdas: &Array1<f64>,
5171    latent_cov: &Array2<f64>,
5172) -> Result<(Vec<f64>, Vec<f64>, f64), EstimationError> {
5173    if penalties.len() != lambdas.len() {
5174        crate::bail_invalid_estim!(
5175            "bounded EDF penalty/lambda mismatch: {} penalties vs {} lambdas",
5176            penalties.len(),
5177            lambdas.len()
5178        );
5179    }
5180    if latent_cov.nrows() != latent_cov.ncols() {
5181        crate::bail_invalid_estim!("bounded EDF covariance must be square");
5182    }
5183
5184    let p = latent_cov.nrows();
5185    let mut s_lambda = Array2::<f64>::zeros((p, p));
5186    let mut edf_by_block = Vec::with_capacity(penalties.len());
5187    // Raw per-block penalty trace tr_kk = λ_kk·tr(H⁻¹S_kk) (issue #1219).
5188    let mut penalty_block_trace = Vec::with_capacity(penalties.len());
5189    let mut trace_sum = 0.0;
5190
5191    for (k, ps) in penalties.iter().enumerate() {
5192        let lambda_k = lambdas[k];
5193        match ps {
5194            PenaltySpec::Block {
5195                local, col_range, ..
5196            } => {
5197                s_lambda
5198                    .slice_mut(ndarray::s![col_range.clone(), col_range.clone()])
5199                    .scaled_add(lambda_k, local);
5200                // Compute penalty rank from the block-local matrix directly.
5201                let penalty_rank =
5202                    local
5203                        .nrows()
5204                        .saturating_sub(estimate_penalty_nullity(local).map_err(|e| {
5205                            EstimationError::InvalidInput(format!("bounded EDF rank failed: {e}"))
5206                        })?);
5207                // Trace only involves the block slice of latent_cov.
5208                let cov_block = latent_cov.slice(ndarray::s![col_range.clone(), col_range.clone()]);
5209                let trace_k = lambda_k
5210                    * trace_of_dense_product(&cov_block.to_owned(), local)
5211                        .map_err(EstimationError::InvalidInput)?;
5212                trace_sum += trace_k;
5213                penalty_block_trace.push(trace_k);
5214                let p_k = penalty_rank as f64;
5215                edf_by_block.push((p_k - trace_k).clamp(0.0, p_k));
5216            }
5217            PenaltySpec::Dense(m) | PenaltySpec::DenseWithMean { matrix: m, .. } => {
5218                s_lambda.scaled_add(lambda_k, m);
5219                let penalty_rank = p.saturating_sub(estimate_penalty_nullity(m).map_err(|e| {
5220                    EstimationError::InvalidInput(format!("bounded EDF rank failed: {e}"))
5221                })?);
5222                let trace_k = lambda_k
5223                    * trace_of_dense_product(latent_cov, m)
5224                        .map_err(EstimationError::InvalidInput)?;
5225                trace_sum += trace_k;
5226                penalty_block_trace.push(trace_k);
5227                let p_k = penalty_rank as f64;
5228                edf_by_block.push((p_k - trace_k).clamp(0.0, p_k));
5229            }
5230        }
5231    }
5232
5233    let nullity_total = estimate_penalty_nullity(&s_lambda)
5234        .map_err(|e| EstimationError::InvalidInput(format!("bounded EDF nullity failed: {e}")))?
5235        as f64;
5236    let edf_total = (p as f64 - trace_sum).clamp(nullity_total, p as f64);
5237    Ok((edf_by_block, penalty_block_trace, edf_total))
5238}
5239
5240/// Symmetric posterior-precision inverse for the bounded-coefficient path.
5241///
5242/// The penalised Hessian at a strict posterior maximum is SPD, so its inverse
5243/// is the posterior covariance. We eigendecompose the symmetric precision and
5244/// invert the positive-eigenvalue subspace, projecting out the (rare)
5245/// structural null directions a penalised model leaves flat rather than
5246/// δ-ridging them — the same honest pseudo-inverse contract the strict
5247/// pseudo-Laplace covariance uses (gam#748). A genuinely indefinite precision
5248/// (a negative eigenvalue beyond rounding) means the reported mode is not a
5249/// posterior maximum and is surfaced as a fit-quality error rather than
5250/// masked.
5251fn symmetric_positive_definite_inverse_or_pseudo(
5252    precision: &Array2<f64>,
5253) -> Result<Array2<f64>, EstimationError> {
5254    use gam_linalg::faer_ndarray::FaerEigh;
5255    let p = precision.nrows();
5256    if precision.ncols() != p {
5257        crate::bail_invalid_estim!(
5258            "posterior precision inverse requires a square matrix, got {}x{}",
5259            precision.nrows(),
5260            precision.ncols()
5261        );
5262    }
5263    if p == 0 {
5264        return Ok(Array2::<f64>::zeros((0, 0)));
5265    }
5266    let symmetric = (precision + &precision.t().to_owned()) * 0.5;
5267    let (evals, evecs) = symmetric.eigh(faer::Side::Lower).map_err(|e| {
5268        EstimationError::InvalidInput(format!(
5269            "posterior precision eigendecomposition failed: {e}"
5270        ))
5271    })?;
5272    let max_abs_eval = evals.iter().fold(0.0_f64, |acc, &ev| acc.max(ev.abs()));
5273    let tol =
5274        (10.0 * f64::EPSILON * (p as f64) * (p as f64) * max_abs_eval).max(100.0 * f64::EPSILON);
5275    if let Some(&min_eval) = evals
5276        .iter()
5277        .filter(|&&ev| ev < -tol)
5278        .min_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
5279    {
5280        crate::bail_invalid_estim!(
5281            "bounded posterior precision is non-PD at the converged optimum (min eigenvalue \
5282             {min_eval:.6e} < -tol={tol:.6e}); the reported mode is not a strict posterior \
5283             maximum, so a covariance would be meaningless"
5284        );
5285    }
5286    // Σ = U diag(1/λ_+) Uᵀ over the positive-eigenvalue subspace.
5287    let mut scaled = evecs.clone();
5288    for (j, &ev) in evals.iter().enumerate() {
5289        let inv = if ev > tol { 1.0 / ev } else { 0.0 };
5290        scaled.column_mut(j).mapv_inplace(|v| v * inv);
5291    }
5292    let cov = scaled.dot(&evecs.t());
5293    Ok((&cov + &cov.t().to_owned()) * 0.5)
5294}
5295
5296fn transform_bounded_latent_precision_to_user_internal(
5297    latent_precision: &Array2<f64>,
5298    jac_diag: &Array1<f64>,
5299) -> Result<Array2<f64>, EstimationError> {
5300    let p = latent_precision.nrows();
5301    if latent_precision.ncols() != p || jac_diag.len() != p {
5302        crate::bail_invalid_estim!(
5303            "bounded precision transform dimension mismatch: precision is {}x{}, jacobian has {} entries",
5304            latent_precision.nrows(),
5305            latent_precision.ncols(),
5306            jac_diag.len()
5307        );
5308    }
5309    let mut out = latent_precision.clone();
5310    for i in 0..p {
5311        let scale = jac_diag[i];
5312        if !scale.is_finite() || scale <= 0.0 {
5313            crate::bail_invalid_estim!(
5314                "bounded precision transform requires a positive finite coefficient jacobian; column {i} has {scale}"
5315            );
5316        }
5317        if scale != 1.0 {
5318            out.row_mut(i).mapv_inplace(|v| v / scale);
5319            out.column_mut(i).mapv_inplace(|v| v / scale);
5320        }
5321    }
5322    Ok(out)
5323}
5324
5325fn fit_bounded_term_collection_with_design(
5326    y: ArrayView1<'_, f64>,
5327    weights: ArrayView1<'_, f64>,
5328    offset: ArrayView1<'_, f64>,
5329    spec: &TermCollectionSpec,
5330    design: &TermCollectionDesign,
5331    heuristic_lambdas: Option<&[f64]>,
5332    family: LikelihoodSpec,
5333    options: &FitOptions,
5334) -> Result<FittedTermCollection, EstimationError> {
5335    let conditioning_cols: Vec<usize> = spec
5336        .linear_terms
5337        .iter()
5338        .enumerate()
5339        .filter_map(|(j, linear)| {
5340            (!linear.double_penalty).then_some(design.intercept_range.end + j)
5341        })
5342        .collect();
5343    let conditioning = LinearFitConditioning::from_columns(design, &conditioning_cols);
5344    let dense_design = design.design.to_dense_cow();
5345    let fit_design = conditioning.apply_to_design(&dense_design);
5346    let fit_penalties = conditioning
5347        .transform_blockwise_penalties_to_internal(&design.penalties, design.design.ncols());
5348    if design.linear_constraints.is_some() {
5349        crate::bail_invalid_estim!(
5350            "bounded() terms are not yet compatible with explicit linear constraints"
5351        );
5352    }
5353    let mut bounded_terms = Vec::<BoundedLinearTermMeta>::new();
5354    for (j, term) in spec.linear_terms.iter().enumerate() {
5355        if term.double_penalty
5356            && matches!(
5357                term.coefficient_geometry,
5358                LinearCoefficientGeometry::Bounded { .. }
5359            )
5360        {
5361            crate::bail_invalid_estim!(
5362                "bounded linear term '{}' cannot also use double_penalty",
5363                term.name
5364            );
5365        }
5366        if let LinearCoefficientGeometry::Bounded { min, max, prior } =
5367            term.coefficient_geometry.clone()
5368        {
5369            let col_idx = design.intercept_range.end + j;
5370            let (min_internal, max_internal) = conditioning.internal_bounds_for(col_idx, min, max);
5371            bounded_terms.push(BoundedLinearTermMeta {
5372                col_idx,
5373                min: min_internal,
5374                max: max_internal,
5375                prior,
5376            });
5377        }
5378    }
5379    if bounded_terms.is_empty() {
5380        crate::bail_invalid_estim!("internal bounded fit path called with no bounded terms");
5381    }
5382
5383    let mut designzeroed = fit_design.clone();
5384    let mut initial_beta = Array1::<f64>::zeros(fit_design.ncols());
5385    for term in &bounded_terms {
5386        designzeroed.column_mut(term.col_idx).fill(0.0);
5387        initial_beta[term.col_idx] = bounded_logit(0.5);
5388    }
5389
5390    let initial_log_lambdas = heuristic_lambdas
5391        .map(|vals| Array1::from_vec(vals.to_vec()))
5392        .unwrap_or_else(|| Array1::zeros(fit_penalties.len()));
5393    if initial_log_lambdas.len() != fit_penalties.len() {
5394        crate::bail_invalid_estim!(
5395            "heuristic lambda length mismatch for bounded model: got {}, expected {}",
5396            initial_log_lambdas.len(),
5397            fit_penalties.len()
5398        );
5399    }
5400
5401    let is_beta_logistic = family.is_binomial_beta_logistic();
5402    let family_adapter = BoundedLinearFamily {
5403        family: family.clone(),
5404        latent_cloglog_state: options.latent_cloglog,
5405        mixture_link_state: options
5406            .mixture_link
5407            .clone()
5408            .as_ref()
5409            .map(state_fromspec)
5410            .transpose()
5411            .map_err(EstimationError::InvalidInput)?,
5412        sas_link_state: options
5413            .sas_link
5414            .map(|spec| {
5415                if is_beta_logistic {
5416                    state_from_beta_logisticspec(spec)
5417                } else {
5418                    state_from_sasspec(spec)
5419                }
5420            })
5421            .transpose()
5422            .map_err(EstimationError::InvalidInput)?,
5423        y: y.to_owned(),
5424        weights: weights.to_owned(),
5425        design: fit_design.clone(),
5426        designzeroed: designzeroed.clone(),
5427        offset: offset.to_owned(),
5428        bounded_terms: bounded_terms.clone(),
5429    };
5430    let blockspec = ParameterBlockSpec {
5431        name: "eta".to_string(),
5432        design: DesignMatrix::Dense(gam_linalg::matrix::DenseDesignMatrix::from(designzeroed)),
5433        offset: offset.to_owned(),
5434        penalties: fit_penalties
5435            .iter()
5436            .map(|ps| match ps {
5437                PenaltySpec::Block {
5438                    local, col_range, ..
5439                } => PenaltyMatrix::Blockwise {
5440                    local: local.clone(),
5441                    col_range: col_range.clone(),
5442                    total_dim: design.design.ncols(),
5443                },
5444                PenaltySpec::Dense(m) | PenaltySpec::DenseWithMean { matrix: m, .. } => {
5445                    PenaltyMatrix::Dense(m.clone())
5446                }
5447            })
5448            .collect(),
5449        nullspace_dims: design.nullspace_dims.clone(),
5450        initial_log_lambdas,
5451        initial_beta: Some(initial_beta),
5452        gauge_priority: 100,
5453        // Report the true β-dependent Jacobian (bounded columns scaled by
5454        // dβ/dθ) to the identifiability audit so it does not mistake the
5455        // deliberately-zeroed placeholder columns for a structural rank
5456        // deficiency. The inner solve still drives η through the family
5457        // adapter, so this does not affect the fit geometry.
5458        jacobian_callback: Some(Arc::new(BoundedEffectiveJacobian {
5459            design: fit_design.clone(),
5460            bounded_terms: bounded_terms.clone(),
5461        })),
5462        stacked_design: None,
5463        stacked_offset: None,
5464    };
5465    let fit = fit_custom_family(
5466        &family_adapter,
5467        &[blockspec],
5468        &BlockwiseFitOptions {
5469            inner_max_cycles: options.max_iter,
5470            inner_tol: options.tol,
5471            outer_max_iter: options.max_iter,
5472            outer_tol: options.tol,
5473            // The bounded path builds its own user-scale covariance below by
5474            // inverting the user-scale penalised Hessian (delta-method through
5475            // the bounded transform's Jacobian + the conditioning map), so it
5476            // does not consume the inner solver's optional canonical-space
5477            // `covariance_conditional`. Inverting the reported precision
5478            // directly guarantees `inv(penalized_hessian) == covariance` and
5479            // works on every bounded fit — including the common no-smoothing
5480            // path where the inner solve surfaces no covariance at all (the
5481            // gam#854 "bounded fit emits no user-scale covariance" symptom).
5482            compute_covariance: false,
5483            ..BlockwiseFitOptions::default()
5484        },
5485    )
5486    .map_err(EstimationError::CustomFamily)?;
5487
5488    let latent_beta = fit.block_states[0].beta.clone();
5489    let (beta_user_internal, jac_diag) = family_adapter.user_beta_and_jacobian(&latent_beta);
5490    let beta_user = conditioning.backtransform_beta(&beta_user_internal);
5491
5492    let (eta_state, h_data, _, _) = family_adapter
5493        .evaluation_from_latent(&latent_beta)
5494        .map_err(EstimationError::InvalidInput)?;
5495    let p_fit = fit_design.ncols();
5496    let mut s_lambda_internal = Array2::<f64>::zeros((p_fit, p_fit));
5497    for (k, penalty) in fit_penalties.iter().enumerate() {
5498        match penalty {
5499            PenaltySpec::Block {
5500                local, col_range, ..
5501            } => {
5502                s_lambda_internal
5503                    .slice_mut(ndarray::s![col_range.clone(), col_range.clone()])
5504                    .scaled_add(fit.lambdas[k], local);
5505            }
5506            PenaltySpec::Dense(m) | PenaltySpec::DenseWithMean { matrix: m, .. } => {
5507                s_lambda_internal.scaled_add(fit.lambdas[k], m);
5508            }
5509        }
5510    }
5511    let mut latent_precision = h_data.clone();
5512    latent_precision += &s_lambda_internal;
5513    let user_precision_internal =
5514        transform_bounded_latent_precision_to_user_internal(&latent_precision, &jac_diag)?;
5515    let penalized_hessian =
5516        conditioning.transform_penalized_hessian_to_original(&user_precision_internal);
5517
5518    // User-scale posterior covariance via the delta method. The reported
5519    // geometry precision `penalized_hessian` is the user-scale penalized
5520    // Hessian `H_user = C⁻ᵀ J⁻¹ (H_latent + S_λ) J⁻¹ C⁻¹` (latent precision
5521    // pushed through the bounded transform's Jacobian `J = diag(dβ_user/dθ)`
5522    // and the conditioning map `C`). Its exact inverse `H_user⁻¹` is the
5523    // delta-method pushforward of the latent posterior precision-inverse
5524    // `(H_latent + S_λ)⁻¹` — but on the UNSCALED (unit-dispersion) scale. For a
5525    // free-dispersion family (profiled Gaussian) the reported coefficient
5526    // covariance is `Vb = φ̂ · H_user⁻¹` with `φ̂ = σ̂²`, so the unscaled inverse
5527    // below is multiplied by the dispersion scale `cov_scale` once `σ̂²` is
5528    // known (after the EDF, which sets the residual d.f.). For fixed-scale
5529    // families (Binomial, `φ ≡ 1`) `cov_scale == 1` and `Vb = H_user⁻¹`
5530    // unchanged. Skipping this scale was gam#1514: an interior, well-identified
5531    // Gaussian bounded slope reported an SE ≈ 1/√Σ(xᵢ−x̄)² instead of
5532    // σ̂/√Σ(xᵢ−x̄)², i.e. ~`1/σ̂` (≈20×) too wide.
5533    //
5534    // Inverting the same matrix the geometry reports keeps
5535    // `inv(penalized_hessian) == cov_scale⁻¹ · covariance` and removes the
5536    // dependency on the inner solver's optional, canonical-space
5537    // `covariance_conditional` (which is `None` whenever the bounded blockspec
5538    // carries no smoothing parameters — the no-rho fit path — leaving a bounded
5539    // fit with a populated precision but no user-scale covariance, the gam#854
5540    // symptom). The latent precision is SPD at a strict posterior maximum; on a
5541    // marginally-indefinite boundary Hessian we invert the positive-eigenvalue
5542    // subspace (the structural null space of a penalised model is a flat
5543    // posterior direction, not something to ridge away), matching the
5544    // strict-pseudo-Laplace covariance contract (gam#748).
5545    let beta_covariance_unscaled = if options.compute_inference {
5546        Some(symmetric_positive_definite_inverse_or_pseudo(
5547            &penalized_hessian,
5548        )?)
5549    } else {
5550        None
5551    };
5552    // EDF `p − Σ_k λ_k tr(H_latent⁻¹ S_k)` is computed in the *latent*
5553    // (untransformed) coordinate system the penalties `fit_penalties` live in,
5554    // so it needs the latent posterior covariance `(H_latent + S_λ)⁻¹`, not the
5555    // user-scale one. Invert the same latent precision that produced the
5556    // reported user precision so the two are an exact transform pair.
5557    let latent_cov = if options.compute_inference {
5558        Some(symmetric_positive_definite_inverse_or_pseudo(
5559            &latent_precision,
5560        )?)
5561    } else {
5562        None
5563    };
5564    let s_lambda_original = weighted_blockwise_penalty_sum(
5565        &design.penalties,
5566        fit.lambdas.as_slice().unwrap(),
5567        design.design.ncols(),
5568    );
5569    let penalty_term = beta_user.dot(&s_lambda_original.dot(&beta_user));
5570    let deviance = if family.is_gaussian_identity() {
5571        y.iter()
5572            .zip(eta_state.mu.iter())
5573            .zip(weights.iter())
5574            .map(|((&yy, &mu), &w)| w.max(0.0) * (yy - mu) * (yy - mu))
5575            .sum()
5576    } else {
5577        -2.0 * eta_state.log_likelihood
5578    };
5579    let (edf_by_block, penalty_block_trace, edf_total) = if let Some(cov) = latent_cov.as_ref() {
5580        exact_bounded_edf(&fit_penalties, &fit.lambdas, cov)?
5581    } else {
5582        (
5583            vec![0.0; fit_penalties.len()],
5584            vec![0.0; fit_penalties.len()],
5585            0.0,
5586        )
5587    };
5588
5589    // Dispersion. The bounded fit's working weight is scale-free for a profiled
5590    // Gaussian (`W = priorweights`), so the unscaled penalized Hessian carries
5591    // unit implicit dispersion and the reported coefficient covariance must be
5592    // restored to `Vb = σ̂²·H_user⁻¹` with the REML residual variance
5593    // `σ̂² = RSS/(n − edf_total)` — identical to the ordinary GAM path
5594    // (`solver/estimate/optimizer.rs`). Fixed-scale families (Binomial here,
5595    // `φ ≡ 1`) keep their full Fisher information in `W`, so `cov_scale == 1`
5596    // and the covariance is `H_user⁻¹` unscaled. The single source of truth for
5597    // the per-family scale is `GlmLikelihoodSpec::coefficient_covariance_scale`
5598    // / `dispersion_from_likelihood`, reused verbatim so the bounded path can
5599    // never drift from the standard contract (gam#1514).
5600    let glm_likelihood = gam_spec::GlmLikelihoodSpec::canonical(family.clone());
5601    let standard_deviation = if family.is_gaussian_identity() {
5602        let denom = if options.compute_inference {
5603            (y.len() as f64 - edf_total).max(1.0)
5604        } else {
5605            (y.len() as f64).max(1.0)
5606        };
5607        (deviance / denom).sqrt()
5608    } else {
5609        1.0
5610    };
5611    let cov_scale = glm_likelihood
5612        .coefficient_covariance_scale(standard_deviation * standard_deviation)
5613        .max(f64::MIN_POSITIVE);
5614    let dispersion = gam_solve::estimate::dispersion_from_likelihood(&glm_likelihood, standard_deviation);
5615    // Apply the dispersion scale to the unscaled inverse, producing the reported
5616    // `Vb = cov_scale · H_user⁻¹` and its diagonal standard errors. The stored
5617    // `penalized_hessian` stays UNSCALED (`H_user`) per the dispersion-ownership
5618    // contract in `inference::dispersion_cov`; the sampler re-applies `√cov_scale`
5619    // when it reconstructs the latent posterior (see `sample_standard_bounded`).
5620    let beta_covariance = beta_covariance_unscaled.map(|mut cov| {
5621        if cov_scale != 1.0 {
5622            cov.mapv_inplace(|v| v * cov_scale);
5623        }
5624        cov
5625    });
5626    let beta_standard_errors = beta_covariance
5627        .as_ref()
5628        .map(|cov| Array1::from_iter((0..cov.nrows()).map(|i| cov[[i, i]].max(0.0).sqrt())));
5629
5630    let geometry = Some(gam_solve::estimate::FitGeometry {
5631        penalized_hessian: penalized_hessian.clone().into(),
5632        working_weights: eta_state.fisherweight.clone(),
5633        working_response: {
5634            let mut working_response = eta_state.eta.clone();
5635            for i in 0..working_response.len() {
5636                let wi = eta_state.fisherweight[i].max(1e-12);
5637                working_response[i] += eta_state.score[i] / wi;
5638            }
5639            working_response
5640        },
5641    });
5642    let max_abs_eta = eta_state
5643        .eta
5644        .iter()
5645        .fold(0.0_f64, |acc, &v| acc.max(v.abs()));
5646    Ok(FittedTermCollection {
5647        fit: {
5648            let log_lambdas = fit.lambdas.mapv(|v| v.max(1e-300).ln());
5649            let inf = FitInference {
5650                edf_by_block,
5651                penalty_block_trace,
5652                edf_total,
5653                smoothing_correction: None,
5654                // Boundary adapter: `penalized_hessian` storage is now
5655                // `UnscaledPrecision`.
5656                penalized_hessian: penalized_hessian.clone().into(),
5657                working_weights: eta_state.fisherweight.clone(),
5658                working_response: {
5659                    let mut working_response = eta_state.eta.clone();
5660                    for i in 0..working_response.len() {
5661                        let wi = eta_state.fisherweight[i].max(1e-12);
5662                        working_response[i] += eta_state.score[i] / wi;
5663                    }
5664                    working_response
5665                },
5666                reparam_qs: None,
5667                dispersion,
5668                beta_covariance: beta_covariance
5669                    .clone()
5670                    .map(gam_problem::dispersion_cov::PhiScaledCovariance::from),
5671                beta_standard_errors,
5672                beta_covariance_corrected: None,
5673                beta_standard_errors_corrected: None,
5674                beta_covariance_frequentist: None,
5675                coefficient_influence: None,
5676                weighted_gram: None,
5677                bias_correction_beta: None,
5678            };
5679            let covariance_conditional = beta_covariance;
5680            let pirls_status_val = if fit.outer_converged {
5681                gam_solve::pirls::PirlsStatus::Converged
5682            } else {
5683                gam_solve::pirls::PirlsStatus::StalledAtValidMinimum
5684            };
5685            UnifiedFitResult::try_from_parts(UnifiedFitResultParts {
5686                blocks: vec![gam_solve::estimate::FittedBlock {
5687                    beta: beta_user.clone(),
5688                    role: gam_problem::BlockRole::Mean,
5689                    edf: edf_total,
5690                    lambdas: fit.lambdas.clone(),
5691                }],
5692                log_lambdas,
5693                lambdas: fit.lambdas,
5694                likelihood_scale: family.default_scale_metadata(),
5695                likelihood_family: Some(family),
5696                log_likelihood_normalization:
5697                    gam_spec::LogLikelihoodNormalization::UserProvided,
5698                log_likelihood: eta_state.log_likelihood,
5699                deviance,
5700                reml_score: fit.penalized_objective,
5701                stable_penalty_term: penalty_term,
5702                penalized_objective: fit.penalized_objective,
5703                used_device: false,
5704                outer_iterations: fit.outer_iterations,
5705                outer_converged: fit.outer_converged,
5706                outer_gradient_norm: fit.outer_gradient_norm,
5707                standard_deviation,
5708                covariance_conditional,
5709                covariance_corrected: None,
5710                inference: Some(inf),
5711                fitted_link: gam_solve::estimate::FittedLinkState::Standard(None),
5712                geometry,
5713                block_states: Vec::new(),
5714                pirls_status: pirls_status_val,
5715                max_abs_eta,
5716                constraint_kkt: None,
5717                artifacts: gam_solve::estimate::FitArtifacts {
5718                    pirls: None,
5719                    ..Default::default()
5720                },
5721                inner_cycles: 0,
5722            })?
5723        },
5724        design: design.clone(),
5725        adaptive_diagnostics: None,
5726    })
5727}
5728
5729fn enforce_term_constraint_feasibility(
5730    design: &TermCollectionDesign,
5731    fit: &UnifiedFitResult,
5732) -> Result<(), EstimationError> {
5733    // Geometric (per-row-scaled) tolerance, matching the public contract on
5734    // `ACTIVE_SET_PRIMAL_FEASIBILITY_TOL` and the diagnostic that
5735    // `compute_constraint_kkt_diagnostics` exposes via `fit.constraint_kkt`.
5736    // Lower-bound rows are unit-norm (a_i = e_i) so the scale-invariant and
5737    // raw checks coincide there. Linear-inequality rows generally are NOT
5738    // unit-norm — e.g. a B-spline endpoint-derivative clamp at k = 12 carries
5739    // ‖a_i‖ ≈ 38, so a 1e-6 raw residual is only 2.6e-8 in geometric units.
5740    // Holding this gate to raw 1e-7 while the in-solver acceptance gate
5741    // measures geometric 1e-8 is the inconsistency that made well-conditioned
5742    // clamped fits get rejected after they completed cleanly.
5743    /// Raw (unscaled) constraint-residual tolerance for the post-fit feasibility
5744    /// audit; kept loose enough to be consistent with the geometric in-solver
5745    /// acceptance gate on non-unit-norm linear-inequality rows (see comment).
5746    const CONSTRAINT_FEASIBILITY_RAW_TOL: f64 = 1e-7;
5747    let tol = CONSTRAINT_FEASIBILITY_RAW_TOL;
5748    let smooth_start = design
5749        .design
5750        .ncols()
5751        .saturating_sub(design.smooth.total_smooth_cols());
5752    let mut violations: Vec<String> = Vec::new();
5753    for term in &design.smooth.terms {
5754        let gr = (smooth_start + term.coeff_range.start)..(smooth_start + term.coeff_range.end);
5755        let beta_local = fit.beta.slice(s![gr.clone()]).to_owned();
5756        if let Some(lb) = term.lower_bounds_local.as_ref() {
5757            let mut worst = 0.0_f64;
5758            let mut worst_idx = 0usize;
5759            for i in 0..lb.len().min(beta_local.len()) {
5760                if lb[i].is_finite() {
5761                    let viol = (lb[i] - beta_local[i]).max(0.0);
5762                    if viol > worst {
5763                        worst = viol;
5764                        worst_idx = i;
5765                    }
5766                }
5767            }
5768            if worst > tol {
5769                violations.push(format!(
5770                    "term='{}' kind=lower-bound maxviolation={:.3e} coeff_index={}",
5771                    term.name, worst, worst_idx
5772                ));
5773            }
5774        }
5775        if let Some(lin) = term.linear_constraints_local.as_ref() {
5776            let mut worst = 0.0_f64;
5777            let mut worstrow = 0usize;
5778            for i in 0..lin.a.nrows() {
5779                let norm = lin.a.row(i).dot(&lin.a.row(i)).sqrt();
5780                let inv = if norm > 0.0 { 1.0 / norm } else { 0.0 };
5781                let s = (lin.a.row(i).dot(&beta_local) - lin.b[i]) * inv;
5782                let viol = (-s).max(0.0);
5783                if viol > worst {
5784                    worst = viol;
5785                    worstrow = i;
5786                }
5787            }
5788            if worst > tol {
5789                violations.push(format!(
5790                    "term='{}' kind=linear-inequality maxviolation={:.3e} row={}",
5791                    term.name, worst, worstrow
5792                ));
5793            }
5794        }
5795    }
5796
5797    if !violations.is_empty() {
5798        let mut msg = format!(
5799            "constraint violation after fit ({} violating term constraints): {}",
5800            violations.len(),
5801            violations.join(" | ")
5802        );
5803        if let Some(kkt) = fit.constraint_kkt.as_ref() {
5804            msg.push_str(&format!(
5805                "; KKT[primal={:.3e}, dual={:.3e}, comp={:.3e}, stat={:.3e}]",
5806                kkt.primal_feasibility, kkt.dual_feasibility, kkt.complementarity, kkt.stationarity
5807            ));
5808        }
5809        return Err(EstimationError::ParameterConstraintViolation(msg));
5810    }
5811    Ok(())
5812}
5813
5814fn stratified_spatial_subsample(
5815    data: ArrayView2<'_, f64>,
5816    spec: &TermCollectionSpec,
5817    target_size: usize,
5818) -> Vec<usize> {
5819    use rand::SeedableRng;
5820    use rand::rngs::StdRng;
5821    use rand::seq::SliceRandom;
5822
5823    let n = data.nrows();
5824    if n <= target_size {
5825        return (0..n).collect();
5826    }
5827
5828    let spatial_cols: Option<Vec<usize>> =
5829        spec.smooth_terms.iter().find_map(|term| match &term.basis {
5830            SmoothBasisSpec::ThinPlate { feature_cols, .. }
5831            | SmoothBasisSpec::Matern { feature_cols, .. }
5832            | SmoothBasisSpec::Duchon { feature_cols, .. } => {
5833                if !feature_cols.is_empty() {
5834                    Some(feature_cols.clone())
5835                } else {
5836                    None
5837                }
5838            }
5839            _ => None,
5840        });
5841
5842    let cols = match spatial_cols {
5843        Some(c) if !c.is_empty() => c,
5844        _ => {
5845            let mut rng = StdRng::seed_from_u64(spatial_subsample_seed(data, &[], target_size));
5846            let mut indices: Vec<usize> = (0..n).collect();
5847            indices.shuffle(&mut rng);
5848            indices.truncate(target_size);
5849            indices.sort_unstable();
5850            return indices;
5851        }
5852    };
5853    let mut rng = StdRng::seed_from_u64(spatial_subsample_seed(data, &cols, target_size));
5854
5855    let d = cols.len();
5856    let mut mins = vec![f64::INFINITY; d];
5857    let mut maxs = vec![f64::NEG_INFINITY; d];
5858    for i in 0..n {
5859        for (ax, &col) in cols.iter().enumerate() {
5860            let v = data[[i, col]];
5861            if v < mins[ax] {
5862                mins[ax] = v;
5863            }
5864            if v > maxs[ax] {
5865                maxs[ax] = v;
5866            }
5867        }
5868    }
5869
5870    // Aim for roughly this many sampled points per stratification cell so each
5871    // occupied cell can contribute a representative draw without collapsing the
5872    // grid to one point per cell.
5873    const TARGET_POINTS_PER_CELL: usize = 5;
5874    let total_cells_target = (target_size / TARGET_POINTS_PER_CELL).max(1);
5875    let cells_per_axis = ((total_cells_target as f64).powf(1.0 / d as f64)).ceil() as usize;
5876    let cells_per_axis = cells_per_axis.max(1);
5877
5878    let mut cell_members: std::collections::HashMap<Vec<usize>, Vec<usize>> =
5879        std::collections::HashMap::new();
5880    for i in 0..n {
5881        let mut cell_key = Vec::with_capacity(d);
5882        for (ax, &col) in cols.iter().enumerate() {
5883            let range = maxs[ax] - mins[ax];
5884            let cell = if range <= 0.0 {
5885                0
5886            } else {
5887                let frac = (data[[i, col]] - mins[ax]) / range;
5888                (frac * cells_per_axis as f64).floor() as usize
5889            };
5890            cell_key.push(cell.min(cells_per_axis - 1));
5891        }
5892        cell_members.entry(cell_key).or_default().push(i);
5893    }
5894
5895    let mut selected: Vec<usize> = Vec::with_capacity(target_size);
5896    let mut remaining_budget = target_size;
5897    let mut remaining_population = n;
5898
5899    let mut cells: Vec<(Vec<usize>, Vec<usize>)> = cell_members.into_iter().collect();
5900    cells.sort_by(|a, b| a.0.cmp(&b.0));
5901
5902    for (_, members) in &mut cells {
5903        if remaining_budget == 0 {
5904            break;
5905        }
5906        let alloc = ((members.len() as f64 / remaining_population as f64) * remaining_budget as f64)
5907            .round() as usize;
5908        let alloc = alloc.max(1).min(members.len()).min(remaining_budget);
5909        members.shuffle(&mut rng);
5910        selected.extend_from_slice(&members[..alloc]);
5911        remaining_budget = remaining_budget.saturating_sub(alloc);
5912        remaining_population = remaining_population.saturating_sub(members.len());
5913    }
5914
5915    if selected.len() > target_size {
5916        selected.shuffle(&mut rng);
5917        selected.truncate(target_size);
5918    }
5919
5920    selected.sort_unstable();
5921    selected
5922}
5923
5924fn spatial_subsample_seed(
5925    data: ArrayView2<'_, f64>,
5926    spatial_cols: &[usize],
5927    target_size: usize,
5928) -> u64 {
5929    let mut state = 0x5350_4154_4941_4C53_u64;
5930    spatial_seed_mix(&mut state, data.nrows() as u64);
5931    spatial_seed_mix(&mut state, data.ncols() as u64);
5932    spatial_seed_mix(&mut state, target_size as u64);
5933    spatial_seed_mix(&mut state, spatial_cols.len() as u64);
5934    for &col in spatial_cols {
5935        spatial_seed_mix(&mut state, col as u64);
5936    }
5937
5938    if data.nrows() > 0 {
5939        let mid = data.nrows() / 2;
5940        let last = data.nrows() - 1;
5941        for &row in &[0usize, mid, last] {
5942            for &col in spatial_cols {
5943                let value = data[[row, col]];
5944                spatial_seed_mix(&mut state, value.to_bits());
5945            }
5946        }
5947    }
5948    state
5949}
5950
5951#[inline]
5952fn spatial_seed_mix(state: &mut u64, value: u64) {
5953    // Canonical SplitMix64 step over `value + state` (the step adds G itself),
5954    // then an extra rotate-multiply avalanche unique to the spatial seed mix.
5955    let mut s = value.wrapping_add(*state);
5956    let z = gam_linalg::utils::splitmix64(&mut s);
5957    *state ^= z;
5958    *state = (*state).rotate_left(27).wrapping_mul(0x3C79_AC49_2BA7_B653);
5959}
5960
5961fn sampled_rows(data: ArrayView2<'_, f64>, indices: &[usize]) -> Array2<f64> {
5962    let mut sampled = Array2::<f64>::zeros((indices.len(), data.ncols()));
5963    for (new_row, &orig_row) in indices.iter().enumerate() {
5964        sampled.row_mut(new_row).assign(&data.row(orig_row));
5965    }
5966    sampled
5967}
5968
5969fn spatial_term_user_centers(term: &SmoothTermSpec) -> Option<ArrayView2<'_, f64>> {
5970    match spatial_term_center_strategy(term) {
5971        Some(CenterStrategy::UserProvided(centers)) => Some(centers.view()),
5972        _ => None,
5973    }
5974}
5975
5976fn finite_centered_axis_contrasts(values: &[f64], expected_dim: usize) -> Option<Vec<f64>> {
5977    if values.len() != expected_dim || expected_dim <= 1 {
5978        return None;
5979    }
5980    if values.iter().any(|value| !value.is_finite()) {
5981        return None;
5982    }
5983    Some(center_aniso_log_scales(values))
5984}
5985
5986fn blended_pilot_axis_contrasts(
5987    pilot_data: ArrayView2<'_, f64>,
5988    term: &SmoothTermSpec,
5989    centers: ArrayView2<'_, f64>,
5990) -> Option<Vec<f64>> {
5991    let d = centers.ncols();
5992    if d <= 1 {
5993        return None;
5994    }
5995    let center_eta = initial_aniso_contrasts(centers);
5996    let data_eta = standardized_spatial_term_data(pilot_data, term)
5997        .ok()
5998        .and_then(|x| finite_centered_axis_contrasts(&initial_aniso_contrasts(x.view()), d));
5999    let center_eta = finite_centered_axis_contrasts(&center_eta, d)?;
6000    let blended = match data_eta {
6001        Some(data_eta) => center_eta
6002            .iter()
6003            .zip(data_eta.iter())
6004            .map(|(&from_centers, &from_data)| 0.5 * (from_centers + from_data))
6005            .collect::<Vec<_>>(),
6006        None => center_eta,
6007    };
6008    finite_centered_axis_contrasts(&blended, d)
6009}
6010
6011fn apply_pilot_spatial_psi_reseed(
6012    pilot_data: ArrayView2<'_, f64>,
6013    spec: &TermCollectionSpec,
6014    spatial_terms: &[usize],
6015    kappa_options: &SpatialLengthScaleOptimizationOptions,
6016) -> Result<TermCollectionSpec, EstimationError> {
6017    let dims_per_term = spatial_dims_per_term(spec, spatial_terms);
6018    let use_aniso = has_aniso_terms(spec, spatial_terms);
6019    let log_kappa0 = if use_aniso {
6020        SpatialLogKappaCoords::from_length_scales_aniso(spec, spatial_terms, kappa_options)
6021    } else {
6022        SpatialLogKappaCoords::from_length_scales(spec, spatial_terms, kappa_options)
6023    };
6024    let log_kappa0 = log_kappa0.reseed_from_data(pilot_data, spec, spatial_terms, kappa_options);
6025    let log_kappa_lower = if use_aniso {
6026        SpatialLogKappaCoords::lower_bounds_aniso_from_data(
6027            pilot_data,
6028            spec,
6029            spatial_terms,
6030            &dims_per_term,
6031            kappa_options,
6032        )
6033    } else {
6034        SpatialLogKappaCoords::lower_bounds_from_data(
6035            pilot_data,
6036            spec,
6037            spatial_terms,
6038            kappa_options,
6039        )
6040    };
6041    let log_kappa_upper = if use_aniso {
6042        SpatialLogKappaCoords::upper_bounds_aniso_from_data(
6043            pilot_data,
6044            spec,
6045            spatial_terms,
6046            &dims_per_term,
6047            kappa_options,
6048        )
6049    } else {
6050        SpatialLogKappaCoords::upper_bounds_from_data(
6051            pilot_data,
6052            spec,
6053            spatial_terms,
6054            kappa_options,
6055        )
6056    };
6057    log_kappa0
6058        .clamp_to_bounds(&log_kappa_lower, &log_kappa_upper)
6059        .apply_tospec(spec, spatial_terms)
6060}
6061
6062pub(crate) fn apply_spatial_anisotropy_pilot_initializer(
6063    data: ArrayView2<'_, f64>,
6064    spec: &mut TermCollectionSpec,
6065    spatial_terms: &[usize],
6066    target_size: usize,
6067    kappa_options: &SpatialLengthScaleOptimizationOptions,
6068) -> usize {
6069    if target_size == 0 || data.nrows() <= target_size.saturating_mul(2) || spatial_terms.is_empty()
6070    {
6071        return 0;
6072    }
6073    if !has_aniso_terms(spec, spatial_terms) {
6074        return 0;
6075    }
6076    let indices = stratified_spatial_subsample(data, spec, target_size);
6077    let pilot_data = sampled_rows(data, &indices);
6078    let mut working = spec.clone();
6079    let mut updated_terms = 0usize;
6080    const GEOMETRY_UPDATES: usize = 2;
6081
6082    for pass in 0..GEOMETRY_UPDATES {
6083        let planned_terms = match plan_joint_spatial_centers_for_term_blocks(
6084            pilot_data.view(),
6085            &[working.smooth_terms.clone()],
6086        )
6087        .and_then(|mut blocks| {
6088            blocks.pop().ok_or_else(|| {
6089                BasisError::InvalidInput(
6090                    "pilot geometry initializer produced no smooth-term block".to_string(),
6091                )
6092            })
6093        }) {
6094            Ok(terms) => terms,
6095            Err(err) => {
6096                log::warn!(
6097                    "[spatial-kappa] pilot geometry initializer skipped after center planning failed: {err}"
6098                );
6099                return updated_terms;
6100            }
6101        };
6102
6103        for &term_idx in spatial_terms {
6104            let Some(current_eta) = get_spatial_aniso_log_scales(&working, term_idx) else {
6105                continue;
6106            };
6107            let Some(d) = get_spatial_feature_dim(&working, term_idx) else {
6108                continue;
6109            };
6110            if d <= 1 || current_eta.len() != d {
6111                continue;
6112            }
6113            let Some(planned_term) = planned_terms.get(term_idx) else {
6114                continue;
6115            };
6116            let Some(centers) = spatial_term_user_centers(planned_term) else {
6117                continue;
6118            };
6119            let Some(eta) = blended_pilot_axis_contrasts(pilot_data.view(), planned_term, centers)
6120            else {
6121                continue;
6122            };
6123            if set_spatial_aniso_log_scales(&mut working, term_idx, eta).is_ok() {
6124                updated_terms += usize::from(pass == 0);
6125            }
6126        }
6127
6128        match apply_pilot_spatial_psi_reseed(
6129            pilot_data.view(),
6130            &working,
6131            spatial_terms,
6132            kappa_options,
6133        ) {
6134            Ok(updated) => {
6135                working = updated;
6136            }
6137            Err(err) => {
6138                log::warn!(
6139                    "[spatial-kappa] pilot geometry ψ reseed skipped after deterministic initializer error: {err}"
6140                );
6141                break;
6142            }
6143        }
6144    }
6145
6146    if updated_terms > 0 {
6147        log::info!(
6148            "[spatial-kappa] initialized anisotropy from {}-row pilot geometry for {} spatial term(s); proceeding to full-data optimization",
6149            indices.len(),
6150            updated_terms
6151        );
6152        *spec = working;
6153    }
6154    updated_terms
6155}
6156
6157pub(crate) fn spatial_length_scale_term_indices(spec: &TermCollectionSpec) -> Vec<usize> {
6158    spec.smooth_terms
6159        .iter()
6160        .enumerate()
6161        .filter_map(|(idx, _)| spatial_term_supports_hyper_optimization(spec, idx).then_some(idx))
6162        .collect()
6163}
6164
6165/// Returns `true` when every spatial term in `spec` has a locked kernel
6166/// scale (explicit `length_scale=X` without anisotropy) and therefore
6167/// contributes no outer ψ/κ optimization axis. Empty term collections
6168/// also return `true` — there are no kappas to optimize.
6169///
6170/// Used by family entry points that want to honor a user-supplied scalar
6171/// length scale exactly: when all spatial terms are locked the n-block
6172/// joint-spatial outer solver has nothing to optimize, and routing
6173/// through it merely spends ~80 outer iters chasing a stalled ARC at the
6174/// user's chosen ρ. Skipping straight to the rho-only path avoids that
6175/// waste and respects the user's explicit kernel-scale input.
6176fn fit_score(fit: &UnifiedFitResult) -> f64 {
6177    if fit.reml_score.is_finite() {
6178        return fit.reml_score;
6179    }
6180    let score = 0.5 * fit.deviance + 0.5 * fit.stable_penalty_term;
6181    if score.is_finite() {
6182        score
6183    } else {
6184        f64::INFINITY
6185    }
6186}
6187
6188/// Classify an outer-evaluation error as a *recoverable trial-point
6189/// infeasibility* versus a genuine fatal failure.
6190///
6191/// The spatial-κ / anisotropy outer optimizer probes a sequence of trial
6192/// hyperparameters. At an extreme trial point the realized kernel design or
6193/// its ψ-derivatives may simply be non-constructible — e.g. a learned
6194/// per-axis log-scale stretches the anisotropic distance `r = |Λh|` until the
6195/// Duchon polyharmonic blocks `r^(2m−d)` overflow, or a degenerate metric
6196/// collapses two centers onto a non-C² collision. Those points lie outside
6197/// the model's feasible domain; the principled response is to treat them like
6198/// the cost-only path already does (objective `+∞`) so the line-search /
6199/// trust-region solver retreats, rather than aborting the entire REML fit.
6200///
6201/// A `BasisError` is exactly this class: it means "the basis/design cannot be
6202/// built at this hyperparameter". Everything else (PIRLS divergence wired to a
6203/// distinct variant, layout/topology invariants, over-parameterization) stays
6204/// fatal so genuine bugs are never masked.
6205fn is_recoverable_trial_point_error(err: &EstimationError) -> bool {
6206    matches!(err, EstimationError::BasisError(_))
6207}
6208
6209fn require_successful_spatial_optimization_result<T>(
6210    initial_score: f64,
6211    result: Result<Option<(T, f64)>, EstimationError>,
6212) -> Result<T, EstimationError> {
6213    match result {
6214        Ok(Some((value, exact_score))) => {
6215            // Allow rounding-level worsening: REML scores accumulate
6216            // log-determinant terms whose finite-precision re-evaluation
6217            // can drift well past 1e-10 absolute near a converged optimum
6218            // (we have seen ~1e-6 between two evaluations whose printed
6219            // values round to identical 6-digit scientific). Reject genuine
6220            // worsenings (>1 unit) but admit anything within ~1e-6
6221            // absolute / 1e-8 relative — meaningful REML gains are
6222            // orders of magnitude larger.
6223            const SCORE_DRIFT_ABS_TOL: f64 = 1e-6;
6224            const SCORE_DRIFT_REL_TOL: f64 = 1e-8;
6225            let tol = SCORE_DRIFT_ABS_TOL.max(initial_score.abs() * SCORE_DRIFT_REL_TOL);
6226            if exact_score <= initial_score + tol {
6227                Ok(value)
6228            } else {
6229                Err(EstimationError::RemlOptimizationFailed(format!(
6230                    "spatial kappa optimization made REML score worse ({initial_score:.6e} -> {exact_score:.6e})"
6231                )))
6232            }
6233        }
6234        Ok(None) => Err(EstimationError::RemlOptimizationFailed(
6235            "spatial kappa optimization is unavailable for one or more eligible spatial terms"
6236                .to_string(),
6237        )),
6238        Err(err) => Err(EstimationError::RemlOptimizationFailed(format!(
6239            "spatial kappa optimization failed: {err}"
6240        ))),
6241    }
6242}
6243
6244fn external_opts_for_design(
6245    family: &LikelihoodSpec,
6246    design: &TermCollectionDesign,
6247    options: &FitOptions,
6248) -> ExternalOptimOptions {
6249    ExternalOptimOptions {
6250        family: family.clone(),
6251        latent_cloglog: options.latent_cloglog,
6252        mixture_link: options.mixture_link.clone(),
6253        optimize_mixture: options.optimize_mixture,
6254        sas_link: options.sas_link,
6255        optimize_sas: options.optimize_sas,
6256        compute_inference: options.compute_inference,
6257        skip_rho_posterior_inference: options.skip_rho_posterior_inference,
6258        max_iter: options.max_iter,
6259        tol: options.tol,
6260        nullspace_dims: design.nullspace_dims.clone(),
6261        linear_constraints: design.linear_constraints.clone(),
6262        firth_bias_reduction: Some(options.firth_bias_reduction),
6263        penalty_shrinkage_floor: options.penalty_shrinkage_floor,
6264        rho_prior: options.rho_prior.clone(),
6265        // Propagate Kronecker structure so the joint optimizer minimizes the
6266        // same REML surface as the baseline/refit (adaptive_fit_options_base).
6267        kronecker_penalty_system: design.kronecker_penalty_system(),
6268        kronecker_factored: design
6269            .smooth
6270            .terms
6271            .iter()
6272            .find_map(|t| t.kronecker_factored.clone()),
6273        persist_warm_start_disk: options.persist_warm_start_disk,
6274    }
6275}
6276
6277/// Evaluate the joint REML cost, gradient, and Hessian result at a given θ = [ρ, ψ]
6278/// for a single-block term collection with spatial hyperparameters.
6279///
6280/// This provides a direct evaluation of the profiled REML objective using the
6281/// external-caller interface, which exposes exact cost/gradient/Hessian without
6282/// running the full outer smoothing loop. The returned tuple is
6283/// `(cost, gradient, hessian)` in the joint [ρ, ψ] space.
6284fn evaluate_joint_reml_outer_eval_at_theta(
6285    evaluator: &mut gam_solve::estimate::ExternalJointHyperEvaluator<'_>,
6286    design: &TermCollectionDesign,
6287    theta: &Array1<f64>,
6288    rho_dim: usize,
6289    hyper_dirs: Vec<gam_solve::estimate::reml::DirectionalHyperParam>,
6290    warm_start_beta: Option<ArrayView1<'_, f64>>,
6291    order: gam_solve::rho_optimizer::OuterEvalOrder,
6292    design_revision: Option<u64>,
6293) -> Result<
6294    (
6295        f64,
6296        Array1<f64>,
6297        gam_problem::HessianResult,
6298    ),
6299    EstimationError,
6300> {
6301    evaluator.evaluate_with_order(
6302        &design.design,
6303        &design.penalties,
6304        &design.nullspace_dims,
6305        design.linear_constraints.clone(),
6306        theta,
6307        rho_dim,
6308        hyper_dirs,
6309        warm_start_beta,
6310        "evaluate_joint_reml_outer_eval_at_theta",
6311        order,
6312        design_revision,
6313    )
6314}
6315
6316fn evaluate_joint_reml_efs_at_theta(
6317    evaluator: &mut gam_solve::estimate::ExternalJointHyperEvaluator<'_>,
6318    design: &TermCollectionDesign,
6319    theta: &Array1<f64>,
6320    rho_dim: usize,
6321    hyper_dirs: Vec<gam_solve::estimate::reml::DirectionalHyperParam>,
6322    warm_start_beta: Option<ArrayView1<'_, f64>>,
6323    design_revision: Option<u64>,
6324) -> Result<gam_problem::EfsEval, EstimationError> {
6325    evaluator.evaluate_efs(
6326        &design.design,
6327        &design.penalties,
6328        &design.nullspace_dims,
6329        design.linear_constraints.clone(),
6330        theta,
6331        rho_dim,
6332        hyper_dirs,
6333        warm_start_beta,
6334        "evaluate_joint_reml_efs_at_theta",
6335        design_revision,
6336    )
6337}
6338
6339fn exact_joint_spatial_outer_hessian_available(
6340    family: &LikelihoodSpec,
6341    design: &TermCollectionDesign,
6342) -> bool {
6343    // Every `LikelihoodSpec` variant (Gaussian, Binomial-*, Poisson, Gamma,
6344    // Royston-Parmar) routes through the unified evaluator's outer-Hessian
6345    // path: Gaussian Identity uses the no-correction dense form, all GLM
6346    // variants supply scalar-GLM derivative ingredients consumed by
6347    // `compute_outer_hessian` / `build_outer_hessian_operator`, and the
6348    // (n, p, K) crossover in `prefer_outer_hessian_operator` chooses the
6349    // matrix-free `HessianResult::Operator` representation at large scale
6350    // for dense-lazy designs.  The previous `Identity || sparse_design`
6351    // gate predates that operator routing and forced binomial+logit+Matern
6352    // (and any other non-Gaussian dense-lazy spatial design) onto the
6353    // gradient-only BFGS path even though analytic Hessian is fully
6354    // available — capability check, not cost.  Match every variant
6355    // explicitly so any future family addition (which may not yet provide
6356    // outer-Hessian ingredients) forces an authoring decision here rather
6357    // than silently inheriting `true`.
6358    // Every supported response (Gaussian, Binomial-*, Poisson, Tweedie,
6359    // NegativeBinomial, Beta, Gamma, Royston-Parmar) routes through the
6360    // unified evaluator's outer-Hessian path; the spec-level capability
6361    // check therefore always succeeds. Match every response explicitly so
6362    // any future family addition (which may not yet provide outer-Hessian
6363    // ingredients) forces an authoring decision here rather than silently
6364    // inheriting `true`.
6365    let family_supported = match &family.response {
6366        ResponseFamily::Gaussian
6367        | ResponseFamily::Binomial
6368        | ResponseFamily::Poisson
6369        | ResponseFamily::Tweedie { .. }
6370        | ResponseFamily::NegativeBinomial { .. }
6371        | ResponseFamily::Beta { .. }
6372        | ResponseFamily::Gamma
6373        | ResponseFamily::RoystonParmar => true,
6374    };
6375    // A design with zero columns has no joint outer-Hessian to compute;
6376    // the analytic path is only meaningful for non-empty parameter blocks.
6377    family_supported && design.design.ncols() > 0
6378}
6379
6380fn smooth_term_penalty_index(
6381    spec: &TermCollectionSpec,
6382    design: &TermCollectionDesign,
6383    term_idx: usize,
6384) -> Option<usize> {
6385    if term_idx >= design.smooth.terms.len() || term_idx >= spec.smooth_terms.len() {
6386        return None;
6387    }
6388    if design.smooth.terms[term_idx].penalties_local.is_empty() {
6389        return None;
6390    }
6391    let linear_penalties = spec
6392        .linear_terms
6393        .iter()
6394        .filter(|t| t.double_penalty)
6395        .count()
6396        * 2;
6397    let random_penalties = design
6398        .random_effect_ranges
6399        .iter()
6400        .filter(|(_, range)| !range.is_empty())
6401        .count();
6402    let smooth_offset = linear_penalties + random_penalties;
6403    let local_offset = design
6404        .smooth
6405        .terms
6406        .iter()
6407        .take(term_idx)
6408        .map(|term| term.penalties_local.len())
6409        .sum::<usize>();
6410    Some(smooth_offset + local_offset)
6411}
6412
6413fn try_build_spatial_term_log_kappa_derivativeinfo(
6414    data: ArrayView2<'_, f64>,
6415    resolvedspec: &TermCollectionSpec,
6416    design: &TermCollectionDesign,
6417    term_idx: usize,
6418) -> Result<Option<SpatialPsiDerivative>, EstimationError> {
6419    let Some((
6420        global_range,
6421        total_p,
6422        x_psi_local,
6423        s_psi_local_check,
6424        x_psi_psi_local,
6425        s_psi_psi_local,
6426        s_psi_components_local,
6427        s_psi_psi_components_local,
6428        implicit_operator,
6429    )) = try_build_spatial_term_log_kappa_derivative(data, resolvedspec, design, term_idx)?
6430    else {
6431        return Ok(None);
6432    };
6433    let Some(penalty_start) = smooth_term_penalty_index(resolvedspec, design, term_idx) else {
6434        return Ok(None);
6435    };
6436    if s_psi_components_local.is_empty() || s_psi_psi_components_local.is_empty() {
6437        return Ok(None);
6438    }
6439    if s_psi_components_local.len() != s_psi_psi_components_local.len() {
6440        return Ok(None);
6441    }
6442    let penalty_indices = (0..s_psi_components_local.len())
6443        .map(|j| penalty_start + j)
6444        .collect::<Vec<_>>();
6445    let penalty_index = penalty_indices[0];
6446    if s_psi_local_check.nrows() == 0 || s_psi_psi_local.nrows() == 0 {
6447        return Ok(None);
6448    }
6449    Ok(Some(SpatialPsiDerivative {
6450        penalty_index,
6451        penalty_indices,
6452        global_range,
6453        total_p,
6454        x_psi_local,
6455        s_psi_components_local,
6456        x_psi_psi_local,
6457        s_psi_psi_components_local,
6458        aniso_group_id: None,
6459        aniso_cross_designs: None,
6460        aniso_cross_penalty_provider: None,
6461        implicit_operator,
6462        implicit_axis: 0,
6463    }))
6464}
6465
6466pub(crate) fn try_build_spatial_log_kappa_derivativeinfo_list(
6467    data: ArrayView2<'_, f64>,
6468    resolvedspec: &TermCollectionSpec,
6469    design: &TermCollectionDesign,
6470    spatial_terms: &[usize],
6471) -> Result<Option<Vec<SpatialPsiDerivative>>, EstimationError> {
6472    let mut out = Vec::new();
6473    let mut aniso_gid = 0usize;
6474    for &term_idx in spatial_terms {
6475        if spatial_term_uses_per_axis_psi(resolvedspec, term_idx) {
6476            if let Some(entries) = try_build_spatial_term_log_kappa_aniso_derivativeinfos(
6477                data,
6478                resolvedspec,
6479                design,
6480                term_idx,
6481                aniso_gid,
6482            )? {
6483                aniso_gid += 1;
6484                out.extend(entries);
6485                continue;
6486            } else {
6487                return Ok(None);
6488            }
6489        }
6490        let Some(info) =
6491            try_build_spatial_term_log_kappa_derivativeinfo(data, resolvedspec, design, term_idx)?
6492        else {
6493            return Ok(None);
6494        };
6495        out.push(info);
6496    }
6497    Ok(Some(out))
6498}
6499
6500/// For an aniso term with d axes, produce d `SpatialPsiDerivative` entries.
6501fn try_build_spatial_term_log_kappa_aniso_derivativeinfos(
6502    data: ArrayView2<'_, f64>,
6503    resolvedspec: &TermCollectionSpec,
6504    design: &TermCollectionDesign,
6505    term_idx: usize,
6506    aniso_group_id: usize,
6507) -> Result<Option<Vec<SpatialPsiDerivative>>, EstimationError> {
6508    let Some(smooth_term) = design.smooth.terms.get(term_idx) else {
6509        return Ok(None);
6510    };
6511    let Some(termspec) = resolvedspec.smooth_terms.get(term_idx) else {
6512        return Ok(None);
6513    };
6514    let mut aniso_result = match &termspec.basis {
6515        SmoothBasisSpec::Sphere { .. } => return Ok(None),
6516        SmoothBasisSpec::Matern {
6517            feature_cols,
6518            spec,
6519            input_scales,
6520        } => {
6521            let mut x = select_columns(data, feature_cols).map_err(EstimationError::from)?;
6522            if let Some(s) = input_scales {
6523                apply_input_standardization(&mut x, s);
6524            }
6525            build_matern_basis_log_kappa_aniso_derivatives(x.view(), spec)
6526                .map_err(EstimationError::from)?
6527        }
6528        // Measure-jet: the grouped dial coordinates ride the same per-axis
6529        // carrier. The producer runs on the FROZEN spec (the driver runs
6530        // post-freeze), so per-trial rebuilds move only the dials; the
6531        // coordinate layout, zero design drift, and shared candidate
6532        // normalization are owned by `build_measure_jet_basis_psi_derivatives`.
6533        SmoothBasisSpec::MeasureJet {
6534            feature_cols,
6535            spec,
6536            input_scales,
6537        } => {
6538            let mut x = select_columns(data, feature_cols).map_err(EstimationError::from)?;
6539            if let Some(s) = input_scales {
6540                apply_input_standardization(&mut x, s);
6541            }
6542            build_measure_jet_basis_psi_derivatives(x.view(), spec)
6543                .map_err(EstimationError::from)?
6544        }
6545        _ => return Ok(None),
6546    };
6547    // Get number of axes from the shared operator when available; otherwise
6548    // fall back to the dense design list.
6549    let d = if let Some(ref op) = aniso_result.implicit_operator {
6550        op.n_axes()
6551    } else if !aniso_result.design_first.is_empty() {
6552        aniso_result.design_first.len()
6553    } else {
6554        0
6555    };
6556    if d == 0 {
6557        return Ok(None);
6558    }
6559    let Some(penalty_start) = smooth_term_penalty_index(resolvedspec, design, term_idx) else {
6560        return Ok(None);
6561    };
6562    let p_total = design.design.ncols();
6563    let smooth_start = p_total.saturating_sub(design.smooth.total_smooth_cols());
6564    let global_range = (smooth_start + smooth_term.coeff_range.start)
6565        ..(smooth_start + smooth_term.coeff_range.end);
6566    let num_penalties = aniso_result.penalties_first[0].len();
6567    let penalty_indices: Vec<usize> = (0..num_penalties).map(|j| penalty_start + j).collect();
6568    let penalties_cross_provider = aniso_result.penalties_cross_provider.clone();
6569
6570    // Dense first/diagonal-second matrices may be present even when the shared
6571    // operator is available. The operator remains the canonical source for
6572    // exact cross-axis second derivatives.
6573    let use_implicit_design = aniso_result.design_first.is_empty();
6574    let implicit_op_arc = aniso_result
6575        .implicit_operator
6576        .as_ref()
6577        .map(|op| std::sync::Arc::new(op.clone()));
6578
6579    let mut entries = Vec::with_capacity(d);
6580    for a in 0..d {
6581        let (x_psi_local, x_psi_psi_local) = if use_implicit_design {
6582            // Implicit path: design-derivative matvecs will be dispatched through
6583            // the ImplicitDerivativeOp inside HyperDesignDerivative, so we do NOT
6584            // need to materialize the dense (n x p) matrices here.  Store empty
6585            // placeholders — they are never read when the implicit operator is
6586            // present (spatial_log_kappa_hyper_dirs_frominfo_list uses from_implicit).
6587            (Array2::<f64>::zeros((0, 0)), Array2::<f64>::zeros((0, 0)))
6588        } else {
6589            // Move the dense (n × p) matrices out of aniso_result instead of
6590            // cloning. Each axis index `a` is read exactly once across the
6591            // loop, and aniso_result is dropped at function exit, so leaving
6592            // empty placeholders behind in those vec slots is safe.
6593            let x_first = std::mem::take(&mut aniso_result.design_first[a]);
6594            let x_second = std::mem::take(&mut aniso_result.design_second_diag[a]);
6595            if x_first.ncols() != smooth_term.coeff_range.len() {
6596                return Ok(None);
6597            }
6598            (x_first, x_second)
6599        };
6600        let s_psi_components = std::mem::take(&mut aniso_result.penalties_first[a]);
6601        let s_psi_psi_components = std::mem::take(&mut aniso_result.penalties_second_diag[a]);
6602        // Build cross-design entries for other axes b != a in this group.
6603        // These will be indexed by (b, cross_matrix) where b is the axis
6604        // offset within the d-entry block.
6605        // Cross-axis second derivatives are sourced from the shared operator,
6606        // so we only need placeholder entries to preserve the axis layout.
6607        let cross_designs = if implicit_op_arc.is_some() {
6608            let mut cd = Vec::with_capacity(d - 1);
6609            for b in 0..d {
6610                if b == a {
6611                    continue;
6612                }
6613                cd.push((b, Array2::<f64>::zeros((0, 0))));
6614            }
6615            cd
6616        } else if !aniso_result.design_second_cross.is_empty() {
6617            let mut cd = Vec::new();
6618            for (cross_idx, &(pa, pb)) in aniso_result.design_second_cross_pairs.iter().enumerate()
6619            {
6620                if pa == a {
6621                    cd.push((pb, aniso_result.design_second_cross[cross_idx].clone()));
6622                } else if pb == a {
6623                    cd.push((pa, aniso_result.design_second_cross[cross_idx].clone()));
6624                }
6625            }
6626            cd
6627        } else {
6628            Vec::new()
6629        };
6630        let cross_penalty_provider = if d > 1 {
6631            let penalties_cross_provider = penalties_cross_provider.clone();
6632            Some(std::sync::Arc::new(
6633                move |b_axis: usize| -> Result<Vec<Array2<f64>>, EstimationError> {
6634                    if b_axis == a {
6635                        return Ok(Vec::new());
6636                    }
6637                    let (axis_lo, axis_hi) = if a < b_axis { (a, b_axis) } else { (b_axis, a) };
6638                    if let Some(provider) = penalties_cross_provider.as_ref() {
6639                        provider
6640                            .evaluate(axis_lo, axis_hi)
6641                            .map_err(EstimationError::from)
6642                    } else {
6643                        // No provider: either the pair is unregistered, or it
6644                        // was registered without data (early-return raw-operator
6645                        // paths). Both cases contribute no cross penalties.
6646                        Ok(Vec::new())
6647                    }
6648                },
6649            )
6650                as std::sync::Arc<
6651                    dyn Fn(usize) -> Result<Vec<Array2<f64>>, EstimationError>
6652                        + Send
6653                        + Sync
6654                        + 'static,
6655                >)
6656        } else {
6657            None
6658        };
6659
6660        entries.push(SpatialPsiDerivative {
6661            penalty_index: penalty_indices[0],
6662            penalty_indices: penalty_indices.clone(),
6663            global_range: global_range.clone(),
6664            total_p: p_total,
6665            x_psi_local,
6666            s_psi_components_local: s_psi_components,
6667            x_psi_psi_local,
6668            s_psi_psi_components_local: s_psi_psi_components,
6669            aniso_group_id: Some(aniso_group_id),
6670            aniso_cross_designs: if cross_designs.is_empty() {
6671                None
6672            } else {
6673                Some(cross_designs)
6674            },
6675            aniso_cross_penalty_provider: cross_penalty_provider,
6676            implicit_operator: implicit_op_arc.clone(),
6677            implicit_axis: a,
6678        });
6679    }
6680    Ok(Some(entries))
6681}
6682
6683#[cfg(test)]
6684mod glm_eta_observation_fd_tests {
6685    //! #1615/#1616: the non-Gaussian GLM arms of `evaluate_standard_familyobservations`
6686    //! (Poisson / Gamma / NegativeBinomial / Tweedie) must have a self-consistent
6687    //! derivative tower: `score = ∂ℓ/∂η`, `neghessian_eta = −∂(score)/∂η`, and
6688    //! `neghessian_eta_derivative = ∂(neghessian_eta)/∂η`. Pin each against central
6689    //! finite differences of the assembled log-likelihood / score.
6690    use super::*;
6691
6692    fn one_obs(spec: &LikelihoodSpec, y: f64, eta: f64) -> StandardFamilyObservationState {
6693        let yv = Array1::from_vec(vec![y]);
6694        let wv = Array1::from_vec(vec![1.0]);
6695        let ev = Array1::from_vec(vec![eta]);
6696        evaluate_standard_familyobservations(spec.clone(), None, None, None, &yv, &wv, &ev)
6697            .expect("standard family observation state assembles")
6698    }
6699
6700    fn check_fd(label: &str, spec: &LikelihoodSpec, y: f64, eta: f64) {
6701        let h = 1e-5;
6702        let s0 = one_obs(spec, y, eta);
6703        let sp = one_obs(spec, y, eta + h);
6704        let sm = one_obs(spec, y, eta - h);
6705
6706        // score = d(log_likelihood)/d(eta)
6707        let score_fd = (sp.log_likelihood - sm.log_likelihood) / (2.0 * h);
6708        let score = s0.score[0];
6709        assert!(
6710            (score - score_fd).abs() <= 1e-4 * (1.0 + score.abs()),
6711            "{label}: score {score} vs FD {score_fd}"
6712        );
6713
6714        // neghessian_eta = -d(score)/d(eta)
6715        let neghess_fd = -(sp.score[0] - sm.score[0]) / (2.0 * h);
6716        let neghess = s0.neghessian_eta[0];
6717        assert!(
6718            (neghess - neghess_fd).abs() <= 1e-3 * (1.0 + neghess.abs()),
6719            "{label}: neghessian_eta {neghess} vs FD {neghess_fd}"
6720        );
6721
6722        // neghessian_eta_derivative = d(neghessian_eta)/d(eta)
6723        let nhd_fd = (sp.neghessian_eta[0] - sm.neghessian_eta[0]) / (2.0 * h);
6724        let nhd = s0.neghessian_eta_derivative[0];
6725        assert!(
6726            (nhd - nhd_fd).abs() <= 1e-2 * (1.0 + nhd.abs()),
6727            "{label}: neghessian_eta_derivative {nhd} vs FD {nhd_fd}"
6728        );
6729    }
6730
6731    #[test]
6732    fn poisson_gamma_nb_tweedie_arms_match_finite_differences_1615_1616() {
6733        let log = InverseLink::Standard(StandardLink::Log);
6734        let poisson = LikelihoodSpec {
6735            response: ResponseFamily::Poisson,
6736            link: log.clone(),
6737        };
6738        check_fd("poisson y=3", &poisson, 3.0, 0.4);
6739        check_fd("poisson y=0", &poisson, 0.0, -0.2);
6740
6741        let gamma = LikelihoodSpec {
6742            response: ResponseFamily::Gamma,
6743            link: log.clone(),
6744        };
6745        check_fd("gamma y=2.5", &gamma, 2.5, 0.3);
6746        check_fd("gamma y=0.7", &gamma, 0.7, -0.1);
6747
6748        let nb = LikelihoodSpec {
6749            response: ResponseFamily::NegativeBinomial {
6750                theta: 1.5,
6751                theta_fixed: true,
6752            },
6753            link: log.clone(),
6754        };
6755        check_fd("negbin y=4", &nb, 4.0, 0.5);
6756        check_fd("negbin y=0", &nb, 0.0, -0.3);
6757
6758        let tweedie = LikelihoodSpec {
6759            response: ResponseFamily::Tweedie { p: 1.5 },
6760            link: log.clone(),
6761        };
6762        check_fd("tweedie y=2", &tweedie, 2.0, 0.25);
6763        check_fd("tweedie y=0.5", &tweedie, 0.5, -0.15);
6764    }
6765}