Skip to main content

gam_models/gamlss/
builders.rs

1// Real concern-organized submodule of the gamlss family stack.
2// Cross-module items are re-exported flat through the parent (`gamlss.rs`),
3// so `use super::*;` makes the sibling-concern symbols this module references
4// resolve through the parent namespace.
5use super::*;
6
7#[derive(Clone, Copy)]
8pub(crate) struct GamlssLambdaLayout {
9    pub(crate) k_mean: usize,
10    pub(crate) k_noise: usize,
11    pub(crate) kwiggle: usize,
12}
13
14impl GamlssLambdaLayout {
15    pub(crate) fn two_block(k_mean: usize, k_noise: usize) -> Self {
16        Self {
17            k_mean,
18            k_noise,
19            kwiggle: 0,
20        }
21    }
22
23    pub(crate) fn withwiggle(k_mean: usize, k_noise: usize, kwiggle: usize) -> Self {
24        Self {
25            k_mean,
26            k_noise,
27            kwiggle,
28        }
29    }
30
31    pub(crate) fn total(self) -> usize {
32        self.k_mean + self.k_noise + self.kwiggle
33    }
34
35    pub(crate) fn noise_start(self) -> usize {
36        self.k_mean
37    }
38
39    pub(crate) fn noise_end(self) -> usize {
40        self.k_mean + self.k_noise
41    }
42
43    pub(crate) fn wiggle_start(self) -> usize {
44        self.k_mean + self.k_noise
45    }
46
47    pub(crate) fn wiggle_end(self) -> usize {
48        self.k_mean + self.k_noise + self.kwiggle
49    }
50
51    pub(crate) fn validate_theta_len(self, theta_len: usize, context: &str) -> Result<(), String> {
52        let needed = self.total();
53        if theta_len < needed {
54            return Err(GamlssError::DimensionMismatch {
55                reason: format!(
56                    "{context} theta too short: got {}, need at least {}",
57                    theta_len, needed
58                ),
59            }
60            .into());
61        }
62        Ok(())
63    }
64
65    pub(crate) fn mean_from(self, theta: &Array1<f64>) -> Array1<f64> {
66        theta.slice(s![0..self.k_mean]).to_owned()
67    }
68
69    pub(crate) fn noise_from(self, theta: &Array1<f64>) -> Array1<f64> {
70        theta
71            .slice(s![self.noise_start()..self.noise_end()])
72            .to_owned()
73    }
74
75    pub(crate) fn wiggle_from(self, theta: &Array1<f64>) -> Array1<f64> {
76        theta
77            .slice(s![self.wiggle_start()..self.wiggle_end()])
78            .to_owned()
79    }
80}
81
82#[derive(Clone, Copy)]
83pub(crate) struct GamlssBetaLayout {
84    pub(crate) pt: usize,
85    pub(crate) pls: usize,
86    pub(crate) pw: usize,
87}
88
89impl GamlssBetaLayout {
90    pub(crate) fn withwiggle(pt: usize, pls: usize, pw: usize) -> Self {
91        Self { pt, pls, pw }
92    }
93
94    pub(crate) fn total(self) -> usize {
95        self.pt + self.pls + self.pw
96    }
97
98    pub(crate) fn split_three(
99        self,
100        flat: &Array1<f64>,
101        context: &str,
102    ) -> Result<(Array1<f64>, Array1<f64>, Array1<f64>), String> {
103        if flat.len() != self.total() {
104            return Err(GamlssError::DimensionMismatch {
105                reason: format!(
106                    "{context} length mismatch: got {}, expected {}",
107                    flat.len(),
108                    self.total()
109                ),
110            }
111            .into());
112        }
113        Ok((
114            flat.slice(s![0..self.pt]).to_owned(),
115            flat.slice(s![self.pt..self.pt + self.pls]).to_owned(),
116            flat.slice(s![self.pt + self.pls..self.total()]).to_owned(),
117        ))
118    }
119}
120
121#[derive(Clone, Debug)]
122pub struct FamilyMetadata {
123    pub name: &'static str,
124    pub parameternames: &'static [&'static str],
125    pub parameter_links: &'static [ParameterLink],
126}
127
128pub(crate) const DEFAULT_GAUGE_PRIORITY: u8 = 100;
129
130pub(crate) const LINK_WIGGLE_GAUGE_PRIORITY: u8 = 80;
131
132pub(crate) fn initial_log_lambdas_orzeros(
133    block: &ParameterBlockInput,
134) -> Result<Array1<f64>, String> {
135    let k = block.penalties.len();
136    let lambdas = block
137        .initial_log_lambdas
138        .clone()
139        .unwrap_or_else(|| Array1::<f64>::zeros(k));
140    if lambdas.len() != k {
141        return Err(GamlssError::DimensionMismatch {
142            reason: format!(
143                "initial_log_lambdas length mismatch: got {}, expected {}",
144                lambdas.len(),
145                k
146            ),
147        }
148        .into());
149    }
150    Ok(lambdas)
151}
152
153pub(crate) fn build_two_block_exact_joint_setup(
154    data: ArrayView2<'_, f64>,
155    meanspec: &TermCollectionSpec,
156    noisespec: &TermCollectionSpec,
157    mean_penalties: usize,
158    noise_penalties: usize,
159    extra_rho0: &[f64],
160    rho0_override: Option<&Array1<f64>>,
161    kappa_options: &SpatialLengthScaleOptimizationOptions,
162) -> ExactJointHyperSetup {
163    // GAMLSS-specific part: assemble the rho seed in [mean | noise | extra]
164    // penalty order, honoring a caller override when it matches the layout.
165    let rho_dim = mean_penalties + noise_penalties + extra_rho0.len();
166    let mut rho0vec = Array1::<f64>::zeros(rho_dim);
167    if let Some(rho0) = rho0_override.filter(|rho0| rho0.len() == rho_dim) {
168        rho0vec.assign(rho0);
169    } else {
170        for (i, &rho_init) in extra_rho0.iter().enumerate() {
171            rho0vec[mean_penalties + noise_penalties + i] = rho_init;
172        }
173    }
174
175    // Generic part: per-block log(kappa) seed/bounds and exact-joint assembly,
176    // with the two linear predictors (mean, noise) in theta order.
177    build_location_scale_exact_joint_setup(data, &[meanspec, noisespec], rho0vec, kappa_options)
178}
179
180pub(crate) fn gaussian_location_scalewarm_start(
181    y: &Array1<f64>,
182    weights: &Array1<f64>,
183    mu_block: &ParameterBlockSpec,
184    log_sigma_block: &ParameterBlockSpec,
185    ridge_floor: f64,
186    mean_beta_hint: Option<&Array1<f64>>,
187    noise_beta_hint: Option<&Array1<f64>>,
188) -> Result<(Array1<f64>, Array1<f64>, f64), String> {
189    let betamu = if let Some(beta) = mean_beta_hint {
190        beta.clone()
191    } else {
192        solve_penalizedweighted_projection(
193            &mu_block.design,
194            &mu_block.offset,
195            y,
196            weights,
197            &mu_block.penalties,
198            &mu_block.initial_log_lambdas,
199            ridge_floor,
200        )?
201    };
202    let mut mu_hat = mu_block.solver_design().matrixvectormultiply(&betamu);
203    mu_hat += mu_block.solver_offset();
204    let mut weighted_ss = 0.0;
205    let mut weight_sum = 0.0;
206    for i in 0..y.len() {
207        let wi = weights[i].max(0.0);
208        let resid = y[i] - mu_hat[i];
209        weighted_ss += wi * resid * resid;
210        weight_sum += wi;
211    }
212    if !weighted_ss.is_finite() || !weight_sum.is_finite() || weight_sum <= 0.0 {
213        return Err(
214            "gaussian location-scale warm start could not estimate residual scale".to_string(),
215        );
216    }
217    // Warm-start σ̂ must clear the logb floor so the inverse link
218    //   η = log(σ − b)
219    // is finite. Use a relative cushion above b so the warm-start is in the
220    // smooth interior of the link domain.
221    let sigma_hat = (weighted_ss / weight_sum)
222        .sqrt()
223        .max(LOGB_SIGMA_FLOOR * 1.5);
224    let beta_log_sigma = if let Some(beta) = noise_beta_hint {
225        beta.clone()
226    } else {
227        let eta_sigma = (sigma_hat - LOGB_SIGMA_FLOOR).ln();
228        let sigma_target = Array1::from_elem(y.len(), eta_sigma);
229        solve_penalizedweighted_projection(
230            &log_sigma_block.design,
231            &log_sigma_block.offset,
232            &sigma_target,
233            weights,
234            &log_sigma_block.penalties,
235            &log_sigma_block.initial_log_lambdas,
236            ridge_floor,
237        )?
238    };
239    Ok((betamu, beta_log_sigma, sigma_hat))
240}
241
242/// Total output count for every two-block location-scale family in this
243/// module (mu/log_sigma or threshold/log_sigma). The wiggle variants add a
244/// third zero-channel block but still drive only two output channels.
245pub(crate) const LOCATION_SCALE_N_OUTPUTS: usize = 2;
246
247/// Construct a fully wired location-scale parameter block.
248///
249/// This is the **only** way to build a LocationScale `ParameterBlockSpec` in
250/// this module — by construction the `AdditiveBlockJacobian` callback is
251/// always installed, so the channel-aware identifiability audit cannot be
252/// silently bypassed by a future `build_blocks` impl that forgets to wire
253/// the callback at the tail (re-introducing #319).
254///
255/// `own_output` is the zero-based output channel this block drives
256/// (e.g. 0 for `mu`/`threshold`, 1 for `log_sigma`). `n_family_outputs` is
257/// fixed at [`LOCATION_SCALE_N_OUTPUTS`] for every two-block family here
258/// but is exposed so the helper composes cleanly with any future
259/// k-block extension.
260pub(crate) fn build_location_scale_block(
261    name: impl Into<String>,
262    design: DesignMatrix,
263    offset: Array1<f64>,
264    penalties: Vec<PenaltyMatrix>,
265    nullspace_dims: Vec<usize>,
266    initial_log_lambdas: Array1<f64>,
267    initial_beta: Option<Array1<f64>>,
268    own_output: usize,
269    n_family_outputs: usize,
270    caller: &str,
271) -> Result<ParameterBlockSpec, String> {
272    if own_output >= n_family_outputs {
273        return Err(format!(
274            "{caller}: own_output={own_output} >= n_family_outputs={n_family_outputs}"
275        ));
276    }
277    let mut spec = ParameterBlockSpec {
278        name: name.into(),
279        design,
280        offset,
281        penalties,
282        nullspace_dims,
283        initial_log_lambdas,
284        initial_beta,
285        gauge_priority: 100,
286        jacobian_callback: None,
287        stacked_design: None,
288        stacked_offset: None,
289    };
290    let dense = spec.effective_design(caller)?;
291    spec.jacobian_callback = Some(std::sync::Arc::new(AdditiveBlockJacobian {
292        design: dense,
293        own_output,
294        n_family_outputs,
295    }));
296    Ok(spec)
297}
298
299/// Construct the wiggle block that accompanies a two-block location-scale
300/// family. The wiggle modulates the inverse link nonlinearly and
301/// contributes no linear effective Jacobian — the installed callback
302/// therefore exposes a zero `(n × p_w)` design under
303/// `n_family_outputs = LOCATION_SCALE_N_OUTPUTS`.
304pub(crate) fn build_location_scale_wiggle_block(
305    name: impl Into<String>,
306    design: DesignMatrix,
307    offset: Array1<f64>,
308    penalties: Vec<PenaltyMatrix>,
309    nullspace_dims: Vec<usize>,
310    initial_log_lambdas: Array1<f64>,
311    initial_beta: Option<Array1<f64>>,
312    n_rows: usize,
313) -> Result<ParameterBlockSpec, String> {
314    let p_w = design.ncols();
315    let mut spec = ParameterBlockSpec {
316        name: name.into(),
317        design,
318        offset,
319        penalties,
320        nullspace_dims,
321        initial_log_lambdas,
322        initial_beta,
323        gauge_priority: 100,
324        jacobian_callback: None,
325        stacked_design: None,
326        stacked_offset: None,
327    };
328    spec.jacobian_callback = Some(std::sync::Arc::new(AdditiveBlockJacobian {
329        design: ndarray::Array2::<f64>::zeros((n_rows, p_w)),
330        own_output: 0,
331        n_family_outputs: LOCATION_SCALE_N_OUTPUTS,
332    }));
333    Ok(spec)
334}
335
336pub(crate) fn prepared_gaussian_log_sigma_design(
337    mu_design: &DesignMatrix,
338    log_sigma_design: &DesignMatrix,
339) -> Result<DesignMatrix, String> {
340    if mu_design.nrows() != log_sigma_design.nrows() {
341        return Err(GamlssError::DimensionMismatch {
342            reason: format!(
343                "gaussian log-sigma design row mismatch: mean rows={}, log_sigma rows={}",
344                mu_design.nrows(),
345                log_sigma_design.nrows()
346            ),
347        }
348        .into());
349    }
350    // Gaussian location-scale remains identifiable even when μ and log σ use
351    // the same covariate basis:
352    //
353    //   L(μ, η) = 0.5 * Σ_i [ (y_i - μ_i)^2 exp(-2η_i) + 2η_i ],
354    //   μ = X_μ β_μ,  η = X_σ β_σ.
355    //
356    // Shared columns are not a frame mismatch. β_μ and β_σ enter through
357    // different sufficient statistics (residual and residual²), so replacing
358    // X_σ with (I - P_{X_μ}) X_σ would impose an extra constraint and can
359    // erase real heteroscedastic signal when the two blocks share a basis.
360    Ok(log_sigma_design.clone())
361}
362
363pub(crate) fn identified_binomial_log_sigma_design(
364    threshold_design: &TermCollectionDesign,
365    log_sigma_design: &TermCollectionDesign,
366    weights: &Array1<f64>,
367) -> Result<DesignMatrix, String> {
368    let non_intercept_start = log_sigma_design
369        .intercept_range
370        .end
371        .min(log_sigma_design.design.ncols());
372    let transform = build_scale_deviation_transform_design(
373        &threshold_design.design,
374        &log_sigma_design.design,
375        weights,
376        non_intercept_start,
377    )?;
378    build_scale_deviation_operator(
379        threshold_design.design.clone(),
380        log_sigma_design.design.clone(),
381        &transform,
382    )
383}
384
385pub(crate) fn identity_penalty(dim: usize) -> Array2<f64> {
386    let mut penalty = Array2::<f64>::zeros((dim, dim));
387    for i in 0..dim {
388        penalty[[i, i]] = 1.0;
389    }
390    penalty
391}
392
393/// Orthogonal projector `P₀ = U₀U₀ᵀ` onto the joint null space of the supplied
394/// penalty blocks over a `dim`-column coefficient space.
395///
396/// Used as the log-σ *shrinkage* penalty for the Gaussian location-scale scale
397/// block. The smooth's own wiggliness penalty already governs its range space
398/// (the curvature directions REML trades off against fit); its null space —
399/// the constant + low-order polynomial log-σ trend that carries the dominant
400/// heteroscedastic signal — is left unpenalized and is only weakly identified
401/// in the coupled (μ, log σ) likelihood, which lets the inner Newton wander
402/// (#1073's "flat/ill-conditioned surface"). A *full-space* identity ridge
403/// fixed that instability but DOUBLE-penalized the range space: REML then drove
404/// the shrinkage λ up, crushing the genuine heteroscedastic curve back to a
405/// constant σ (the underfit this issue reports). Penalizing the null space
406/// ALONE keeps the weakly-identified polynomial trend from blowing up without
407/// touching the wiggliness directions the smooth penalty already controls —
408/// exactly mgcv's `select = TRUE` null-space penalty.
409///
410/// When the supplied penalties already span the whole space (null space empty),
411/// the projector is the zero matrix and the shrinkage term is inert; when there
412/// are no penalties at all (e.g. a purely parametric log-σ design), the null
413/// space is everything and this returns the identity — recovering the previous
414/// full-space ridge exactly where it was the right thing to do.
415pub(crate) fn penalty_nullspace_projector(penalties: &[PenaltyMatrix], dim: usize) -> Array2<f64> {
416    use gam_linalg::faer_ndarray::FaerEigh;
417    use faer::Side;
418
419    if dim == 0 {
420        return Array2::<f64>::zeros((0, 0));
421    }
422    // Combined penalty S = Σ_k S_k over the dim-column scale space. Each block
423    // penalty is already expressed on this space (the scale design's columns).
424    let mut combined = Array2::<f64>::zeros((dim, dim));
425    for pen in penalties {
426        let dense = pen.to_dense();
427        assert_eq!(
428            dense.nrows(),
429            dim,
430            "scale penalty block dim {} != scale design cols {dim}",
431            dense.nrows()
432        );
433        if dense.nrows() == dim && dense.ncols() == dim {
434            combined += &dense;
435        }
436    }
437    // Symmetrize defensively (eigendecomposition assumes self-adjoint input).
438    let combined_sym = 0.5 * (&combined + &combined.t());
439    let (eigvals, eigvecs) = match combined_sym.eigh(Side::Lower) {
440        Ok(decomp) => decomp,
441        // A failed decomposition (degenerate / non-finite) should not silently
442        // drop the stabilizing shrinkage; fall back to the full-space ridge,
443        // which is the conservative (always-positive-definite) choice.
444        Err(_) => return identity_penalty(dim),
445    };
446    // Null space = eigenvectors whose eigenvalue is ≈ 0 relative to the largest.
447    // The combined wiggliness penalty's range-space eigenvalues are O(1) after
448    // basis normalization, so a relative floor cleanly separates the genuine
449    // null directions (constant / low-order polynomial) from the penalized
450    // curvature directions.
451    let max_eig = eigvals.iter().cloned().fold(0.0_f64, f64::max);
452    let tol = (max_eig * 1e-8).max(1e-12);
453    let mut projector = Array2::<f64>::zeros((dim, dim));
454    for (j, &lambda) in eigvals.iter().enumerate() {
455        if lambda <= tol {
456            let v = eigvecs.column(j);
457            // Accumulate v vᵀ into the projector.
458            for a in 0..dim {
459                let va = v[a];
460                if va == 0.0 {
461                    continue;
462                }
463                for b in 0..dim {
464                    projector[[a, b]] += va * v[b];
465                }
466            }
467        }
468    }
469    projector
470}
471
472pub(crate) fn append_binomial_log_sigma_shrinkage_penalty_design(
473    design: &mut TermCollectionDesign,
474) {
475    let p = design.design.ncols();
476    design
477        .penalties
478        .push(BlockwisePenalty::new(0..p, identity_penalty(p)));
479    // Identity penalty penalizes the full space → nullspace dimension is 0.
480    design.nullspace_dims.push(0);
481    design.penaltyinfo.push(PenaltyBlockInfo {
482        global_index: design.penaltyinfo.len(),
483        termname: Some("log_sigma_shrinkage".to_string()),
484        penalty: PenaltyInfo {
485            source: PenaltySource::Other("shrinkage".to_string()),
486            original_index: 0,
487            active: true,
488            effective_rank: p,
489            dropped_reason: None,
490            nullspace_dim_hint: 0,
491            normalization_scale: 1.0,
492            kronecker_factors: None,
493        },
494    });
495}
496
497/// Build the (mean, log-σ) parameter-block pair for a Gaussian location-scale
498/// family. Shared verbatim by the non-wiggle and wiggle Gaussian builders so the
499/// scale-block construction — prepared log-σ design, the REML-selected full-span
500/// shrinkage penalty on the scale nullspace, and the joint Gaussian warm start —
501/// lives in exactly one place. Callers supply the per-block log-λ vectors sliced
502/// from their own layout (two-block vs with-wiggle) and append any extra blocks.
503pub(crate) fn build_gaussian_mean_and_scale_blocks(
504    y: &Array1<f64>,
505    weights: &Array1<f64>,
506    mean_design: &TermCollectionDesign,
507    noise_design: &TermCollectionDesign,
508    mean_offset: &Array1<f64>,
509    noise_offset: &Array1<f64>,
510    mean_log_lambdas: Array1<f64>,
511    noise_log_lambdas: Array1<f64>,
512    mean_beta_hint: Option<Array1<f64>>,
513    noise_beta_hint: Option<Array1<f64>>,
514    context: &str,
515) -> Result<(ParameterBlockSpec, ParameterBlockSpec), String> {
516    let mut meanspec = build_location_scale_block(
517        "mu",
518        mean_design.design.clone(),
519        mean_offset.clone(),
520        mean_design.penalties_as_penalty_matrix(),
521        mean_design.nullspace_dims.clone(),
522        mean_log_lambdas,
523        mean_beta_hint,
524        0,
525        LOCATION_SCALE_N_OUTPUTS,
526        &format!("{context}: mu"),
527    )?;
528    let prepared_noise_design =
529        prepared_gaussian_log_sigma_design(&mean_design.design, &noise_design.design)?;
530    let p_noise = prepared_noise_design.ncols();
531    let mut log_sigma_penalty_matrices = noise_design.penalties_as_penalty_matrix();
532    // Shrinkage penalty on the scale block's *null space only* (mgcv
533    // `select = TRUE`): it stabilizes the weakly-identified constant/polynomial
534    // log-σ trend without double-penalizing the wiggliness directions the
535    // smooth penalty already governs. A full-space identity here over-shrinks
536    // the genuine heteroscedastic curve back to a constant σ (#1073).
537    let shrinkage = penalty_nullspace_projector(&log_sigma_penalty_matrices, p_noise);
538    // The rank of an orthogonal projector equals its trace (P = P² for a projector,
539    // so trace(P) = trace(P²) = ||P||_F² = sum of squared singular values = rank).
540    // The diagonal-threshold test `diag[i] > 0.5` used previously was wrong: for a
541    // rank-d projector onto a low-dimensional subspace (e.g. d=2 null directions of
542    // a TP spline with p=10 columns), each diagonal entry is O(d/p) << 0.5, so the
543    // threshold always returned 0 — misreporting the shrinkage penalty as having
544    // zero penalized dimensions. Trace-based rank is exact for a symmetric
545    // idempotent matrix (rounded to the nearest integer to absorb floating-point
546    // rounding in the eigendecomposition).
547    let shrinkage_rank = (0..p_noise).map(|i| shrinkage[[i, i]]).sum::<f64>().round() as usize;
548    log_sigma_penalty_matrices.push(PenaltyMatrix::Dense(shrinkage));
549    let mut log_sigma_nullspace_dims = noise_design.nullspace_dims.clone();
550    // The null-space projector penalizes a rank-`shrinkage_rank` subspace, so
551    // the remaining unpenalized directions number `p_noise − shrinkage_rank`.
552    log_sigma_nullspace_dims.push(p_noise.saturating_sub(shrinkage_rank));
553    let mut noisespec = build_location_scale_block(
554        "log_sigma",
555        prepared_noise_design,
556        noise_offset.clone(),
557        log_sigma_penalty_matrices,
558        log_sigma_nullspace_dims,
559        noise_log_lambdas,
560        noise_beta_hint,
561        1,
562        LOCATION_SCALE_N_OUTPUTS,
563        &format!("{context}: log_sigma"),
564    )?;
565    if meanspec.initial_beta.is_none() || noisespec.initial_beta.is_none() {
566        let (betamu0, beta_ls0, _) = gaussian_location_scalewarm_start(
567            y,
568            weights,
569            &meanspec,
570            &noisespec,
571            1e-10,
572            meanspec.initial_beta.as_ref(),
573            noisespec.initial_beta.as_ref(),
574        )?;
575        if meanspec.initial_beta.is_none() {
576            meanspec.initial_beta = Some(betamu0);
577        }
578        if noisespec.initial_beta.is_none() {
579            noisespec.initial_beta = Some(beta_ls0);
580        }
581    }
582    Ok((meanspec, noisespec))
583}
584
585/// Build the (threshold, log-σ) parameter-block pair for a Binomial
586/// location-scale family. Shared by the non-wiggle and wiggle Binomial builders;
587/// mirrors [`build_gaussian_mean_and_scale_blocks`] but with the binomial-
588/// identified log-σ design, the link-aware joint warm start, and the same
589/// REML-selected full-span scale shrinkage penalty.
590pub(crate) fn build_binomial_threshold_and_scale_blocks(
591    y: &Array1<f64>,
592    weights: &Array1<f64>,
593    link_kind: &InverseLink,
594    mean_design: &TermCollectionDesign,
595    noise_design: &TermCollectionDesign,
596    mean_offset: &Array1<f64>,
597    noise_offset: &Array1<f64>,
598    mean_log_lambdas: Array1<f64>,
599    noise_log_lambdas: Array1<f64>,
600    mean_beta_hint: Option<Array1<f64>>,
601    noise_beta_hint: Option<Array1<f64>>,
602    context: &str,
603) -> Result<(ParameterBlockSpec, ParameterBlockSpec), String> {
604    let identifiednoise_design =
605        identified_binomial_log_sigma_design(mean_design, noise_design, weights)?;
606    let p_noise = identifiednoise_design.ncols();
607    let mut log_sigma_penalty_matrices: Vec<PenaltyMatrix> =
608        noise_design.penalties_as_penalty_matrix();
609    log_sigma_penalty_matrices.push(PenaltyMatrix::Dense(identity_penalty(p_noise)));
610    let mut thresholdspec = build_location_scale_block(
611        "threshold",
612        mean_design.design.clone(),
613        mean_offset.clone(),
614        mean_design.penalties_as_penalty_matrix(),
615        vec![],
616        mean_log_lambdas,
617        mean_beta_hint,
618        0,
619        LOCATION_SCALE_N_OUTPUTS,
620        &format!("{context}: threshold"),
621    )?;
622    let mut log_sigmaspec = build_location_scale_block(
623        "log_sigma",
624        identifiednoise_design,
625        noise_offset.clone(),
626        log_sigma_penalty_matrices,
627        vec![],
628        noise_log_lambdas,
629        noise_beta_hint,
630        1,
631        LOCATION_SCALE_N_OUTPUTS,
632        &format!("{context}: log_sigma"),
633    )?;
634    if thresholdspec.initial_beta.is_none() || log_sigmaspec.initial_beta.is_none() {
635        let (beta_t0, beta_ls0) = binomial_location_scalewarm_start(
636            y,
637            weights,
638            link_kind,
639            &thresholdspec,
640            &log_sigmaspec,
641            thresholdspec.initial_beta.as_ref(),
642            log_sigmaspec.initial_beta.as_ref(),
643        )?;
644        if thresholdspec.initial_beta.is_none() {
645            thresholdspec.initial_beta = Some(beta_t0);
646        }
647        if log_sigmaspec.initial_beta.is_none() {
648            log_sigmaspec.initial_beta = Some(beta_ls0);
649        }
650    }
651    Ok((thresholdspec, log_sigmaspec))
652}
653
654/// Convert a wiggle block's `PenaltySpec`s into the `PenaltyMatrix` list the
655/// location-scale wiggle block expects. Shared by the Gaussian and Binomial
656/// wiggle builders, which previously inlined the identical match.
657pub(crate) fn wiggle_block_penalty_matrices(
658    wiggle_block: &ParameterBlockInput,
659) -> Vec<PenaltyMatrix> {
660    let p_wiggle = wiggle_block.design.ncols();
661    wiggle_block
662        .penalties
663        .iter()
664        .map(|spec| match spec {
665            crate::model_types::PenaltySpec::Block {
666                local, col_range, ..
667            } => PenaltyMatrix::Blockwise {
668                local: local.clone(),
669                col_range: col_range.clone(),
670                total_dim: p_wiggle,
671            },
672            crate::model_types::PenaltySpec::Dense(m)
673            | crate::model_types::PenaltySpec::DenseWithMean { matrix: m, .. } => {
674                PenaltyMatrix::Dense(m.clone())
675            }
676        })
677        .collect()
678}
679
680pub(crate) fn binomial_location_scale_link_eta_from_probability(
681    link_kind: &InverseLink,
682    probability: f64,
683) -> Result<f64, String> {
684    let target = probability.clamp(1e-6, 1.0 - 1e-6);
685    match link_kind {
686        InverseLink::Standard(StandardLink::Logit) => Ok((target / (1.0 - target)).ln()),
687        InverseLink::Standard(StandardLink::Probit) => standard_normal_quantile(target)
688            .map_err(|err| format!("failed to invert probit warm-start probability: {err}")),
689        InverseLink::Standard(StandardLink::CLogLog) => Ok((-((1.0 - target).ln())).ln()),
690        other => Err(GamlssError::UnsupportedConfiguration { reason: format!(
691            "binomial location-scale warm start requires logit, probit, or cloglog link, got {other:?}"
692        ) }.into()),
693    }
694}
695
696pub(crate) fn weighted_binomial_prevalence(
697    y: &Array1<f64>,
698    weights: &Array1<f64>,
699) -> Result<f64, String> {
700    if y.len() != weights.len() {
701        return Err(GamlssError::DimensionMismatch { reason: format!(
702            "binomial location-scale warm start dimension mismatch: y has length {}, weights have length {}",
703            y.len(),
704            weights.len()
705        ) }.into());
706    }
707    let mut weight_sum = 0.0;
708    let mut success_sum = 0.0;
709    for (&yi, &wi) in y.iter().zip(weights.iter()) {
710        if !yi.is_finite() {
711            return Err(GamlssError::NonFinite {
712                reason: format!(
713                    "binomial location-scale warm start encountered non-finite response {yi}"
714                ),
715            }
716            .into());
717        }
718        let weight = floor_positiveweight(wi, MIN_WEIGHT);
719        if weight > 0.0 {
720            weight_sum += weight;
721            success_sum += weight * yi;
722        }
723    }
724    if !weight_sum.is_finite() || weight_sum <= 0.0 {
725        return Err(
726            "binomial location-scale warm start requires positive total weight".to_string(),
727        );
728    }
729    Ok(success_sum / weight_sum)
730}
731
732pub(crate) fn project_constant_eta_into_block(
733    block: &ParameterBlockSpec,
734    weights: &Array1<f64>,
735    eta: f64,
736) -> Result<Array1<f64>, String> {
737    let target_eta = Array1::from_elem(block.design.nrows(), eta);
738    solve_penalizedweighted_projection(
739        &block.design,
740        &block.offset,
741        &target_eta,
742        weights,
743        &block.penalties,
744        &block.initial_log_lambdas,
745        1e-10,
746    )
747}
748
749// Deterministic warm start for the binomial location-scale model. This stays
750// out of the optimizer: it projects a prevalence-matched threshold and neutral
751// log-sigma value into the actual penalized block spaces.
752pub(crate) fn binomial_location_scalewarm_start(
753    y: &Array1<f64>,
754    weights: &Array1<f64>,
755    link_kind: &InverseLink,
756    threshold_block: &ParameterBlockSpec,
757    log_sigma_block: &ParameterBlockSpec,
758    mean_beta_hint: Option<&Array1<f64>>,
759    noise_beta_hint: Option<&Array1<f64>>,
760) -> Result<(Array1<f64>, Array1<f64>), String> {
761    if let (Some(mean_beta), Some(noise_beta)) = (mean_beta_hint, noise_beta_hint) {
762        return Ok((mean_beta.clone(), noise_beta.clone()));
763    }
764
765    let beta_threshold = match mean_beta_hint {
766        Some(beta) => beta.clone(),
767        None => {
768            let prevalence = weighted_binomial_prevalence(y, weights)?;
769            let eta = binomial_location_scale_link_eta_from_probability(link_kind, prevalence)?;
770            project_constant_eta_into_block(threshold_block, weights, eta)?
771        }
772    };
773    let beta_log_sigma = match noise_beta_hint {
774        Some(beta) => beta.clone(),
775        None => project_constant_eta_into_block(log_sigma_block, weights, 0.0)?,
776    };
777    Ok((beta_threshold, beta_log_sigma))
778}
779
780#[derive(Clone)]
781pub(crate) struct BinomialMeanWiggleSpec {
782    pub y: Array1<f64>,
783    pub weights: Array1<f64>,
784    pub link_kind: InverseLink,
785    pub wiggle_knots: Array1<f64>,
786    pub wiggle_degree: usize,
787    pub eta_block: ParameterBlockInput,
788    pub wiggle_block: ParameterBlockInput,
789}
790
791#[derive(Clone)]
792pub struct GaussianLocationScaleTermSpec {
793    pub y: Array1<f64>,
794    pub weights: Array1<f64>,
795    pub meanspec: TermCollectionSpec,
796    pub log_sigmaspec: TermCollectionSpec,
797    pub mean_offset: Array1<f64>,
798    pub log_sigma_offset: Array1<f64>,
799}
800
801#[derive(Clone)]
802pub struct GaussianLocationScaleWiggleTermSpec {
803    pub y: Array1<f64>,
804    pub weights: Array1<f64>,
805    pub meanspec: TermCollectionSpec,
806    pub log_sigmaspec: TermCollectionSpec,
807    pub mean_offset: Array1<f64>,
808    pub log_sigma_offset: Array1<f64>,
809    pub wiggle_knots: Array1<f64>,
810    pub wiggle_degree: usize,
811    pub wiggle_block: ParameterBlockInput,
812}
813
814#[derive(Clone)]
815pub struct BinomialLocationScaleTermSpec {
816    pub y: Array1<f64>,
817    pub weights: Array1<f64>,
818    pub link_kind: InverseLink,
819    pub thresholdspec: TermCollectionSpec,
820    pub log_sigmaspec: TermCollectionSpec,
821    pub threshold_offset: Array1<f64>,
822    pub log_sigma_offset: Array1<f64>,
823}
824
825#[derive(Clone)]
826pub struct BinomialLocationScaleWiggleTermSpec {
827    pub y: Array1<f64>,
828    pub weights: Array1<f64>,
829    pub link_kind: InverseLink,
830    pub thresholdspec: TermCollectionSpec,
831    pub log_sigmaspec: TermCollectionSpec,
832    pub threshold_offset: Array1<f64>,
833    pub log_sigma_offset: Array1<f64>,
834    pub wiggle_knots: Array1<f64>,
835    pub wiggle_degree: usize,
836    pub wiggle_block: ParameterBlockInput,
837}
838
839#[derive(Clone, Debug)]
840pub struct BlockwiseTermFitResult {
841    pub fit: UnifiedFitResult,
842    pub meanspec_resolved: TermCollectionSpec,
843    pub noisespec_resolved: TermCollectionSpec,
844    pub mean_design: TermCollectionDesign,
845    pub noise_design: TermCollectionDesign,
846}
847
848pub(crate) struct BlockwiseTermFitResultParts {
849    pub fit: UnifiedFitResult,
850    pub meanspec_resolved: TermCollectionSpec,
851    pub noisespec_resolved: TermCollectionSpec,
852    pub mean_design: TermCollectionDesign,
853    pub noise_design: TermCollectionDesign,
854}
855
856pub struct BlockwiseTermWiggleFitResult {
857    pub fit: BlockwiseTermFitResult,
858    pub wiggle_knots: Array1<f64>,
859    pub wiggle_degree: usize,
860}
861
862pub struct BinomialMeanWiggleTermFitResult {
863    pub fit: UnifiedFitResult,
864    pub resolvedspec: TermCollectionSpec,
865    pub design: TermCollectionDesign,
866    pub wiggle_knots: Array1<f64>,
867    pub wiggle_degree: usize,
868    /// Standard-basis warp coefficients `β_w = Z·γ` for the saved-model predict
869    /// runtime, when the frozen-basis de-aliasing engaged (#1596). The fit's own
870    /// coefficients stay in the reduced, identifiable `γ` coordinate; this is the
871    /// out-of-band full-width lift consumed by `beta_link_wiggle`.
872    pub saved_warp_beta: Option<Vec<f64>>,
873}
874
875pub(crate) struct BlockwiseTermWiggleFitResultParts {
876    pub fit: BlockwiseTermFitResult,
877    pub wiggle_knots: Array1<f64>,
878    pub wiggle_degree: usize,
879}
880
881pub(crate) fn validate_term_collection_design(
882    label: &str,
883    design: &TermCollectionDesign,
884) -> Result<(), String> {
885    let p = design.design.ncols();
886    let n = design.design.nrows();
887    for rows in exact_design_row_chunks(n, p) {
888        let chunk = design
889            .design
890            .try_row_chunk(rows)
891            .map_err(|e| format!("{label}.design row chunk materialization failed: {e}"))?;
892        validate_all_finite_estimation(&format!("{label}.design"), chunk.iter().copied())
893            .map_err(|e| e.to_string())?;
894    }
895    if design.nullspace_dims.len() != design.penalties.len() {
896        return Err(GamlssError::DimensionMismatch {
897            reason: format!(
898                "{label}.nullspace_dims length mismatch: got {}, expected {}",
899                design.nullspace_dims.len(),
900                design.penalties.len()
901            ),
902        }
903        .into());
904    }
905    if design.penaltyinfo.len() != design.penalties.len() {
906        return Err(GamlssError::DimensionMismatch {
907            reason: format!(
908                "{label}.penaltyinfo length mismatch: got {}, expected {}",
909                design.penaltyinfo.len(),
910                design.penalties.len()
911            ),
912        }
913        .into());
914    }
915    for (idx, bp) in design.penalties.iter().enumerate() {
916        validate_all_finite_estimation(
917            &format!("{label}.penalties[{idx}]"),
918            bp.local.iter().copied(),
919        )
920        .map_err(|e| e.to_string())?;
921        if bp.col_range.end > p {
922            return Err(GamlssError::DimensionMismatch {
923                reason: format!(
924                    "{label}.penalties[{idx}] col_range {}..{} exceeds design width {}",
925                    bp.col_range.start, bp.col_range.end, p
926                ),
927            }
928            .into());
929        }
930    }
931    if let Some(bounds) = design.coefficient_lower_bounds.as_ref() {
932        if bounds.len() != p {
933            return Err(GamlssError::ConstraintViolation {
934                reason: format!(
935                    "{label}.coefficient_lower_bounds length mismatch: got {}, expected {p}",
936                    bounds.len()
937                ),
938            }
939            .into());
940        }
941        for (idx, &bound) in bounds.iter().enumerate() {
942            if !(bound.is_finite() || bound == f64::NEG_INFINITY) {
943                return Err(GamlssError::NonFinite { reason: format!(
944                    "{label}.coefficient_lower_bounds[{idx}] must be finite or -inf, got {bound}",
945                ) }.into());
946            }
947        }
948    }
949    if let Some(constraints) = design.linear_constraints.as_ref() {
950        validate_all_finite_estimation(
951            &format!("{label}.linear_constraints.a"),
952            constraints.a.iter().copied(),
953        )
954        .map_err(|e| e.to_string())?;
955        validate_all_finite_estimation(
956            &format!("{label}.linear_constraints.b"),
957            constraints.b.iter().copied(),
958        )
959        .map_err(|e| e.to_string())?;
960        if constraints.a.ncols() != p {
961            return Err(GamlssError::DimensionMismatch {
962                reason: format!(
963                    "{label}.linear_constraints.a column mismatch: got {}, expected {p}",
964                    constraints.a.ncols()
965                ),
966            }
967            .into());
968        }
969        if constraints.a.nrows() != constraints.b.len() {
970            return Err(GamlssError::DimensionMismatch {
971                reason: format!(
972                    "{label}.linear_constraints row mismatch: a has {}, b has {}",
973                    constraints.a.nrows(),
974                    constraints.b.len()
975                ),
976            }
977            .into());
978        }
979    }
980    if design.intercept_range.start > design.intercept_range.end || design.intercept_range.end > p {
981        return Err(GamlssError::ConstraintViolation {
982            reason: format!(
983                "{label}.intercept_range out of bounds: {:?} for {} columns",
984                design.intercept_range, p
985            ),
986        }
987        .into());
988    }
989    Ok(())
990}
991
992impl BlockwiseTermFitResult {
993    pub(crate) fn try_from_parts(parts: BlockwiseTermFitResultParts) -> Result<Self, String> {
994        let BlockwiseTermFitResultParts {
995            fit,
996            meanspec_resolved,
997            noisespec_resolved,
998            mean_design,
999            noise_design,
1000        } = parts;
1001
1002        fit.validate_numeric_finiteness()
1003            .map_err(|e| format!("{e}"))?;
1004        if fit.block_states.len() < 2 {
1005            return Err(GamlssError::DimensionMismatch {
1006                reason: format!(
1007                    "BlockwiseTermFitResult requires at least 2 block states, got {}",
1008                    fit.block_states.len()
1009                ),
1010            }
1011            .into());
1012        }
1013        validate_term_collection_design("blockwise_term.mean_design", &mean_design)?;
1014        validate_term_collection_design("blockwise_term.noise_design", &noise_design)?;
1015        if mean_design.design.nrows() != noise_design.design.nrows() {
1016            return Err(GamlssError::DimensionMismatch {
1017                reason: format!(
1018                    "BlockwiseTermFitResult row mismatch: mean_design={}, noise_design={}",
1019                    mean_design.design.nrows(),
1020                    noise_design.design.nrows()
1021                ),
1022            }
1023            .into());
1024        }
1025        if fit.block_states[0].beta.len() != mean_design.design.ncols() {
1026            return Err(GamlssError::DimensionMismatch {
1027                reason: format!(
1028                    "BlockwiseTermFitResult mean beta length mismatch: got {}, expected {}",
1029                    fit.block_states[0].beta.len(),
1030                    mean_design.design.ncols()
1031                ),
1032            }
1033            .into());
1034        }
1035        if fit.block_states[1].beta.len() != noise_design.design.ncols() {
1036            return Err(GamlssError::DimensionMismatch {
1037                reason: format!(
1038                    "BlockwiseTermFitResult noise beta length mismatch: got {}, expected {}",
1039                    fit.block_states[1].beta.len(),
1040                    noise_design.design.ncols()
1041                ),
1042            }
1043            .into());
1044        }
1045        if fit.block_states[0].eta.len() != mean_design.design.nrows() {
1046            return Err(GamlssError::DimensionMismatch {
1047                reason: format!(
1048                    "BlockwiseTermFitResult mean eta length mismatch: got {}, expected {}",
1049                    fit.block_states[0].eta.len(),
1050                    mean_design.design.nrows()
1051                ),
1052            }
1053            .into());
1054        }
1055        if fit.block_states[1].eta.len() != noise_design.design.nrows() {
1056            return Err(GamlssError::DimensionMismatch {
1057                reason: format!(
1058                    "BlockwiseTermFitResult noise eta length mismatch: got {}, expected {}",
1059                    fit.block_states[1].eta.len(),
1060                    noise_design.design.nrows()
1061                ),
1062            }
1063            .into());
1064        }
1065
1066        Ok(Self {
1067            fit,
1068            meanspec_resolved,
1069            noisespec_resolved,
1070            mean_design,
1071            noise_design,
1072        })
1073    }
1074
1075    pub(crate) fn validate_numeric_finiteness(&self) -> Result<(), String> {
1076        Self::try_from_parts(BlockwiseTermFitResultParts {
1077            fit: self.fit.clone(),
1078            meanspec_resolved: self.meanspec_resolved.clone(),
1079            noisespec_resolved: self.noisespec_resolved.clone(),
1080            mean_design: self.mean_design.clone(),
1081            noise_design: self.noise_design.clone(),
1082        })
1083        .map(|_| ())
1084    }
1085}
1086
1087impl BlockwiseTermWiggleFitResult {
1088    pub(crate) fn try_from_parts(parts: BlockwiseTermWiggleFitResultParts) -> Result<Self, String> {
1089        let BlockwiseTermWiggleFitResultParts {
1090            fit,
1091            wiggle_knots,
1092            wiggle_degree,
1093        } = parts;
1094
1095        fit.validate_numeric_finiteness()
1096            .map_err(|e| e.to_string())?;
1097        if fit.fit.block_states.len() < 3 {
1098            return Err(GamlssError::DimensionMismatch {
1099                reason: format!(
1100                    "BlockwiseTermWiggleFitResult requires at least 3 block states, got {}",
1101                    fit.fit.block_states.len()
1102                ),
1103            }
1104            .into());
1105        }
1106        if wiggle_knots.is_empty() {
1107            return Err(GamlssError::UnsupportedConfiguration {
1108                reason: "BlockwiseTermWiggleFitResult requires non-empty wiggle_knots".to_string(),
1109            }
1110            .into());
1111        }
1112        validate_all_finite_estimation(
1113            "blockwise_term_wiggle.wiggle_knots",
1114            wiggle_knots.iter().copied(),
1115        )
1116        .map_err(|e| e.to_string())?;
1117
1118        Ok(Self {
1119            fit,
1120            wiggle_knots,
1121            wiggle_degree,
1122        })
1123    }
1124}
1125
1126pub struct BinomialLocationScaleFitResult {
1127    pub fit: BlockwiseTermFitResult,
1128    pub wiggle_knots: Option<Array1<f64>>,
1129    pub wiggle_degree: Option<usize>,
1130    pub beta_link_wiggle: Option<Vec<f64>>,
1131}
1132
1133pub struct GaussianLocationScaleFitResult {
1134    pub fit: BlockwiseTermFitResult,
1135    pub wiggle_knots: Option<Array1<f64>>,
1136    pub wiggle_degree: Option<usize>,
1137    pub beta_link_wiggle: Option<Vec<f64>>,
1138    /// Response standardization factor applied internally during fitting.
1139    ///
1140    /// The Gaussian location-scale path fits on `y / response_scale` so the
1141    /// fixed log-σ soft floor `LOGB_SIGMA_FLOOR = 0.01` is *operationally*
1142    /// scale-relative (1 % of the response spread) rather than absolute,
1143    /// keeping κ = dlogσ/dη ≈ 1 across the realistic σ range and informing the
1144    /// scale block like gamlss. The returned coefficient `blocks`, `beta`, and
1145    /// link-wiggle knots/coefficients are already mapped back to **raw response
1146    /// units** (the Location/Mean block scaled by `response_scale`, the Scale
1147    /// block intercept shifted by `+ln(response_scale)`), so downstream
1148    /// reconstruction `μ = X_mean·β` comes out in raw units with no further
1149    /// rescaling.
1150    ///
1151    /// The σ reconstruction, however, **must scale the floor too** to stay
1152    /// response-scale-equivariant (#884):
1153    ///
1154    /// ```text
1155    /// σ = response_scale·LOGB_SIGMA_FLOOR + exp(X_scale·β)
1156    ///   = response_scale·(LOGB_SIGMA_FLOOR + exp(η_internal)).
1157    /// ```
1158    ///
1159    /// The intercept shift carries only the `exp(η)` term; reconstructing with a
1160    /// raw `LOGB_SIGMA_FLOOR` instead of `response_scale·LOGB_SIGMA_FLOOR` leaves
1161    /// the non-equivariant residual `LOGB_SIGMA_FLOOR·(1 − response_scale)`.
1162    ///
1163    /// This field records the factor that was applied for transparency,
1164    /// covariance bookkeeping, and the equivariant σ-floor reconstruction; it is
1165    /// `1.0` when no standardization was needed (degenerate constant response).
1166    pub response_scale: f64,
1167}
1168
1169/// Fit the binomial mean link-wiggle model. Returns the γ-space fit and, when
1170/// the frozen-basis de-aliasing engaged (#1596), the standard-basis warp
1171/// coefficients `β_w = Z·γ` for the saved-model predict runtime.
1172pub(crate) fn fit_binomial_mean_wiggle(
1173    spec: BinomialMeanWiggleSpec,
1174    options: &BlockwiseFitOptions,
1175) -> Result<(UnifiedFitResult, Option<Vec<f64>>), String> {
1176    let n = spec.y.len();
1177    validate_len_match("weights vs y", n, spec.weights.len())?;
1178    validateweights(&spec.weights, "fit_binomial_mean_wiggle")?;
1179    validate_binomial_response(&spec.y, "fit_binomial_mean_wiggle")?;
1180    validate_blockrows("eta", n, &spec.eta_block)?;
1181    validate_blockrows("wiggle", n, &spec.wiggle_block)?;
1182    if matches!(
1183        spec.link_kind,
1184        InverseLink::Standard(StandardLink::Identity)
1185    ) {
1186        return Err(GamlssError::UnsupportedConfiguration {
1187            reason: "fit_binomial_mean_wiggle does not support identity link".to_string(),
1188        }
1189        .into());
1190    }
1191    gam_terms::inference::formula_dsl::require_binomial_inverse_link_supports_joint_wiggle(
1192        &spec.link_kind,
1193        "fit_binomial_mean_wiggle",
1194    )?;
1195    if spec.wiggle_degree < 2 {
1196        return Err(GamlssError::ConstraintViolation {
1197            reason: format!(
1198                "fit_binomial_mean_wiggle: wiggle_degree must be >= 2, got {}",
1199                spec.wiggle_degree
1200            ),
1201        }
1202        .into());
1203    }
1204    let minimum_knots = minimum_monotone_wiggle_knot_count(spec.wiggle_degree)?;
1205    if spec.wiggle_knots.len() < minimum_knots {
1206        return Err(GamlssError::DimensionMismatch { reason: format!(
1207            "fit_binomial_mean_wiggle: wiggle_knots length {} is too short for degree {} (need at least {})",
1208            spec.wiggle_knots.len(),
1209            spec.wiggle_degree,
1210            minimum_knots
1211        ) }.into());
1212    }
1213
1214    // ----- Frozen-basis Gauss-Newton link-warp fit (#1596) -----
1215    //
1216    // The warp basis `B(η)` is frozen at the current index `η̂` so that
1217    // `q = η + B(η̂)·β_w` is linear in `(β_η, β_w)` (`∂q/∂η = 1`). To keep the
1218    // mean block `X` full and identifiable we fit the warp in the *de-aliased*
1219    // coordinate `B̃ = B(η̂)·Z`, where `Z` spans `null(Xᵀ B(η̂))` — the warp
1220    // curvature orthogonal to the mean's column space (see
1221    // `BinomialMeanWiggleFamily::frozen_warp_design`). We re-freeze at the refit
1222    // `η̂` across a few outer iterations until it stops moving.
1223    let x_dense: Array2<f64> = spec.eta_block.design.to_dense();
1224    let pilot_eta: Array1<f64> = {
1225        let pilot_beta = spec.eta_block.initial_beta.clone().ok_or_else(|| {
1226            "fit_binomial_mean_wiggle: eta block carries no pilot β to seed the \
1227             frozen-basis warp index"
1228                .to_string()
1229        })?;
1230        if x_dense.ncols() != pilot_beta.len() {
1231            return Err(GamlssError::DimensionMismatch {
1232                reason: format!(
1233                    "fit_binomial_mean_wiggle: eta design has {} columns but pilot β has {} \
1234                     coefficients",
1235                    x_dense.ncols(),
1236                    pilot_beta.len()
1237                ),
1238            }
1239            .into());
1240        }
1241        let mut eta = x_dense.dot(&pilot_beta);
1242        if spec.eta_block.offset.len() == eta.len() {
1243            eta += &spec.eta_block.offset;
1244        }
1245        eta
1246    };
1247
1248    // Original (full-width) warp penalties / nullspace metadata, captured before
1249    // `spec.wiggle_block` is consumed. The de-aliased block carries `ZᵀSZ`.
1250    let wiggle_penalties_full = spec.wiggle_block.penalties.clone();
1251    let wiggle_log_lambdas = spec.wiggle_block.initial_log_lambdas.clone();
1252    let eta_block_input = spec.eta_block.clone();
1253
1254    let family = BinomialMeanWiggleFamily {
1255        y: spec.y,
1256        weights: spec.weights,
1257        link_kind: spec.link_kind,
1258        wiggle_knots: spec.wiggle_knots,
1259        wiggle_degree: spec.wiggle_degree,
1260        policy: gam_runtime::resource::ResourcePolicy::default_library(),
1261        frozen_warp_design: None,
1262    };
1263
1264    // Build the de-aliased warp block (`B̃ = B·Z`, penalties `ZᵀSZ`) at a frozen
1265    // index, returning the block input, the reducing transform `Z`, and `B̃`.
1266    let build_dealiased = |frozen: &Array1<f64>| -> Result<
1267        (ParameterBlockInput, Array2<f64>, std::sync::Arc<Array2<f64>>),
1268        String,
1269    > {
1270        let b_full = family.wiggle_design(frozen.view())?;
1271        let btx = b_full.t().dot(&x_dense);
1272        let (z, _rank) = gam_linalg::faer_ndarray::rrqr_nullspace_basis(&btx, 1.0e3)
1273            .map_err(|e| format!("frozen-basis warp de-aliasing null-space failed: {e}"))?;
1274        if z.ncols() == 0 {
1275            return Err("frozen-basis warp de-aliasing left no identifiable warp \
1276                        direction (the mean block already spans the warp)"
1277                .to_string());
1278        }
1279        let bda = b_full.dot(&z);
1280        let penalties: Vec<crate::model_types::PenaltySpec> = wiggle_penalties_full
1281            .iter()
1282            .map(|p| {
1283                let s = penalty_spec_to_dense(p, b_full.ncols())?;
1284                Ok(crate::model_types::PenaltySpec::Dense(z.t().dot(&s).dot(&z)))
1285            })
1286            .collect::<Result<_, String>>()?;
1287        let q = bda.ncols();
1288        let block = ParameterBlockInput {
1289            design: DesignMatrix::Dense(gam_linalg::matrix::DenseDesignMatrix::from(bda.clone())),
1290            offset: Array1::zeros(frozen.len()),
1291            penalties,
1292            nullspace_dims: vec![],
1293            initial_log_lambdas: wiggle_log_lambdas.clone(),
1294            initial_beta: Some(Array1::zeros(q)),
1295        };
1296        Ok((block, z, std::sync::Arc::new(bda)))
1297    };
1298
1299    // True iff the warped link `q = η + B(η)·β_w` is strictly increasing across
1300    // `[lo, hi]` (`dq/dη = 1 + B'(η)·β_w > 0`), evaluated on a dense grid.
1301    let link_monotone = |beta_w: &[f64], lo: f64, hi: f64| -> Result<bool, String> {
1302        if !(lo.is_finite() && hi.is_finite()) || hi <= lo {
1303            return Ok(true);
1304        }
1305        let grid = Array1::linspace(lo, hi, 257);
1306        let d1 = crate::wiggle::monotone_wiggle_basis_with_derivative_order(
1307            grid.view(),
1308            &family.wiggle_knots,
1309            family.wiggle_degree,
1310            1,
1311        )?;
1312        let beta = Array1::from_vec(beta_w.to_vec());
1313        if d1.ncols() != beta.len() {
1314            return Ok(false);
1315        }
1316        let dq = d1.dot(&beta) + 1.0;
1317        Ok(dq.iter().all(|v| *v > 0.0))
1318    };
1319
1320    // Outer Gauss-Newton / backfitting loop over the frozen warp index. One
1321    // iteration already recovers the warp; a few more align `η̂` with the refit
1322    // `η` so the in-sample warp matches what `predict` reconstructs.
1323    const MAX_FROZEN_OUTER: usize = 6;
1324    const FROZEN_ETA_TOL: f64 = 1e-5;
1325    let mut frozen_eta = pilot_eta;
1326    let mut last_fit: Option<UnifiedFitResult> = None;
1327    let mut last_reduction: Option<Array2<f64>> = None;
1328    for _outer in 0..MAX_FROZEN_OUTER {
1329        let (wiggle_block, z, bda) = build_dealiased(&frozen_eta)?;
1330        let blocks = vec![
1331            eta_block_input.clone().intospec("eta")?,
1332            wiggle_block.intospec("wiggle")?,
1333        ];
1334        let mut fam = family.clone();
1335        fam.frozen_warp_design = Some(bda);
1336        let fit = fit_custom_family(&fam, &blocks, options).map_err(|e| e.to_string())?;
1337        let new_eta = fit
1338            .block_states
1339            .get(BinomialMeanWiggleFamily::BLOCK_ETA)
1340            .map(|state| state.eta.clone())
1341            .filter(|eta| eta.len() == frozen_eta.len())
1342            .ok_or_else(|| {
1343                "fit_binomial_mean_wiggle: frozen-basis refit did not expose a fitted eta block"
1344                    .to_string()
1345            })?;
1346        // The fit stays entirely in the reduced, identifiable coordinate `γ`:
1347        // the result (top-level `beta`, role-tagged `blocks`, `block_states`,
1348        // penalized Hessian, and covariance) is full rank and self-consistent at
1349        // width `p_eta + q`. The widened standard-basis warp coefficients
1350        // `β_w = Z·γ` form a rank-deficient over-parametrization (the two
1351        // mean-aliased directions carry no curvature), so they are NOT folded
1352        // back into the result; instead `Z` is returned out-of-band and the
1353        // saved model stores `β_w = Z·γ` for the predict runtime, which
1354        // reconstructs the warp from the full-width basis as `B(η_new)·β_w`.
1355        last_reduction = Some(z.clone());
1356        let scale = 1.0
1357            + frozen_eta
1358                .iter()
1359                .map(|v: &f64| v.abs())
1360                .fold(0.0_f64, f64::max);
1361        let delta = new_eta
1362            .iter()
1363            .zip(frozen_eta.iter())
1364            .map(|(a, b)| (a - b).abs())
1365            .fold(0.0_f64, f64::max);
1366        last_fit = Some(fit);
1367        if delta <= FROZEN_ETA_TOL * scale {
1368            break;
1369        }
1370        frozen_eta = new_eta;
1371    }
1372    let mut fit = last_fit.ok_or_else(|| {
1373        "fit_binomial_mean_wiggle: frozen-basis outer loop produced no fit".to_string()
1374    })?;
1375    // Widen the reduced warp coefficient `γ` to the standard I-spline basis
1376    // `β_w = Z·γ` for the saved-model predict runtime (which reconstructs the
1377    // warp from the full-width basis).
1378    let warp_beta_from = |fit: &UnifiedFitResult, z: &Array2<f64>| -> Option<Vec<f64>> {
1379        fit.block_states
1380            .get(BinomialMeanWiggleFamily::BLOCK_WIGGLE)
1381            .filter(|state| state.beta.len() == z.ncols())
1382            .map(|state| z.dot(&state.beta).to_vec())
1383    };
1384    let mut saved_warp_beta = match last_reduction.as_ref() {
1385        Some(z) => warp_beta_from(&fit, z),
1386        None => None,
1387    };
1388
1389    // Monotone-link guarantee (#1596). The reduced curvature fit is
1390    // unconstrained, so the REML-selected warp can turn the link non-monotone
1391    // (non-invertible) over the fitted predictor range. The warp NESTS the base
1392    // link at λ → ∞ (`γ → 0`, `dq/dη ≡ 1`), so escalating the warp's smoothing
1393    // always reaches a strictly-increasing link while preserving as much of the
1394    // learned curvature as monotonicity allows. We fixed-λ re-fit at the
1395    // converged frozen index with a geometrically increasing penalty until the
1396    // link is monotone — a monotonicity-driven smoothing floor, not a hard
1397    // active-set constraint (which the coupled `Z·γ ≥ 0` inequality would
1398    // reintroduce). The base link itself is the λ → ∞ fixed point, so this
1399    // terminates.
1400    if last_reduction.is_some() {
1401        let lo = frozen_eta.iter().copied().fold(f64::INFINITY, f64::min);
1402        let hi = frozen_eta.iter().copied().fold(f64::NEG_INFINITY, f64::max);
1403        let already_monotone = saved_warp_beta
1404            .as_ref()
1405            .map(|bw| link_monotone(bw, lo, hi))
1406            .transpose()?
1407            .unwrap_or(true);
1408        if !already_monotone {
1409            let eta_penalty_count = eta_block_input.penalties.len();
1410            let reml_log = fit.log_lambdas.clone();
1411            const MAX_MONO_STEPS: usize = 16;
1412            const MONO_LOG_LAMBDA_STEP: f64 = 0.75;
1413            for step in 1..=MAX_MONO_STEPS {
1414                let bump = MONO_LOG_LAMBDA_STEP * step as f64;
1415                let (mut wb, z2, bda2) = build_dealiased(&frozen_eta)?;
1416                let wlen = wb.penalties.len();
1417                let wiggle_base: Array1<f64> = if reml_log.len() >= eta_penalty_count + wlen {
1418                    reml_log
1419                        .slice(s![eta_penalty_count..eta_penalty_count + wlen])
1420                        .to_owned()
1421                } else {
1422                    Array1::zeros(wlen)
1423                };
1424                wb.initial_log_lambdas = Some(wiggle_base.mapv(|v| v + bump));
1425                let mut eta_in = eta_block_input.clone();
1426                if reml_log.len() >= eta_penalty_count && eta_penalty_count > 0 {
1427                    eta_in.initial_log_lambdas =
1428                        Some(reml_log.slice(s![0..eta_penalty_count]).to_owned());
1429                }
1430                let blocks = vec![eta_in.intospec("eta")?, wb.intospec("wiggle")?];
1431                let mut fam = family.clone();
1432                fam.frozen_warp_design = Some(bda2);
1433                let refit = crate::custom_family::fit_custom_family_fixed_log_lambdas(
1434                    &fam, &blocks, options, None, 0, None, true,
1435                )
1436                .map_err(|e| e.to_string())?;
1437                let refit_beta = warp_beta_from(&refit, &z2);
1438                let monotone = refit_beta
1439                    .as_ref()
1440                    .map(|bw| link_monotone(bw, lo, hi))
1441                    .transpose()?
1442                    .unwrap_or(true);
1443                if monotone || step == MAX_MONO_STEPS {
1444                    fit = refit;
1445                    saved_warp_beta = refit_beta;
1446                    break;
1447                }
1448            }
1449            // Final certification: the λ → ∞ base link is monotone, so this
1450            // should hold; surface loudly if a pathological basis defeats it.
1451            let final_monotone = saved_warp_beta
1452                .as_ref()
1453                .map(|bw| link_monotone(bw, lo, hi))
1454                .transpose()?
1455                .unwrap_or(true);
1456            if !final_monotone {
1457                return Err("binomial flexible link could not be smoothed to a monotone \
1458                            (invertible) link over the fitted predictor range"
1459                    .to_string());
1460            }
1461        }
1462    }
1463    Ok((fit, saved_warp_beta))
1464}
1465
1466/// Densify a wiggle-block penalty spec to its full `p×p` matrix for the
1467/// `ZᵀSZ` de-aliasing transform (#1596). The link-warp block carries only
1468/// `Dense`/`DenseWithMean` difference (and optional ridge) penalties.
1469fn penalty_spec_to_dense(
1470    spec: &crate::model_types::PenaltySpec,
1471    p: usize,
1472) -> Result<Array2<f64>, String> {
1473    use crate::model_types::PenaltySpec;
1474    match spec {
1475        PenaltySpec::Dense(m) | PenaltySpec::DenseWithMean { matrix: m, .. } => {
1476            if m.nrows() != p || m.ncols() != p {
1477                return Err(format!(
1478                    "frozen-basis warp penalty must be {p}x{p}, got {}x{}",
1479                    m.nrows(),
1480                    m.ncols()
1481                ));
1482            }
1483            Ok(m.clone())
1484        }
1485        PenaltySpec::Block {
1486            local, col_range, ..
1487        } => {
1488            let mut full = Array2::<f64>::zeros((p, p));
1489            if col_range.end > p || local.nrows() != col_range.len() {
1490                return Err("frozen-basis warp penalty block range out of bounds".to_string());
1491            }
1492            full.slice_mut(s![col_range.clone(), col_range.clone()])
1493                .assign(local);
1494            Ok(full)
1495        }
1496    }
1497}
1498
1499pub(crate) trait LocationScaleFamilyBuilder {
1500    type Family: CustomFamily + Clone + Send + Sync + 'static;
1501
1502    fn meanspec(&self) -> &TermCollectionSpec;
1503    fn noisespec(&self) -> &TermCollectionSpec;
1504
1505    fn build_blocks(
1506        &self,
1507        theta: &Array1<f64>,
1508        mean_design: &TermCollectionDesign,
1509        noise_design: &TermCollectionDesign,
1510        mean_beta_hint: Option<Array1<f64>>,
1511        noise_beta_hint: Option<Array1<f64>>,
1512    ) -> Result<Vec<ParameterBlockSpec>, String>;
1513
1514    fn build_family(
1515        &self,
1516        mean_design: &TermCollectionDesign,
1517        noise_design: &TermCollectionDesign,
1518    ) -> Self::Family;
1519
1520    fn extract_primary_betas(
1521        &self,
1522        fit: &UnifiedFitResult,
1523    ) -> Result<(Array1<f64>, Array1<f64>), String>;
1524
1525    fn mean_penalty_count(&self, mean_design: &TermCollectionDesign) -> usize {
1526        mean_design.penalties.len()
1527    }
1528
1529    fn noise_penalty_count(&self, noise_design: &TermCollectionDesign) -> usize {
1530        noise_design.penalties.len()
1531    }
1532
1533    fn exact_spatial_joint_supported(&self) -> bool {
1534        false
1535    }
1536
1537    fn require_exact_spatial_joint(&self) -> bool {
1538        false
1539    }
1540
1541    fn exact_spatial_seed_risk_profile(&self) -> crate::seeding::SeedRiskProfile {
1542        crate::seeding::SeedRiskProfile::GeneralizedLinear
1543    }
1544
1545    fn extra_rho0(&self) -> Result<Array1<f64>, String> {
1546        Ok(Array1::zeros(0))
1547    }
1548
1549    fn build_psiderivative_blocks(
1550        &self,
1551        arr: ndarray::ArrayView2<'_, f64>,
1552        term_spec: &TermCollectionSpec,
1553        term_spec2: &TermCollectionSpec,
1554        term_design: &TermCollectionDesign,
1555        term_design2: &TermCollectionDesign,
1556    ) -> Result<Vec<Vec<CustomFamilyBlockPsiDerivative>>, String>;
1557}
1558
1559pub(crate) fn fit_location_scale_terms<B: LocationScaleFamilyBuilder>(
1560    data: ndarray::ArrayView2<'_, f64>,
1561    builder: B,
1562    options: &BlockwiseFitOptions,
1563    kappa_options: &SpatialLengthScaleOptimizationOptions,
1564) -> Result<BlockwiseTermFitResult, String> {
1565    // Large-n location-scale fits keep the caller's explicit Hessian request.
1566    // The unified REML evaluator chooses a dense or matrix-free exact
1567    // representation from the realized (n, p, K) work model, so there is no
1568    // large-scale downgrade to BFGS here.
1569
1570    let mut mean_beta_hint: Option<Array1<f64>> = None;
1571    let mut noise_beta_hint: Option<Array1<f64>> = None;
1572    let extra_rho0 = builder.extra_rho0()?;
1573
1574    let mean_boot_design =
1575        build_term_collection_design(data, builder.meanspec()).map_err(|e| e.to_string())?;
1576    let noise_boot_design =
1577        build_term_collection_design(data, builder.noisespec()).map_err(|e| e.to_string())?;
1578    let mean_bootspec = freeze_term_collection_from_design(builder.meanspec(), &mean_boot_design)
1579        .map_err(|e| e.to_string())?;
1580    let noise_bootspec =
1581        freeze_term_collection_from_design(builder.noisespec(), &noise_boot_design)
1582            .map_err(|e| e.to_string())?;
1583
1584    let require_exact_spatial_joint = builder.require_exact_spatial_joint();
1585    let analytic_joint_derivatives_check = if builder.exact_spatial_joint_supported() {
1586        builder
1587            .build_psiderivative_blocks(
1588                data,
1589                &mean_bootspec,
1590                &noise_bootspec,
1591                &mean_boot_design,
1592                &noise_boot_design,
1593            )
1594            .map(|_| ())
1595    } else {
1596        Err(
1597            "analytic spatial psi derivatives are unavailable for this location-scale family"
1598                .to_string(),
1599        )
1600    };
1601    let analytic_joint_derivatives_available = analytic_joint_derivatives_check.is_ok();
1602    if require_exact_spatial_joint {
1603        analytic_joint_derivatives_check.map_err(|err| {
1604            format!("exact two-block spatial path requires analytic psi derivatives: {err}")
1605        })?;
1606    }
1607    let mean_penalty_count = builder.mean_penalty_count(&mean_boot_design);
1608    let noise_penalty_count = builder.noise_penalty_count(&noise_boot_design);
1609
1610    // Honor an explicit user-supplied `length_scale=X` on every spatial term
1611    // in both the mean and noise blocks: when every term is κ-locked (no
1612    // anisotropy, no per-axis ψ contrasts), the joint-spatial outer optimizer
1613    // has nothing to optimize. Routing through it anyway wraps the full
1614    // two-block coefficient solve inside an unnecessary outer loop where
1615    // each evaluation runs the inner Newton from scratch. This is the same
1616    // short-circuit the Bernoulli marginal-slope entry point performs at
1617    // bernoulli_marginal_slope.rs:16432-16442; mirroring it here makes the
1618    // GAMLSS path skip straight to the `(!enabled || log_kappa_dim == 0)`
1619    // fast path in `optimize_spatial_length_scale_exact_joint`.
1620    let mut effective_kappa_options = kappa_options.clone();
1621    if effective_kappa_options.enabled
1622        && gam_terms::smooth::all_spatial_terms_kappa_fixed(&mean_bootspec)
1623        && gam_terms::smooth::all_spatial_terms_kappa_fixed(&noise_bootspec)
1624    {
1625        log::info!(
1626            "[GAMLSS spatial] disabling κ/ψ optimization: every spatial term in \
1627             both blocks has an explicit length_scale and no anisotropy; \
1628             user-supplied kernel scale is fixed"
1629        );
1630        effective_kappa_options.enabled = false;
1631    }
1632    let kappa_options: &SpatialLengthScaleOptimizationOptions = &effective_kappa_options;
1633
1634    // Macro to invoke the exact-joint spatial optimizer with shared closures.
1635    // The exact path evaluates the full profiled/Laplace objective over
1636    // theta = [rho, psi] with the real joint Hessian required by NewtonTR/ARC.
1637    macro_rules! run_exact_joint_spatial {
1638        () => {{
1639            let joint_setup = build_two_block_exact_joint_setup(
1640                data,
1641                builder.meanspec(),
1642                builder.noisespec(),
1643                mean_penalty_count,
1644                noise_penalty_count,
1645                extra_rho0.as_slice().unwrap_or(&[]),
1646                None,
1647                kappa_options,
1648            );
1649            let mean_terms = spatial_length_scale_term_indices(builder.meanspec());
1650            let noise_terms = spatial_length_scale_term_indices(builder.noisespec());
1651            let mean_beta_hint_cell = std::cell::RefCell::new(mean_beta_hint.clone());
1652            let noise_beta_hint_cell = std::cell::RefCell::new(noise_beta_hint.clone());
1653            let hyper_warm_start_cell =
1654                std::cell::RefCell::new(None::<CustomFamilyWarmStart>);
1655            // Two-block GAMLSS/location-scale joint likelihoods have a
1656            // β-dependent cross-block Hessian (the (μ,log σ) / (t,log σ)
1657            // off-diagonal blocks involve residual/response scalars that
1658            // shift when β moves). The Wood-Fasiolo structural property
1659            // `H^{-1/2} B_k H^{-1/2} ≽ 0` plus parameter-independent
1660            // nullspace — the mathematical basis for EFS convergence —
1661            // fails here, so EFS/HybridEFS must be excluded at plan time
1662            // rather than retried as a silent first attempt that stalls
1663            // for hundreds of seconds before the runner falls back.
1664            let gamlss_disable_fixed_point = true;
1665            let outer_policy = {
1666                // GAMLSS spatial path: psi_dim = log_kappa_dim + auxiliary_dim,
1667                // matching the (theta_dim - rho_dim) decomposition the
1668                // optimizer uses internally. Build realized ParameterBlockSpecs
1669                // at the seed rho so the family's own cost model — which
1670                // multiplies coefficient-gradient / coefficient-Hessian
1671                // per-row cost by the joint outer-coordinate dimension and
1672                // total p — produces honest `predicted_*_work` estimates.
1673                // Previously this fed `predicted_*_work: 0` to the planner,
1674                // which then ungated dense outer Hessian work that costs
1675                // hundreds of seconds per eval at large scale (see
1676                // `OuterDerivativePolicy::OUTER_HESSIAN_WORK_BUDGET`).
1677                let theta_seed = joint_setup.theta0();
1678                let rho_dim = joint_setup.rho_dim();
1679                let psi_dim = theta_seed.len() - rho_dim;
1680                let rho_seed = theta_seed.slice(s![..rho_dim]).to_owned();
1681                let policy_blocks_res = builder.build_blocks(
1682                    &rho_seed,
1683                    &mean_boot_design,
1684                    &noise_boot_design,
1685                    mean_beta_hint_cell.borrow().clone(),
1686                    noise_beta_hint_cell.borrow().clone(),
1687                );
1688                let mut policy = match policy_blocks_res {
1689                    Ok(policy_blocks) => {
1690                        let policy_family =
1691                            builder.build_family(&mean_boot_design, &noise_boot_design);
1692                        crate::custom_family::CustomFamily::outer_derivative_policy(
1693                            &policy_family,
1694                            &policy_blocks,
1695                            psi_dim,
1696                            options,
1697                        )
1698                    }
1699                    Err(err) => {
1700                        // Block construction at the seed should not fail for
1701                        // any in-tree family, but if it does, fall back to a
1702                        // policy that names the capability honestly and
1703                        // declines to predict cost. Setting work to
1704                        // `u128::MAX` routes the planner through gradient-only
1705                        // BFGS (the universal Hessian-work budget is
1706                        // saturating, so a sentinel is fine here).
1707                        log::warn!(
1708                            "[GAMLSS spatial] failed to realize policy blocks at seed rho ({err}); \
1709                             routing outer optimizer through gradient-only BFGS"
1710                        );
1711                        let capability = if analytic_joint_derivatives_available {
1712                            crate::custom_family::ExactOuterDerivativeOrder::Second
1713                        } else {
1714                            crate::custom_family::ExactOuterDerivativeOrder::First
1715                        };
1716                        crate::custom_family::OuterDerivativePolicy {
1717                            capability,
1718                            predicted_gradient_work: u128::MAX,
1719                            predicted_hessian_work: u128::MAX,
1720                            // No GAMLSS family today overrides its
1721                            // outer-only `_with_options` hooks to consume
1722                            // `outer_score_subsample`; staged-κ would
1723                            // build pilot masks the family then ignores.
1724                            subsample_capable: false,
1725                        }
1726                    }
1727                };
1728                if !analytic_joint_derivatives_available {
1729                    // Capability must not exceed what the analytic derivatives
1730                    // path can supply — the macro's hyper evaluator returns
1731                    // an error otherwise.
1732                    policy.capability =
1733                        crate::custom_family::ExactOuterDerivativeOrder::First;
1734                }
1735                policy
1736            };
1737            optimize_spatial_length_scale_exact_joint(
1738                data,
1739                &[builder.meanspec().clone(), builder.noisespec().clone()],
1740                &[mean_terms, noise_terms],
1741                kappa_options,
1742                &joint_setup,
1743                builder.exact_spatial_seed_risk_profile(),
1744                analytic_joint_derivatives_available,
1745                analytic_joint_derivatives_available,
1746                gamlss_disable_fixed_point,
1747                None,
1748                outer_policy,
1749                |theta, specs: &[TermCollectionSpec], designs: &[TermCollectionDesign]| {
1750                    assert_eq!(
1751                        specs.len(),
1752                        2,
1753                        "joint spatial closure expects exactly two block specs (mean, noise); got {}",
1754                        specs.len(),
1755                    );
1756                    assert_eq!(
1757                        designs.len(),
1758                        2,
1759                        "joint spatial closure expects exactly two block designs (mean, noise); got {}",
1760                        designs.len(),
1761                    );
1762                    let rho = theta.slice(s![..joint_setup.rho_dim()]).to_owned();
1763                    let fit = {
1764                        let blocks = builder.build_blocks(
1765                            &rho,
1766                            &designs[0],
1767                            &designs[1],
1768                            mean_beta_hint_cell.borrow().clone(),
1769                            noise_beta_hint_cell.borrow().clone(),
1770                        )?;
1771                        if mean_beta_hint_cell.borrow().is_none()
1772                            && let Some(beta) = blocks.first().and_then(|block| block.initial_beta.clone())
1773                        {
1774                            *mean_beta_hint_cell.borrow_mut() = Some(beta);
1775                        }
1776                        if noise_beta_hint_cell.borrow().is_none()
1777                            && let Some(beta) =
1778                                blocks.get(1).and_then(|block| block.initial_beta.clone())
1779                        {
1780                            *noise_beta_hint_cell.borrow_mut() = Some(beta);
1781                        }
1782                        let family = builder.build_family(&designs[0], &designs[1]);
1783                        // Branch on whether the κ optimizer drives rho.
1784                        //
1785                        // * `log_kappa_dim() > 0 && kappa_options.enabled` ⇒
1786                        //   the outer (ρ, ψ) optimizer is active and
1787                        //   passes each candidate ρ to this closure;
1788                        //   the inner fit must hold log-lambdas fixed
1789                        //   at the supplied ρ so the outer derivative
1790                        //   has a well-defined directional gradient.
1791                        //
1792                        // * Otherwise (κ disabled via the locked-κ
1793                        //   short-circuit, or no spatial terms at all)
1794                        //   the fast path in
1795                        //   `optimize_spatial_length_scale_exact_joint`
1796                        //   calls this closure exactly once at
1797                        //   `theta = theta0`; ρ must still be optimized
1798                        //   from data because the user never pinned it.
1799                        //   `fit_custom_family` performs the joint
1800                        //   ρ + coefficient REML fit at the user's
1801                        //   (now-fixed) kernel scale, which is the
1802                        //   intended behaviour when `length_scale=…` is
1803                        //   set on every spatial term.
1804                        if joint_setup.log_kappa_dim() > 0 && kappa_options.enabled {
1805                            let warm_start = hyper_warm_start_cell.borrow().clone();
1806                            fit_custom_family_fixed_log_lambdas(
1807                                &family,
1808                                &blocks,
1809                                options,
1810                                warm_start.as_ref(),
1811                                0,
1812                                None,
1813                                true,
1814                            )?
1815                        } else {
1816                            fit_custom_family(&family, &blocks, options)?
1817                        }
1818                    };
1819                    let (mean_beta, noise_beta) = builder.extract_primary_betas(&fit)?;
1820                    mean_beta_hint = Some(mean_beta);
1821                    noise_beta_hint = Some(noise_beta);
1822                    *mean_beta_hint_cell.borrow_mut() = mean_beta_hint.clone();
1823                    *noise_beta_hint_cell.borrow_mut() = noise_beta_hint.clone();
1824                    Ok(fit)
1825                },
1826                |theta,
1827                 specs: &[TermCollectionSpec],
1828                 designs: &[TermCollectionDesign],
1829                 eval_mode,
1830                 row_set: &crate::row_kernel::RowSet| {
1831                    use gam_problem::EvalMode;
1832                    if !analytic_joint_derivatives_available {
1833                        return Err(
1834                            "analytic spatial psi derivatives are unavailable for this exact two-block path"
1835                                .to_string(),
1836                        );
1837                    }
1838                    let rho = theta.slice(s![..joint_setup.rho_dim()]).to_owned();
1839                    let blocks = builder.build_blocks(
1840                        &rho,
1841                        &designs[0],
1842                        &designs[1],
1843                        mean_beta_hint_cell.borrow().clone(),
1844                        noise_beta_hint_cell.borrow().clone(),
1845                    )?;
1846                    if mean_beta_hint_cell.borrow().is_none()
1847                        && let Some(beta) = blocks.first().and_then(|block| block.initial_beta.clone())
1848                    {
1849                        *mean_beta_hint_cell.borrow_mut() = Some(beta);
1850                    }
1851                    if noise_beta_hint_cell.borrow().is_none()
1852                        && let Some(beta) = blocks.get(1).and_then(|block| block.initial_beta.clone())
1853                    {
1854                        *noise_beta_hint_cell.borrow_mut() = Some(beta);
1855                    }
1856                    let family = builder.build_family(&designs[0], &designs[1]);
1857                    let psiderivative_blocks = builder.build_psiderivative_blocks(
1858                        data,
1859                        &specs[0],
1860                        &specs[1],
1861                        &designs[0],
1862                        &designs[1],
1863                    )?;
1864                    let warm_start = hyper_warm_start_cell.borrow().clone();
1865                    // Forward the κ-staging row set to the family by installing it
1866                    // on the canonical `outer_score_subsample` option. Inner-PIRLS
1867                    // and final covariance still run on full data (the per-row
1868                    // weight is consulted only by outer-only paths inside the
1869                    // family). When the staging schedule is full-data the option
1870                    // stays `None` and the call is equivalent to the prior path.
1871                    let eval_options = match row_set {
1872                        crate::row_kernel::RowSet::All => {
1873                            std::borrow::Cow::Borrowed(options)
1874                        }
1875                        crate::row_kernel::RowSet::Subsample {
1876                            rows,
1877                            n_full,
1878                        } => {
1879                            let subsample = crate::outer_subsample::
1880                                OuterScoreSubsample::from_weighted_rows(
1881                                    (**rows).clone(),
1882                                    *n_full,
1883                                    *n_full as u64,
1884                                );
1885                            let mut cloned = options.clone();
1886                            cloned.outer_score_subsample =
1887                                Some(std::sync::Arc::new(subsample));
1888                            std::borrow::Cow::Owned(cloned)
1889                        }
1890                    };
1891                    let eval = evaluate_custom_family_joint_hyper(
1892                        &family,
1893                        &blocks,
1894                        eval_options.as_ref(),
1895                        &rho,
1896                        &psiderivative_blocks,
1897                        warm_start.as_ref(),
1898                        eval_mode,
1899                    )?;
1900                    *hyper_warm_start_cell.borrow_mut() = Some(eval.warm_start.clone());
1901                    if !eval.inner_converged {
1902                        return Err(
1903                            "exact two-block spatial inner solve did not converge".to_string(),
1904                        );
1905                    }
1906                    if matches!(eval_mode, EvalMode::ValueGradientHessian)
1907                        && !eval.outer_hessian.is_analytic()
1908                    {
1909                        return Err(
1910                            "exact two-block spatial objective requires a full joint [rho, psi] hessian"
1911                                .to_string(),
1912                        );
1913                    }
1914                    Ok((eval.objective, eval.gradient, eval.outer_hessian))
1915                },
1916                |theta, specs: &[TermCollectionSpec], designs: &[TermCollectionDesign]| {
1917                    if !analytic_joint_derivatives_available {
1918                        return Err(
1919                            "analytic spatial psi derivatives are unavailable for this exact two-block path"
1920                                .to_string(),
1921                        );
1922                    }
1923                    let rho = theta.slice(s![..joint_setup.rho_dim()]).to_owned();
1924                    let blocks = builder.build_blocks(
1925                        &rho,
1926                        &designs[0],
1927                        &designs[1],
1928                        mean_beta_hint_cell.borrow().clone(),
1929                        noise_beta_hint_cell.borrow().clone(),
1930                    )?;
1931                    if mean_beta_hint_cell.borrow().is_none()
1932                        && let Some(beta) = blocks.first().and_then(|block| block.initial_beta.clone())
1933                    {
1934                        *mean_beta_hint_cell.borrow_mut() = Some(beta);
1935                    }
1936                    if noise_beta_hint_cell.borrow().is_none()
1937                        && let Some(beta) = blocks.get(1).and_then(|block| block.initial_beta.clone())
1938                    {
1939                        *noise_beta_hint_cell.borrow_mut() = Some(beta);
1940                    }
1941                    let family = builder.build_family(&designs[0], &designs[1]);
1942                    let psiderivative_blocks = builder.build_psiderivative_blocks(
1943                        data,
1944                        &specs[0],
1945                        &specs[1],
1946                        &designs[0],
1947                        &designs[1],
1948                    )?;
1949                    let warm_start = hyper_warm_start_cell.borrow().clone();
1950                    let eval = evaluate_custom_family_joint_hyper_efs(
1951                        &family,
1952                        &blocks,
1953                        options,
1954                        &rho,
1955                        &psiderivative_blocks,
1956                        warm_start.as_ref(),
1957                    )?;
1958                    *hyper_warm_start_cell.borrow_mut() = Some(eval.warm_start.clone());
1959                    if !eval.inner_converged {
1960                        return Err(
1961                            "exact two-block spatial EFS inner solve did not converge".to_string(),
1962                        );
1963                    }
1964                    Ok(eval.efs_eval)
1965                },
1966                |_beta: &Array1<f64>| Ok(gam_solve::rho_optimizer::SeedOutcome::NoSlot),
1967            )
1968        }};
1969    }
1970
1971    let mut solved = run_exact_joint_spatial!()
1972        .map_err(|err| format!("exact two-block spatial optimization failed: {err}"))?;
1973
1974    let expected_noise_penalty_count = builder.noise_penalty_count(&solved.designs[1]);
1975    let actual_noise_penalty_count = solved.designs[1].penalties.len();
1976    if expected_noise_penalty_count > actual_noise_penalty_count {
1977        if expected_noise_penalty_count != actual_noise_penalty_count + 1 {
1978            return Err(GamlssError::UnsupportedConfiguration {
1979                reason: format!(
1980                    "location-scale result noise design expected {} penalties after augmentation, got {} before augmentation",
1981                    expected_noise_penalty_count, actual_noise_penalty_count
1982                ),
1983            }
1984            .into());
1985        }
1986        append_binomial_log_sigma_shrinkage_penalty_design(&mut solved.designs[1]);
1987    }
1988
1989    BlockwiseTermFitResult::try_from_parts(BlockwiseTermFitResultParts {
1990        fit: solved.fit,
1991        meanspec_resolved: solved.resolved_specs.remove(0),
1992        noisespec_resolved: solved.resolved_specs.remove(0),
1993        mean_design: solved.designs.remove(0),
1994        noise_design: solved.designs.remove(0),
1995    })
1996}
1997
1998pub(crate) struct GaussianLocationScaleTermBuilder {
1999    pub(crate) y: Array1<f64>,
2000    pub(crate) weights: Array1<f64>,
2001    pub(crate) meanspec: TermCollectionSpec,
2002    pub(crate) noisespec: TermCollectionSpec,
2003    pub(crate) mean_offset: Array1<f64>,
2004    pub(crate) noise_offset: Array1<f64>,
2005}
2006
2007impl LocationScaleFamilyBuilder for GaussianLocationScaleTermBuilder {
2008    type Family = GaussianLocationScaleFamily;
2009
2010    fn meanspec(&self) -> &TermCollectionSpec {
2011        &self.meanspec
2012    }
2013
2014    fn noisespec(&self) -> &TermCollectionSpec {
2015        &self.noisespec
2016    }
2017
2018    fn noise_penalty_count(&self, noise_design: &TermCollectionDesign) -> usize {
2019        // Mirror the Binomial location-scale path: the log-sigma (scale)
2020        // block carries an extra nullspace shrinkage penalty so its
2021        // polynomial nullspace (constant log-sigma, plus the linear term for
2022        // tp/Duchon bases) is not left unpenalized. Without it, outer REML
2023        // optimizes lambda_sigma on a flat/ill-conditioned surface, which can
2024        // flatten the scale envelope (bad Pearson/CRPS/PIT/NLL) and diverge
2025        // the coupled inner Newton (log_sigma residual blows up, beta ->
2026        // infinity). The strength of this shrinkage is REML-selected.
2027        noise_design.penalties.len() + 1
2028    }
2029
2030    fn exact_spatial_joint_supported(&self) -> bool {
2031        true
2032    }
2033
2034    fn exact_spatial_seed_risk_profile(&self) -> crate::seeding::SeedRiskProfile {
2035        crate::seeding::SeedRiskProfile::GaussianLocationScale
2036    }
2037
2038    fn build_blocks(
2039        &self,
2040        theta: &Array1<f64>,
2041        mean_design: &TermCollectionDesign,
2042        noise_design: &TermCollectionDesign,
2043        mean_beta_hint: Option<Array1<f64>>,
2044        noise_beta_hint: Option<Array1<f64>>,
2045    ) -> Result<Vec<ParameterBlockSpec>, String> {
2046        let layout = GamlssLambdaLayout::two_block(
2047            mean_design.penalties.len(),
2048            self.noise_penalty_count(noise_design),
2049        );
2050        layout.validate_theta_len(theta.len(), "gaussian location-scale")?;
2051        let (meanspec, noisespec) = build_gaussian_mean_and_scale_blocks(
2052            &self.y,
2053            &self.weights,
2054            mean_design,
2055            noise_design,
2056            &self.mean_offset,
2057            &self.noise_offset,
2058            layout.mean_from(theta),
2059            layout.noise_from(theta),
2060            mean_beta_hint,
2061            noise_beta_hint,
2062            "GaussianLocationScale::build_blocks",
2063        )?;
2064        Ok(vec![meanspec, noisespec])
2065    }
2066
2067    fn build_family(
2068        &self,
2069        mean_design: &TermCollectionDesign,
2070        noise_design: &TermCollectionDesign,
2071    ) -> Self::Family {
2072        let preparednoise_design =
2073            prepared_gaussian_log_sigma_design(&mean_design.design, &noise_design.design)
2074                .expect("prepared Gaussian log-sigma design should match block construction");
2075        GaussianLocationScaleFamily {
2076            y: self.y.clone(),
2077            weights: self.weights.clone(),
2078            mu_design: Some(mean_design.design.clone()),
2079            log_sigma_design: Some(preparednoise_design),
2080            policy: gam_runtime::resource::ResourcePolicy::default_library(),
2081            cached_row_scalars: std::sync::RwLock::new(None),
2082        }
2083    }
2084
2085    fn extract_primary_betas(
2086        &self,
2087        fit: &UnifiedFitResult,
2088    ) -> Result<(Array1<f64>, Array1<f64>), String> {
2089        let mean_beta = fit
2090            .block_states
2091            .get(GaussianLocationScaleFamily::BLOCK_MU)
2092            .ok_or_else(|| "missing Gaussian mu block state".to_string())?
2093            .beta
2094            .clone();
2095        let noise_beta = fit
2096            .block_states
2097            .get(GaussianLocationScaleFamily::BLOCK_LOG_SIGMA)
2098            .ok_or_else(|| "missing Gaussian log_sigma block state".to_string())?
2099            .beta
2100            .clone();
2101        Ok((mean_beta, noise_beta))
2102    }
2103
2104    fn build_psiderivative_blocks(
2105        &self,
2106        data: ndarray::ArrayView2<'_, f64>,
2107        meanspec_resolved: &TermCollectionSpec,
2108        noisespec_resolved: &TermCollectionSpec,
2109        mean_design: &TermCollectionDesign,
2110        noise_design: &TermCollectionDesign,
2111    ) -> Result<Vec<Vec<CustomFamilyBlockPsiDerivative>>, String> {
2112        let mean_derivs =
2113            build_block_spatial_psi_derivatives(data, meanspec_resolved, mean_design)?
2114                .ok_or_else(|| "missing Gaussian mean spatial psi derivatives".to_string())?;
2115        let noise_derivs =
2116            build_block_spatial_psi_derivatives(data, noisespec_resolved, noise_design)?
2117                .ok_or_else(|| "missing Gaussian log-sigma spatial psi derivatives".to_string())?;
2118        Ok(vec![mean_derivs, noise_derivs])
2119    }
2120}
2121
2122pub(crate) struct GaussianLocationScaleWiggleTermBuilder {
2123    pub(crate) y: Array1<f64>,
2124    pub(crate) weights: Array1<f64>,
2125    pub(crate) meanspec: TermCollectionSpec,
2126    pub(crate) noisespec: TermCollectionSpec,
2127    pub(crate) mean_offset: Array1<f64>,
2128    pub(crate) noise_offset: Array1<f64>,
2129    pub(crate) wiggle_knots: Array1<f64>,
2130    pub(crate) wiggle_degree: usize,
2131    pub(crate) wiggle_block: ParameterBlockInput,
2132}
2133
2134impl LocationScaleFamilyBuilder for GaussianLocationScaleWiggleTermBuilder {
2135    type Family = GaussianLocationScaleWiggleFamily;
2136
2137    fn meanspec(&self) -> &TermCollectionSpec {
2138        &self.meanspec
2139    }
2140
2141    fn noisespec(&self) -> &TermCollectionSpec {
2142        &self.noisespec
2143    }
2144
2145    fn noise_penalty_count(&self, noise_design: &TermCollectionDesign) -> usize {
2146        // Same nullspace log-sigma shrinkage penalty as the non-wiggle
2147        // Gaussian builder; see GaussianLocationScaleTermBuilder.
2148        noise_design.penalties.len() + 1
2149    }
2150
2151    fn exact_spatial_joint_supported(&self) -> bool {
2152        true
2153    }
2154
2155    fn exact_spatial_seed_risk_profile(&self) -> crate::seeding::SeedRiskProfile {
2156        crate::seeding::SeedRiskProfile::GaussianLocationScale
2157    }
2158
2159    fn require_exact_spatial_joint(&self) -> bool {
2160        true
2161    }
2162
2163    fn extra_rho0(&self) -> Result<Array1<f64>, String> {
2164        initial_log_lambdas_orzeros(&self.wiggle_block)
2165    }
2166
2167    fn build_blocks(
2168        &self,
2169        theta: &Array1<f64>,
2170        mean_design: &TermCollectionDesign,
2171        noise_design: &TermCollectionDesign,
2172        mean_beta_hint: Option<Array1<f64>>,
2173        noise_beta_hint: Option<Array1<f64>>,
2174    ) -> Result<Vec<ParameterBlockSpec>, String> {
2175        let layout = GamlssLambdaLayout::withwiggle(
2176            mean_design.penalties.len(),
2177            self.noise_penalty_count(noise_design),
2178            self.wiggle_block.penalties.len(),
2179        );
2180        layout.validate_theta_len(theta.len(), "gaussian location-scale wiggle")?;
2181        let (mut meanspec, mut noisespec) = build_gaussian_mean_and_scale_blocks(
2182            &self.y,
2183            &self.weights,
2184            mean_design,
2185            noise_design,
2186            &self.mean_offset,
2187            &self.noise_offset,
2188            layout.mean_from(theta),
2189            layout.noise_from(theta),
2190            mean_beta_hint,
2191            noise_beta_hint,
2192            "GaussianLocationScaleWiggle::build_blocks",
2193        )?;
2194        // Keep the dynamic full-width wiggle basis safe from a canonical-gauge
2195        // column drop: route the shared level/intercept alias onto the
2196        // column-reducible mean and log-sigma blocks by giving them a lower
2197        // gauge priority than the wiggle block's fixed 100 (see the binomial
2198        // wiggle path and `build_location_scale_wiggle_block`).
2199        meanspec.gauge_priority = LINK_WIGGLE_GAUGE_PRIORITY;
2200        noisespec.gauge_priority = LINK_WIGGLE_GAUGE_PRIORITY;
2201        let n_rows = meanspec.design.nrows();
2202        let wigglespec = build_location_scale_wiggle_block(
2203            "wiggle",
2204            self.wiggle_block.design.clone(),
2205            self.wiggle_block.offset.clone(),
2206            wiggle_block_penalty_matrices(&self.wiggle_block),
2207            self.wiggle_block.nullspace_dims.clone(),
2208            layout.wiggle_from(theta),
2209            self.wiggle_block.initial_beta.clone(),
2210            n_rows,
2211        )?;
2212        Ok(vec![meanspec, noisespec, wigglespec])
2213    }
2214
2215    fn build_family(
2216        &self,
2217        mean_design: &TermCollectionDesign,
2218        noise_design: &TermCollectionDesign,
2219    ) -> Self::Family {
2220        let preparednoise_design =
2221            prepared_gaussian_log_sigma_design(&mean_design.design, &noise_design.design).expect(
2222                "prepared Gaussian log-sigma design should match wiggle block construction",
2223            );
2224        GaussianLocationScaleWiggleFamily {
2225            y: self.y.clone(),
2226            weights: self.weights.clone(),
2227            mu_design: Some(mean_design.design.clone()),
2228            log_sigma_design: Some(preparednoise_design),
2229            wiggle_knots: self.wiggle_knots.clone(),
2230            wiggle_degree: self.wiggle_degree,
2231            policy: gam_runtime::resource::ResourcePolicy::default_library(),
2232            cached_row_scalars: std::sync::RwLock::new(None),
2233        }
2234    }
2235
2236    fn extract_primary_betas(
2237        &self,
2238        fit: &UnifiedFitResult,
2239    ) -> Result<(Array1<f64>, Array1<f64>), String> {
2240        let mean_beta = fit
2241            .block_states
2242            .get(GaussianLocationScaleWiggleFamily::BLOCK_MU)
2243            .ok_or_else(|| "missing Gaussian wiggle mu block state".to_string())?
2244            .beta
2245            .clone();
2246        let noise_beta = fit
2247            .block_states
2248            .get(GaussianLocationScaleWiggleFamily::BLOCK_LOG_SIGMA)
2249            .ok_or_else(|| "missing Gaussian wiggle log_sigma block state".to_string())?
2250            .beta
2251            .clone();
2252        Ok((mean_beta, noise_beta))
2253    }
2254
2255    fn build_psiderivative_blocks(
2256        &self,
2257        data: ndarray::ArrayView2<'_, f64>,
2258        meanspec_resolved: &TermCollectionSpec,
2259        noisespec_resolved: &TermCollectionSpec,
2260        mean_design: &TermCollectionDesign,
2261        noise_design: &TermCollectionDesign,
2262    ) -> Result<Vec<Vec<CustomFamilyBlockPsiDerivative>>, String> {
2263        let mean_derivs =
2264            build_block_spatial_psi_derivatives(data, meanspec_resolved, mean_design)?.ok_or_else(
2265                || "missing Gaussian wiggle mean spatial psi derivatives".to_string(),
2266            )?;
2267        let noise_derivs =
2268            build_block_spatial_psi_derivatives(data, noisespec_resolved, noise_design)?
2269                .ok_or_else(|| {
2270                    "missing Gaussian wiggle log-sigma spatial psi derivatives".to_string()
2271                })?;
2272        Ok(vec![mean_derivs, noise_derivs, Vec::new()])
2273    }
2274}
2275
2276pub(crate) struct BinomialLocationScaleTermBuilder {
2277    pub(crate) y: Array1<f64>,
2278    pub(crate) weights: Array1<f64>,
2279    pub(crate) link_kind: InverseLink,
2280    pub(crate) meanspec: TermCollectionSpec,
2281    pub(crate) noisespec: TermCollectionSpec,
2282    pub(crate) mean_offset: Array1<f64>,
2283    pub(crate) noise_offset: Array1<f64>,
2284}
2285
2286impl LocationScaleFamilyBuilder for BinomialLocationScaleTermBuilder {
2287    type Family = BinomialLocationScaleFamily;
2288
2289    fn meanspec(&self) -> &TermCollectionSpec {
2290        &self.meanspec
2291    }
2292
2293    fn noisespec(&self) -> &TermCollectionSpec {
2294        &self.noisespec
2295    }
2296
2297    fn exact_spatial_joint_supported(&self) -> bool {
2298        true
2299    }
2300
2301    fn require_exact_spatial_joint(&self) -> bool {
2302        true
2303    }
2304
2305    fn noise_penalty_count(&self, noise_design: &TermCollectionDesign) -> usize {
2306        noise_design.penalties.len() + 1
2307    }
2308
2309    fn build_blocks(
2310        &self,
2311        theta: &Array1<f64>,
2312        mean_design: &TermCollectionDesign,
2313        noise_design: &TermCollectionDesign,
2314        mean_beta_hint: Option<Array1<f64>>,
2315        noise_beta_hint: Option<Array1<f64>>,
2316    ) -> Result<Vec<ParameterBlockSpec>, String> {
2317        let layout = GamlssLambdaLayout::two_block(
2318            mean_design.penalties.len(),
2319            self.noise_penalty_count(noise_design),
2320        );
2321        layout.validate_theta_len(theta.len(), "binomial location-scale")?;
2322        let (thresholdspec, log_sigmaspec) = build_binomial_threshold_and_scale_blocks(
2323            &self.y,
2324            &self.weights,
2325            &self.link_kind,
2326            mean_design,
2327            noise_design,
2328            &self.mean_offset,
2329            &self.noise_offset,
2330            layout.mean_from(theta),
2331            layout.noise_from(theta),
2332            mean_beta_hint,
2333            noise_beta_hint,
2334            "BinomialLocationScale::build_blocks",
2335        )?;
2336        Ok(vec![thresholdspec, log_sigmaspec])
2337    }
2338
2339    fn build_family(
2340        &self,
2341        mean_design: &TermCollectionDesign,
2342        noise_design: &TermCollectionDesign,
2343    ) -> Self::Family {
2344        let identifiednoise_design =
2345            identified_binomial_log_sigma_design(mean_design, noise_design, &self.weights)
2346                .expect("identified binomial log-sigma design");
2347        BinomialLocationScaleFamily {
2348            y: self.y.clone(),
2349            weights: self.weights.clone(),
2350            link_kind: self.link_kind.clone(),
2351            threshold_design: Some(mean_design.design.clone()),
2352            log_sigma_design: Some(identifiednoise_design),
2353            policy: gam_runtime::resource::ResourcePolicy::default_library(),
2354        }
2355    }
2356
2357    fn extract_primary_betas(
2358        &self,
2359        fit: &UnifiedFitResult,
2360    ) -> Result<(Array1<f64>, Array1<f64>), String> {
2361        let mean_beta = fit
2362            .block_states
2363            .get(BinomialLocationScaleFamily::BLOCK_T)
2364            .ok_or_else(|| "missing Binomial threshold block state".to_string())?
2365            .beta
2366            .clone();
2367        let noise_beta = fit
2368            .block_states
2369            .get(BinomialLocationScaleFamily::BLOCK_LOG_SIGMA)
2370            .ok_or_else(|| "missing Binomial log_sigma block state".to_string())?
2371            .beta
2372            .clone();
2373        Ok((mean_beta, noise_beta))
2374    }
2375
2376    fn build_psiderivative_blocks(
2377        &self,
2378        data: ndarray::ArrayView2<'_, f64>,
2379        meanspec_resolved: &TermCollectionSpec,
2380        noisespec_resolved: &TermCollectionSpec,
2381        mean_design: &TermCollectionDesign,
2382        noise_design: &TermCollectionDesign,
2383    ) -> Result<Vec<Vec<CustomFamilyBlockPsiDerivative>>, String> {
2384        let mean_derivs =
2385            build_block_spatial_psi_derivatives(data, meanspec_resolved, mean_design)?
2386                .ok_or_else(|| "missing threshold spatial psi derivatives".to_string())?;
2387        let noise_derivs =
2388            build_block_spatial_psi_derivatives(data, noisespec_resolved, noise_design)?
2389                .ok_or_else(|| "missing log_sigma spatial psi derivatives".to_string())?;
2390        Ok(vec![mean_derivs, noise_derivs])
2391    }
2392}
2393
2394pub(crate) struct BinomialLocationScaleWiggleTermBuilder {
2395    pub(crate) y: Array1<f64>,
2396    pub(crate) weights: Array1<f64>,
2397    pub(crate) link_kind: InverseLink,
2398    pub(crate) meanspec: TermCollectionSpec,
2399    pub(crate) noisespec: TermCollectionSpec,
2400    pub(crate) mean_offset: Array1<f64>,
2401    pub(crate) noise_offset: Array1<f64>,
2402    pub(crate) wiggle_knots: Array1<f64>,
2403    pub(crate) wiggle_degree: usize,
2404    pub(crate) wiggle_block: ParameterBlockInput,
2405}
2406
2407impl LocationScaleFamilyBuilder for BinomialLocationScaleWiggleTermBuilder {
2408    type Family = BinomialLocationScaleWiggleFamily;
2409
2410    fn meanspec(&self) -> &TermCollectionSpec {
2411        &self.meanspec
2412    }
2413
2414    fn noisespec(&self) -> &TermCollectionSpec {
2415        &self.noisespec
2416    }
2417
2418    fn exact_spatial_joint_supported(&self) -> bool {
2419        true
2420    }
2421
2422    fn require_exact_spatial_joint(&self) -> bool {
2423        true
2424    }
2425
2426    fn extra_rho0(&self) -> Result<Array1<f64>, String> {
2427        initial_log_lambdas_orzeros(&self.wiggle_block)
2428    }
2429
2430    fn noise_penalty_count(&self, noise_design: &TermCollectionDesign) -> usize {
2431        noise_design.penalties.len() + 1
2432    }
2433
2434    fn build_blocks(
2435        &self,
2436        theta: &Array1<f64>,
2437        mean_design: &TermCollectionDesign,
2438        noise_design: &TermCollectionDesign,
2439        mean_beta_hint: Option<Array1<f64>>,
2440        noise_beta_hint: Option<Array1<f64>>,
2441    ) -> Result<Vec<ParameterBlockSpec>, String> {
2442        let layout = GamlssLambdaLayout::withwiggle(
2443            mean_design.penalties.len(),
2444            self.noise_penalty_count(noise_design),
2445            self.wiggle_block.penalties.len(),
2446        );
2447        layout.validate_theta_len(theta.len(), "wiggle location-scale")?;
2448        let (mut thresholdspec, mut log_sigmaspec) = build_binomial_threshold_and_scale_blocks(
2449            &self.y,
2450            &self.weights,
2451            &self.link_kind,
2452            mean_design,
2453            noise_design,
2454            &self.mean_offset,
2455            &self.noise_offset,
2456            layout.mean_from(theta),
2457            layout.noise_from(theta),
2458            mean_beta_hint,
2459            noise_beta_hint,
2460            "BinomialLocationScaleWiggle::build_blocks",
2461        )?;
2462        // The dynamic monotone wiggle basis is regenerated at full raw width
2463        // every inner iteration and asserts `x.ncols() == spec.design.ncols()`
2464        // in `block_geometry`, so it cannot tolerate a canonical-gauge column
2465        // drop. The level/intercept direction the I-spline shares with the
2466        // threshold block must therefore be routed onto the threshold (and the
2467        // log-sigma) block, whose static designs are column-reducible and
2468        // lifted back via the canonical per-block transform `T`. Give both
2469        // non-wiggle blocks a lower gauge priority than the wiggle block (which
2470        // `build_location_scale_wiggle_block` fixes at 100) so the shared-level
2471        // alias drop lands on them and leaves the dynamic wiggle basis full
2472        // width — mirroring the binomial mean-wiggle path.
2473        thresholdspec.gauge_priority = LINK_WIGGLE_GAUGE_PRIORITY;
2474        log_sigmaspec.gauge_priority = LINK_WIGGLE_GAUGE_PRIORITY;
2475        let n_rows = thresholdspec.design.nrows();
2476        let wigglespec = build_location_scale_wiggle_block(
2477            "wiggle",
2478            self.wiggle_block.design.clone(),
2479            self.wiggle_block.offset.clone(),
2480            wiggle_block_penalty_matrices(&self.wiggle_block),
2481            vec![],
2482            layout.wiggle_from(theta),
2483            self.wiggle_block.initial_beta.clone(),
2484            n_rows,
2485        )?;
2486        Ok(vec![thresholdspec, log_sigmaspec, wigglespec])
2487    }
2488
2489    fn build_family(
2490        &self,
2491        mean_design: &TermCollectionDesign,
2492        noise_design: &TermCollectionDesign,
2493    ) -> Self::Family {
2494        let identifiednoise_design =
2495            identified_binomial_log_sigma_design(mean_design, noise_design, &self.weights)
2496                .expect("identified binomial log-sigma design should match block construction");
2497        BinomialLocationScaleWiggleFamily {
2498            y: self.y.clone(),
2499            weights: self.weights.clone(),
2500            link_kind: self.link_kind.clone(),
2501            threshold_design: Some(mean_design.design.clone()),
2502            log_sigma_design: Some(identifiednoise_design),
2503            wiggle_knots: self.wiggle_knots.clone(),
2504            wiggle_degree: self.wiggle_degree,
2505            policy: gam_runtime::resource::ResourcePolicy::default_library(),
2506        }
2507    }
2508
2509    fn extract_primary_betas(
2510        &self,
2511        fit: &UnifiedFitResult,
2512    ) -> Result<(Array1<f64>, Array1<f64>), String> {
2513        let mean_beta = fit
2514            .block_states
2515            .get(BinomialLocationScaleWiggleFamily::BLOCK_T)
2516            .ok_or_else(|| "missing Binomial wiggle threshold block state".to_string())?
2517            .beta
2518            .clone();
2519        let noise_beta = fit
2520            .block_states
2521            .get(BinomialLocationScaleWiggleFamily::BLOCK_LOG_SIGMA)
2522            .ok_or_else(|| "missing Binomial wiggle log_sigma block state".to_string())?
2523            .beta
2524            .clone();
2525        Ok((mean_beta, noise_beta))
2526    }
2527
2528    fn build_psiderivative_blocks(
2529        &self,
2530        data: ndarray::ArrayView2<'_, f64>,
2531        meanspec_resolved: &TermCollectionSpec,
2532        noisespec_resolved: &TermCollectionSpec,
2533        mean_design: &TermCollectionDesign,
2534        noise_design: &TermCollectionDesign,
2535    ) -> Result<Vec<Vec<CustomFamilyBlockPsiDerivative>>, String> {
2536        let mean_derivs =
2537            build_block_spatial_psi_derivatives(data, meanspec_resolved, mean_design)?
2538                .ok_or_else(|| "missing threshold spatial psi derivatives".to_string())?;
2539        let noise_derivs =
2540            build_block_spatial_psi_derivatives(data, noisespec_resolved, noise_design)?
2541                .ok_or_else(|| "missing log_sigma spatial psi derivatives".to_string())?;
2542        // The wiggle block has no direct spatial design matrix of its own in the
2543        // term builder. Spatial psi moves the wiggle family only through the
2544        // realized threshold/log-sigma designs, which in turn perturb q0 and the
2545        // realized wiggle basis B(q0). The exact joint wiggle psi hooks consume
2546        // those threshold/log-sigma derivative payloads and reconstruct the full
2547        // flattened likelihood-side [rho, psi] calculus internally, so the
2548        // wiggle block intentionally contributes no direct CustomFamilyBlockPsiDerivative
2549        // entries here.
2550        Ok(vec![mean_derivs, noise_derivs, Vec::new()])
2551    }
2552}
2553
2554pub(crate) fn fit_gaussian_location_scale_terms(
2555    data: ndarray::ArrayView2<'_, f64>,
2556    spec: GaussianLocationScaleTermSpec,
2557    options: &BlockwiseFitOptions,
2558    kappa_options: &SpatialLengthScaleOptimizationOptions,
2559) -> Result<BlockwiseTermFitResult, String> {
2560    validate_gaussian_location_scale_termspec(data, &spec, "fit_gaussian_location_scale_terms")?;
2561    fit_location_scale_terms(
2562        data,
2563        GaussianLocationScaleTermBuilder {
2564            y: spec.y,
2565            weights: spec.weights,
2566            meanspec: spec.meanspec,
2567            noisespec: spec.log_sigmaspec,
2568            mean_offset: spec.mean_offset,
2569            noise_offset: spec.log_sigma_offset,
2570        },
2571        options,
2572        kappa_options,
2573    )
2574}
2575
2576pub(crate) fn fit_gaussian_location_scalewiggle_terms(
2577    data: ndarray::ArrayView2<'_, f64>,
2578    spec: GaussianLocationScaleWiggleTermSpec,
2579    options: &BlockwiseFitOptions,
2580    kappa_options: &SpatialLengthScaleOptimizationOptions,
2581) -> Result<BlockwiseTermFitResult, String> {
2582    validate_gaussian_location_scalewiggle_termspec(
2583        data,
2584        &spec,
2585        "fit_gaussian_location_scalewiggle_terms",
2586    )?;
2587    fit_location_scale_terms(
2588        data,
2589        GaussianLocationScaleWiggleTermBuilder {
2590            y: spec.y,
2591            weights: spec.weights,
2592            meanspec: spec.meanspec,
2593            noisespec: spec.log_sigmaspec,
2594            mean_offset: spec.mean_offset,
2595            noise_offset: spec.log_sigma_offset,
2596            wiggle_knots: spec.wiggle_knots,
2597            wiggle_degree: spec.wiggle_degree,
2598            wiggle_block: spec.wiggle_block,
2599        },
2600        options,
2601        kappa_options,
2602    )
2603}
2604
2605pub(crate) fn select_gaussian_location_scale_link_wiggle_basis_from_pilot(
2606    pilot: &BlockwiseTermFitResult,
2607    wiggle_cfg: &WiggleBlockConfig,
2608    wiggle_penalty_orders: &[usize],
2609) -> Result<SelectedWiggleBasis, String> {
2610    let q_seed = pilot
2611        .fit
2612        .block_states
2613        .first()
2614        .ok_or_else(|| "pilot Gaussian wiggle fit is missing mean block".to_string())?
2615        .eta
2616        .view();
2617    select_wiggle_basis_from_seed(q_seed, wiggle_cfg, wiggle_penalty_orders)
2618}
2619
2620pub(crate) fn fit_gaussian_location_scale_terms_with_selected_wiggle(
2621    data: ndarray::ArrayView2<'_, f64>,
2622    spec: GaussianLocationScaleTermSpec,
2623    selected_wiggle_basis: SelectedWiggleBasis,
2624    options: &BlockwiseFitOptions,
2625    kappa_options: &SpatialLengthScaleOptimizationOptions,
2626) -> Result<BlockwiseTermWiggleFitResult, String> {
2627    let SelectedWiggleBasis {
2628        knots: wiggle_knots,
2629        degree: wiggle_degree,
2630        block: wiggle_block,
2631        ..
2632    } = selected_wiggle_basis;
2633    let solved = fit_gaussian_location_scalewiggle_terms(
2634        data,
2635        GaussianLocationScaleWiggleTermSpec {
2636            y: spec.y,
2637            weights: spec.weights,
2638            meanspec: spec.meanspec,
2639            log_sigmaspec: spec.log_sigmaspec,
2640            mean_offset: spec.mean_offset,
2641            log_sigma_offset: spec.log_sigma_offset,
2642            wiggle_knots: wiggle_knots.clone(),
2643            wiggle_degree,
2644            wiggle_block,
2645        },
2646        options,
2647        kappa_options,
2648    )?;
2649
2650    BlockwiseTermWiggleFitResult::try_from_parts(BlockwiseTermWiggleFitResultParts {
2651        fit: solved,
2652        wiggle_knots,
2653        wiggle_degree,
2654    })
2655}
2656
2657pub(crate) fn fit_binomial_location_scale_terms(
2658    data: ndarray::ArrayView2<'_, f64>,
2659    spec: BinomialLocationScaleTermSpec,
2660    options: &BlockwiseFitOptions,
2661    kappa_options: &SpatialLengthScaleOptimizationOptions,
2662) -> Result<BlockwiseTermFitResult, String> {
2663    validate_binomial_location_scale_termspec(data, &spec, "fit_binomial_location_scale_terms")?;
2664    fit_location_scale_terms(
2665        data,
2666        BinomialLocationScaleTermBuilder {
2667            y: spec.y,
2668            weights: spec.weights,
2669            link_kind: spec.link_kind,
2670            meanspec: spec.thresholdspec,
2671            noisespec: spec.log_sigmaspec,
2672            mean_offset: spec.threshold_offset,
2673            noise_offset: spec.log_sigma_offset,
2674        },
2675        options,
2676        kappa_options,
2677    )
2678}
2679
2680pub(crate) fn fit_binomial_location_scalewiggle_terms(
2681    data: ndarray::ArrayView2<'_, f64>,
2682    spec: BinomialLocationScaleWiggleTermSpec,
2683    options: &BlockwiseFitOptions,
2684    kappa_options: &SpatialLengthScaleOptimizationOptions,
2685) -> Result<BlockwiseTermFitResult, String> {
2686    validate_binomial_location_scalewiggle_termspec(
2687        data,
2688        &spec,
2689        "fit_binomial_location_scalewiggle_terms",
2690    )?;
2691    fit_location_scale_terms(
2692        data,
2693        BinomialLocationScaleWiggleTermBuilder {
2694            y: spec.y,
2695            weights: spec.weights,
2696            link_kind: spec.link_kind,
2697            meanspec: spec.thresholdspec,
2698            noisespec: spec.log_sigmaspec,
2699            mean_offset: spec.threshold_offset,
2700            noise_offset: spec.log_sigma_offset,
2701            wiggle_knots: spec.wiggle_knots,
2702            wiggle_degree: spec.wiggle_degree,
2703            wiggle_block: spec.wiggle_block,
2704        },
2705        options,
2706        kappa_options,
2707    )
2708}
2709
2710pub(crate) fn select_binomial_location_scale_link_wiggle_basis_from_pilot(
2711    pilot: &BlockwiseTermFitResult,
2712    wiggle_cfg: &WiggleBlockConfig,
2713    wiggle_penalty_orders: &[usize],
2714) -> Result<SelectedWiggleBasis, String> {
2715    let eta_t = pilot
2716        .fit
2717        .block_states
2718        .first()
2719        .ok_or_else(|| "pilot fit is missing threshold block".to_string())?
2720        .eta
2721        .view();
2722    let eta_ls = pilot
2723        .fit
2724        .block_states
2725        .get(1)
2726        .ok_or_else(|| "pilot fit is missing log_sigma block".to_string())?
2727        .eta
2728        .view();
2729    let sigma = eta_ls.mapv(safe_exp);
2730    let q_seed = Array1::from_iter(eta_t.iter().zip(sigma.iter()).map(|(&t, &s)| -t / s));
2731    select_wiggle_basis_from_seed(q_seed.view(), wiggle_cfg, wiggle_penalty_orders)
2732}
2733
2734pub(crate) fn fit_binomial_location_scale_terms_with_selected_wiggle(
2735    data: ndarray::ArrayView2<'_, f64>,
2736    spec: BinomialLocationScaleTermSpec,
2737    selected_wiggle_basis: SelectedWiggleBasis,
2738    options: &BlockwiseFitOptions,
2739    kappa_options: &SpatialLengthScaleOptimizationOptions,
2740) -> Result<BlockwiseTermWiggleFitResult, String> {
2741    let SelectedWiggleBasis {
2742        knots: wiggle_knots,
2743        degree: wiggle_degree,
2744        block: wiggle_block,
2745        ..
2746    } = selected_wiggle_basis;
2747    let solved = fit_binomial_location_scalewiggle_terms(
2748        data,
2749        BinomialLocationScaleWiggleTermSpec {
2750            y: spec.y,
2751            weights: spec.weights,
2752            link_kind: spec.link_kind,
2753            thresholdspec: spec.thresholdspec,
2754            log_sigmaspec: spec.log_sigmaspec,
2755            threshold_offset: spec.threshold_offset,
2756            log_sigma_offset: spec.log_sigma_offset,
2757            wiggle_knots: wiggle_knots.clone(),
2758            wiggle_degree,
2759            wiggle_block,
2760        },
2761        options,
2762        kappa_options,
2763    )?;
2764
2765    BlockwiseTermWiggleFitResult::try_from_parts(BlockwiseTermWiggleFitResultParts {
2766        fit: solved,
2767        wiggle_knots,
2768        wiggle_degree,
2769    })
2770}
2771
2772pub(crate) fn select_binomial_mean_link_wiggle_basis_from_pilot(
2773    pilot_design: &TermCollectionDesign,
2774    pilot_fit: &UnifiedFitResult,
2775    wiggle_cfg: &WiggleBlockConfig,
2776    wiggle_penalty_orders: &[usize],
2777) -> Result<SelectedWiggleBasis, String> {
2778    let q_seed = pilot_design.design.dot(&pilot_fit.beta);
2779    select_wiggle_basis_from_seed(q_seed.view(), wiggle_cfg, wiggle_penalty_orders)
2780}
2781
2782pub(crate) fn fit_binomial_mean_wiggle_terms_with_selected_basis(
2783    data: ndarray::ArrayView2<'_, f64>,
2784    pilot_spec: &TermCollectionSpec,
2785    pilot_design: &TermCollectionDesign,
2786    pilot_fit: &UnifiedFitResult,
2787    y: &Array1<f64>,
2788    weights: &Array1<f64>,
2789    link_kind: InverseLink,
2790    selected_wiggle_basis: SelectedWiggleBasis,
2791    options: &BlockwiseFitOptions,
2792    kappa_options: &SpatialLengthScaleOptimizationOptions,
2793) -> Result<BinomialMeanWiggleTermFitResult, String> {
2794    const RHO_BOUND: f64 = 12.0;
2795
2796    validate_term_weights(
2797        data,
2798        y.len(),
2799        weights,
2800        "fit_binomial_mean_wiggle_terms_with_selected_basis",
2801    )?;
2802    validate_binomial_response(y, "fit_binomial_mean_wiggle_terms_with_selected_basis")?;
2803
2804    // Large-n binomial mean-wiggle fits keep the caller's explicit Hessian
2805    // request. The unified evaluator chooses the scalable exact representation
2806    // (dense for small work, operator HVP for large work) instead of routing to
2807    // gradient-only BFGS by observation count.
2808
2809    let SelectedWiggleBasis {
2810        knots: wiggle_knots,
2811        degree: wiggle_degree,
2812        block: wiggle_block,
2813        ..
2814    } = selected_wiggle_basis;
2815
2816    let spatial_terms = spatial_length_scale_term_indices(pilot_spec);
2817    if spatial_terms.is_empty() {
2818        let (fit, saved_warp_beta) = fit_binomial_mean_wiggle(
2819            BinomialMeanWiggleSpec {
2820                y: y.clone(),
2821                weights: weights.clone(),
2822                link_kind,
2823                wiggle_knots: wiggle_knots.clone(),
2824                wiggle_degree,
2825                eta_block: ParameterBlockInput {
2826                    design: pilot_design.design.clone(),
2827                    offset: Array1::zeros(y.len()),
2828                    penalties: pilot_design
2829                        .penalties
2830                        .iter()
2831                        .map(crate::model_types::PenaltySpec::from_blockwise_ref)
2832                        .collect(),
2833                    nullspace_dims: vec![],
2834                    initial_log_lambdas: Some(
2835                        pilot_fit
2836                            .lambdas
2837                            .mapv(|v| v.max(WARMSTART_LOG_LAMBDA_FLOOR).ln()),
2838                    ),
2839                    initial_beta: Some(pilot_fit.beta.clone()),
2840                },
2841                wiggle_block,
2842            },
2843            options,
2844        )?;
2845        return Ok(BinomialMeanWiggleTermFitResult {
2846            fit,
2847            resolvedspec: pilot_spec.clone(),
2848            design: pilot_design.clone(),
2849            wiggle_knots,
2850            wiggle_degree,
2851            saved_warp_beta,
2852        });
2853    }
2854
2855    let dims_per_term = spatial_dims_per_term(pilot_spec, &spatial_terms);
2856    let log_kappa0 =
2857        SpatialLogKappaCoords::from_length_scales_aniso(pilot_spec, &spatial_terms, kappa_options)
2858            .reseed_from_data(data, pilot_spec, &spatial_terms, kappa_options);
2859    let log_kappa_lower = SpatialLogKappaCoords::lower_bounds_aniso_from_data(
2860        data,
2861        pilot_spec,
2862        &spatial_terms,
2863        &dims_per_term,
2864        kappa_options,
2865    );
2866    let log_kappa_upper = SpatialLogKappaCoords::upper_bounds_aniso_from_data(
2867        data,
2868        pilot_spec,
2869        &spatial_terms,
2870        &dims_per_term,
2871        kappa_options,
2872    );
2873    // Project seed onto bounds; spec.length_scale is a hint, not a constraint.
2874    let log_kappa0 = log_kappa0.clamp_to_bounds(&log_kappa_lower, &log_kappa_upper);
2875
2876    let eta_penalty_count = pilot_design.penalties.len();
2877    let wiggle_penalty_count = initial_log_lambdas_orzeros(&wiggle_block)?.len();
2878    let rho_dim = eta_penalty_count + wiggle_penalty_count;
2879    let baseline_resolvedspec = log_kappa0
2880        .apply_tospec(pilot_spec, &spatial_terms)
2881        .map_err(|e| e.to_string())?;
2882    let baseline_design =
2883        build_term_collection_design(data, &baseline_resolvedspec).map_err(|e| e.to_string())?;
2884    let baseline_fit = fit_binomial_mean_wiggle(
2885        BinomialMeanWiggleSpec {
2886            y: y.clone(),
2887            weights: weights.clone(),
2888            link_kind: link_kind.clone(),
2889            wiggle_knots: wiggle_knots.clone(),
2890            wiggle_degree,
2891            eta_block: ParameterBlockInput {
2892                design: baseline_design.design.clone(),
2893                offset: Array1::zeros(y.len()),
2894                penalties: baseline_design
2895                    .penalties
2896                    .iter()
2897                    .map(crate::model_types::PenaltySpec::from_blockwise_ref)
2898                    .collect(),
2899                nullspace_dims: vec![],
2900                initial_log_lambdas: Some(
2901                    pilot_fit
2902                        .lambdas
2903                        .mapv(|v| v.max(WARMSTART_LOG_LAMBDA_FLOOR).ln()),
2904                ),
2905                initial_beta: Some(pilot_fit.beta.clone()),
2906            },
2907            wiggle_block: wiggle_block.clone(),
2908        },
2909        options,
2910    )?
2911    .0;
2912    let baseline_log_lambdas = baseline_fit
2913        .lambdas
2914        .mapv(|v| v.max(WARMSTART_LOG_LAMBDA_FLOOR).ln());
2915    if baseline_log_lambdas.len() != rho_dim {
2916        return Err(GamlssError::DimensionMismatch {
2917            reason: format!(
2918                "baseline binomial mean-wiggle fit returned {} log-lambdas, expected {rho_dim}",
2919                baseline_log_lambdas.len()
2920            ),
2921        }
2922        .into());
2923    }
2924    let baseline_eta_beta = baseline_fit
2925        .block_states
2926        .get(BinomialMeanWiggleFamily::BLOCK_ETA)
2927        .ok_or_else(|| "baseline binomial mean-wiggle fit missing eta block".to_string())?
2928        .beta
2929        .clone();
2930    let baseline_wiggle_beta = Some(
2931        baseline_fit
2932            .block_states
2933            .get(BinomialMeanWiggleFamily::BLOCK_WIGGLE)
2934            .ok_or_else(|| "baseline binomial mean-wiggle fit missing wiggle block".to_string())?
2935            .beta
2936            .clone(),
2937    );
2938    let theta_dim = rho_dim + log_kappa0.len();
2939    let mut theta0 = Array1::<f64>::zeros(theta_dim);
2940    theta0
2941        .slice_mut(s![0..rho_dim])
2942        .assign(&baseline_log_lambdas);
2943    theta0
2944        .slice_mut(s![rho_dim..theta_dim])
2945        .assign(log_kappa0.as_array());
2946
2947    let mut lower = Array1::<f64>::from_elem(theta_dim, -RHO_BOUND);
2948    let mut upper = Array1::<f64>::from_elem(theta_dim, RHO_BOUND);
2949    lower
2950        .slice_mut(s![rho_dim..theta_dim])
2951        .assign(log_kappa_lower.as_array());
2952    upper
2953        .slice_mut(s![rho_dim..theta_dim])
2954        .assign(log_kappa_upper.as_array());
2955
2956    let pilot_spec_cloned = pilot_spec.clone();
2957    let pilot_beta = baseline_eta_beta;
2958    let wiggle_design = wiggle_block.design.clone();
2959    let wiggle_offset = wiggle_block.offset.clone();
2960    let wiggle_penalties = wiggle_block.penalties.clone();
2961    let wiggle_initial_beta = baseline_wiggle_beta;
2962    let wiggle_knots_cloned = wiggle_knots.clone();
2963    let y_cloned = y.clone();
2964    let weights_cloned = weights.clone();
2965    let link_kind_cloned = link_kind.clone();
2966    let outer_family = BinomialMeanWiggleFamily {
2967        y: y_cloned.clone(),
2968        weights: weights_cloned.clone(),
2969        link_kind: link_kind_cloned.clone(),
2970        wiggle_knots: wiggle_knots_cloned.clone(),
2971        wiggle_degree,
2972        policy: gam_runtime::resource::ResourcePolicy::default_library(),
2973        // The spatial joint-κ path keeps the dynamic warp basis (#1596 frozen
2974        // basis applies to the non-spatial `fit_binomial_mean_wiggle` loop).
2975        frozen_warp_design: None,
2976    };
2977    let screening_cap = Arc::new(AtomicUsize::new(0));
2978    let mut outer_options = options.clone();
2979    outer_options.screening_max_inner_iterations = Some(Arc::clone(&screening_cap));
2980    struct MeanWiggleOuterState {
2981        pub(crate) warm_cache: Option<crate::custom_family::CustomFamilyWarmStart>,
2982        pub(crate) last_eval: Option<(
2983            Array1<f64>,
2984            f64,
2985            Array1<f64>,
2986            gam_problem::HessianResult,
2987            crate::custom_family::CustomFamilyWarmStart,
2988        )>,
2989    }
2990
2991    let build_realized_blocks = |theta: &Array1<f64>| -> Result<
2992        (
2993            TermCollectionSpec,
2994            TermCollectionDesign,
2995            Vec<ParameterBlockSpec>,
2996            Vec<CustomFamilyBlockPsiDerivative>,
2997        ),
2998        String,
2999    > {
3000        let log_kappa =
3001            SpatialLogKappaCoords::from_theta_tail_with_dims(theta, rho_dim, dims_per_term.clone());
3002        let resolvedspec = log_kappa
3003            .apply_tospec(&pilot_spec_cloned, &spatial_terms)
3004            .map_err(|e| e.to_string())?;
3005        let design =
3006            build_term_collection_design(data, &resolvedspec).map_err(|e| e.to_string())?;
3007        let eta_derivs = build_block_spatial_psi_derivatives(data, &resolvedspec, &design)?
3008            .ok_or_else(|| {
3009                "missing eta spatial psi derivatives for binomial mean wiggle".to_string()
3010            })?;
3011        let blocks = vec![
3012            ParameterBlockSpec {
3013                name: "eta".to_string(),
3014                design: design.design.clone(),
3015                offset: Array1::zeros(y_cloned.len()),
3016                penalties: design.penalties_as_penalty_matrix(),
3017                nullspace_dims: vec![],
3018                initial_log_lambdas: theta.slice(s![0..eta_penalty_count]).to_owned(),
3019                initial_beta: Some(pilot_beta.clone()),
3020                // Lower gauge priority on the static eta design: it yields the
3021                // shared level/intercept direction to the dynamic full-width
3022                // wiggle I-spline block (see fit_binomial_mean_wiggle).
3023                gauge_priority: LINK_WIGGLE_GAUGE_PRIORITY,
3024                jacobian_callback: None,
3025                stacked_design: None,
3026                stacked_offset: None,
3027            },
3028            ParameterBlockSpec {
3029                name: "wiggle".to_string(),
3030                design: wiggle_design.clone(),
3031                offset: wiggle_offset.clone(),
3032                penalties: {
3033                    let p_wiggle = wiggle_design.ncols();
3034                    wiggle_penalties
3035                        .iter()
3036                        .map(|spec| match spec {
3037                            crate::model_types::PenaltySpec::Block {
3038                                local, col_range, ..
3039                            } => PenaltyMatrix::Blockwise {
3040                                local: local.clone(),
3041                                col_range: col_range.clone(),
3042                                total_dim: p_wiggle,
3043                            },
3044                            crate::model_types::PenaltySpec::Dense(m)
3045                            | crate::model_types::PenaltySpec::DenseWithMean {
3046                                matrix: m, ..
3047                            } => PenaltyMatrix::Dense(m.clone()),
3048                        })
3049                        .collect()
3050                },
3051                nullspace_dims: vec![],
3052                initial_log_lambdas: theta.slice(s![eta_penalty_count..rho_dim]).to_owned(),
3053                initial_beta: wiggle_initial_beta.clone(),
3054                gauge_priority: DEFAULT_GAUGE_PRIORITY,
3055                jacobian_callback: None,
3056                stacked_design: None,
3057                stacked_offset: None,
3058            },
3059        ];
3060        Ok((resolvedspec, design, blocks, eta_derivs))
3061    };
3062
3063    let build_eval = |theta: &Array1<f64>,
3064                      warm_cache: Option<&crate::custom_family::CustomFamilyWarmStart>,
3065                      need_hessian: bool|
3066     -> Result<
3067        (
3068            crate::custom_family::CustomFamilyJointHyperResult,
3069            TermCollectionSpec,
3070            TermCollectionDesign,
3071        ),
3072        String,
3073    > {
3074        let (resolvedspec, design, blocks, eta_derivs) = build_realized_blocks(theta)?;
3075        let eval = evaluate_custom_family_joint_hyper(
3076            &outer_family,
3077            &blocks,
3078            &outer_options,
3079            &theta.slice(s![0..rho_dim]).to_owned(),
3080            &[eta_derivs, Vec::new()],
3081            warm_cache,
3082            if need_hessian {
3083                gam_problem::EvalMode::ValueGradientHessian
3084            } else {
3085                gam_problem::EvalMode::ValueAndGradient
3086            },
3087        )?;
3088        Ok((eval, resolvedspec, design))
3089    };
3090
3091    let build_efs = |theta: &Array1<f64>,
3092                     warm_cache: Option<&crate::custom_family::CustomFamilyWarmStart>|
3093     -> Result<crate::custom_family::CustomFamilyJointHyperEfsResult, String> {
3094        let (_, _, blocks, eta_derivs) = build_realized_blocks(theta)?;
3095        evaluate_custom_family_joint_hyper_efs(
3096            &outer_family,
3097            &blocks,
3098            &outer_options,
3099            &theta.slice(s![0..rho_dim]).to_owned(),
3100            &[eta_derivs, Vec::new()],
3101            warm_cache,
3102        )
3103        .map_err(|e| e.to_string())
3104    };
3105
3106    use crate::model_types::EstimationError;
3107    use gam_solve::rho_optimizer::OuterEvalOrder;
3108    use gam_problem::{DeclaredHessianForm, Derivative, OuterEval};
3109
3110    // Exact first-order AND second-order [rho, psi] calculus is available
3111    // for all inverse links via the shared jet formulas plus the generic
3112    // exact-Newton D_βH / D²_βH closures routed through
3113    // evaluate_custom_family_joint_hyper -> joint_outer_evaluate ->
3114    // BorrowedJointDerivProvider. This enables the analytic-Hessian outer
3115    // plan for REML optimization instead of the downgraded gradient-only
3116    // outer strategies.
3117    //
3118    // Spatial log-kappa coordinates are ψ (design-moving) dimensions because
3119    // they rebuild the spatial basis and penalties at each outer proposal.
3120    let analytic_outer_hessian_available = true;
3121    let mut seed_heuristic = theta0.to_vec();
3122    for value in &mut seed_heuristic[..rho_dim] {
3123        *value = value.exp();
3124    }
3125    let problem = gam_solve::rho_optimizer::OuterProblem::new(theta_dim)
3126        .with_gradient(Derivative::Analytic)
3127        .with_hessian(if analytic_outer_hessian_available {
3128            DeclaredHessianForm::Either
3129        } else {
3130            DeclaredHessianForm::Unavailable
3131        })
3132        .with_psi_dim(theta_dim - rho_dim)
3133        .with_tolerance(options.outer_tol)
3134        .with_max_iter(options.outer_max_iter)
3135        .with_bounds(lower.clone(), upper.clone())
3136        .with_initial_rho(theta0.clone())
3137        .with_seed_config(crate::seeding::SeedConfig {
3138            max_seeds: 4,
3139            seed_budget: 2,
3140            risk_profile: crate::seeding::SeedRiskProfile::GeneralizedLinear,
3141            num_auxiliary_trailing: theta_dim - rho_dim,
3142            ..Default::default()
3143        })
3144        .with_screening_cap(Arc::clone(&screening_cap))
3145        .with_rho_bound(12.0)
3146        .with_heuristic_lambdas(seed_heuristic);
3147
3148    let eval_outer = |state: &mut MeanWiggleOuterState,
3149                      theta: &Array1<f64>,
3150                      order: OuterEvalOrder|
3151     -> Result<OuterEval, EstimationError> {
3152        if let Some((cached_theta, cached_cost, cached_grad, cached_hess, cached_warm)) =
3153            &state.last_eval
3154            && cached_theta == theta
3155            && (!matches!(order, OuterEvalOrder::ValueGradientHessian)
3156                || matches!(
3157                    cached_hess,
3158                    gam_problem::HessianResult::Analytic(_)
3159                        | gam_problem::HessianResult::Operator(_)
3160                ))
3161        {
3162            state.warm_cache = Some(cached_warm.clone());
3163            return Ok(OuterEval {
3164                cost: *cached_cost,
3165                gradient: cached_grad.clone(),
3166                hessian: cached_hess.clone(),
3167                inner_beta_hint: None,
3168            });
3169        }
3170        let need_hessian = matches!(order, OuterEvalOrder::ValueGradientHessian)
3171            && analytic_outer_hessian_available;
3172        let (eval, _, _) = build_eval(theta, state.warm_cache.as_ref(), need_hessian)
3173            .map_err(EstimationError::InvalidInput)?;
3174        if !eval.inner_converged {
3175            state.warm_cache = Some(eval.warm_start);
3176            crate::bail_invalid_estim!(
3177                "binomial mean-wiggle exact spatial inner solve did not converge"
3178            );
3179        }
3180        let hessian_result = eval.outer_hessian.clone();
3181        state.last_eval = Some((
3182            theta.clone(),
3183            eval.objective,
3184            eval.gradient.clone(),
3185            eval.outer_hessian.clone(),
3186            eval.warm_start.clone(),
3187        ));
3188        state.warm_cache = Some(eval.warm_start);
3189        Ok(OuterEval {
3190            cost: eval.objective,
3191            gradient: eval.gradient,
3192            hessian: hessian_result,
3193            inner_beta_hint: None,
3194        })
3195    };
3196
3197    let mut obj = problem.build_objective_with_screening_proxy(
3198        MeanWiggleOuterState {
3199            warm_cache: None,
3200            last_eval: None,
3201        },
3202        |state: &mut MeanWiggleOuterState, theta: &Array1<f64>| {
3203            if let Some((cached_theta, cached_cost, _, _, cached_warm)) = &state.last_eval
3204                && cached_theta == theta
3205            {
3206                state.warm_cache = Some(cached_warm.clone());
3207                return Ok(*cached_cost);
3208            }
3209            let (eval, _, _) = build_eval(theta, state.warm_cache.as_ref(), false)
3210                .map_err(EstimationError::InvalidInput)?;
3211            if !eval.inner_converged {
3212                state.warm_cache = Some(eval.warm_start);
3213                crate::bail_invalid_estim!(
3214                    "binomial mean-wiggle exact spatial cost inner solve did not converge"
3215                        .to_string(),
3216                );
3217            }
3218            state.warm_cache = Some(eval.warm_start);
3219            Ok(eval.objective)
3220        },
3221        |state: &mut MeanWiggleOuterState, theta: &Array1<f64>| {
3222            eval_outer(
3223                state,
3224                theta,
3225                if analytic_outer_hessian_available {
3226                    OuterEvalOrder::ValueGradientHessian
3227                } else {
3228                    OuterEvalOrder::ValueAndGradient
3229                },
3230            )
3231        },
3232        |state: &mut MeanWiggleOuterState, theta: &Array1<f64>, order: OuterEvalOrder| {
3233            eval_outer(state, theta, order)
3234        },
3235        Some(|state: &mut MeanWiggleOuterState| {
3236            state.warm_cache = None;
3237            state.last_eval = None;
3238        }),
3239        Some(|state: &mut MeanWiggleOuterState, theta: &Array1<f64>| {
3240            let eval = build_efs(theta, state.warm_cache.as_ref())
3241                .map_err(EstimationError::InvalidInput)?;
3242            if !eval.inner_converged {
3243                state.warm_cache = Some(eval.warm_start);
3244                crate::bail_invalid_estim!(
3245                    "binomial mean-wiggle exact spatial EFS inner solve did not converge"
3246                        .to_string(),
3247                );
3248            }
3249            state.warm_cache = Some(eval.warm_start);
3250            Ok(eval.efs_eval)
3251        }),
3252        // Seed-screening ranking proxy (#969). The cost closure above
3253        // hard-errors on a non-converged inner solve — correct for
3254        // line-search costs, but under the screening cap (wired into the
3255        // outer options and installed by the cascade) the inner solve is
3256        // truncated BY DESIGN, so screening through it rejects every seed
3257        // — the all-seeds-rejected front-door genus. Screening only RANKS
3258        // candidates: the truncated solve's penalized objective is the
3259        // ranking signal; convergence is demanded of the selected seed's
3260        // full-budget fit, not of capped probes.
3261        |state: &mut MeanWiggleOuterState, theta: &Array1<f64>| {
3262            if let Some((cached_theta, cached_cost, _, _, cached_warm)) = &state.last_eval
3263                && cached_theta == theta
3264            {
3265                state.warm_cache = Some(cached_warm.clone());
3266                return Ok(*cached_cost);
3267            }
3268            let (eval, _, _) = build_eval(theta, state.warm_cache.as_ref(), false)
3269                .map_err(EstimationError::InvalidInput)?;
3270            state.warm_cache = Some(eval.warm_start);
3271            Ok(eval.objective)
3272        },
3273    );
3274
3275    let outer = problem
3276        .run(&mut obj, "binomial mean wiggle exact spatial hyper")
3277        .map_err(|e| e.to_string())?;
3278    if !outer.converged {
3279        return Err(GamlssError::NumericalFailure { reason: format!(
3280            "binomial mean wiggle exact spatial hyper did not converge after {} iterations (final_objective={:.6e}, final_grad_norm={})",
3281            outer.iterations,
3282            outer.final_value,
3283            outer.final_grad_norm_report(),
3284        ) }.into());
3285    }
3286    let theta_star = outer.rho;
3287
3288    let log_kappa =
3289        SpatialLogKappaCoords::from_theta_tail_with_dims(&theta_star, rho_dim, dims_per_term);
3290    let resolvedspec = log_kappa
3291        .apply_tospec(&pilot_spec_cloned, &spatial_terms)
3292        .map_err(|e| e.to_string())?;
3293    let design = build_term_collection_design(data, &resolvedspec).map_err(|e| e.to_string())?;
3294    let resolvedspec =
3295        freeze_term_collection_from_design(&resolvedspec, &design).map_err(|e| e.to_string())?;
3296    let fit = fit_binomial_mean_wiggle(
3297        BinomialMeanWiggleSpec {
3298            y: y_cloned,
3299            weights: weights_cloned,
3300            link_kind: link_kind_cloned,
3301            wiggle_knots: wiggle_knots.clone(),
3302            wiggle_degree,
3303            eta_block: ParameterBlockInput {
3304                design: design.design.clone(),
3305                offset: Array1::zeros(y.len()),
3306                penalties: design
3307                    .penalties
3308                    .iter()
3309                    .map(crate::model_types::PenaltySpec::from_blockwise_ref)
3310                    .collect(),
3311                nullspace_dims: vec![],
3312                initial_log_lambdas: Some(theta_star.slice(s![0..eta_penalty_count]).to_owned()),
3313                initial_beta: Some(pilot_beta),
3314            },
3315            wiggle_block: ParameterBlockInput {
3316                design: wiggle_design,
3317                offset: wiggle_offset,
3318                penalties: wiggle_penalties,
3319                nullspace_dims: vec![],
3320                initial_log_lambdas: Some(
3321                    theta_star.slice(s![eta_penalty_count..rho_dim]).to_owned(),
3322                ),
3323                initial_beta: wiggle_initial_beta,
3324            },
3325        },
3326        options,
3327    )?;
3328    let (fit, saved_warp_beta) = fit;
3329
3330    Ok(BinomialMeanWiggleTermFitResult {
3331        fit,
3332        resolvedspec,
3333        design,
3334        wiggle_knots,
3335        wiggle_degree,
3336        saved_warp_beta,
3337    })
3338}