Skip to main content

gam_models/gamlss/
builders.rs

1// Real concern-organized submodule of the gamlss family stack.
2// Cross-module items are re-exported flat through the parent (`gamlss.rs`),
3// so `use super::*;` makes the sibling-concern symbols this module references
4// resolve through the parent namespace.
5use super::*;
6
7#[derive(Clone, Copy)]
8pub(crate) struct GamlssLambdaLayout {
9    pub(crate) k_mean: usize,
10    pub(crate) k_noise: usize,
11    pub(crate) kwiggle: usize,
12}
13
14impl GamlssLambdaLayout {
15    pub(crate) fn two_block(k_mean: usize, k_noise: usize) -> Self {
16        Self {
17            k_mean,
18            k_noise,
19            kwiggle: 0,
20        }
21    }
22
23    pub(crate) fn withwiggle(k_mean: usize, k_noise: usize, kwiggle: usize) -> Self {
24        Self {
25            k_mean,
26            k_noise,
27            kwiggle,
28        }
29    }
30
31    pub(crate) fn total(self) -> usize {
32        self.k_mean + self.k_noise + self.kwiggle
33    }
34
35    pub(crate) fn noise_start(self) -> usize {
36        self.k_mean
37    }
38
39    pub(crate) fn noise_end(self) -> usize {
40        self.k_mean + self.k_noise
41    }
42
43    pub(crate) fn wiggle_start(self) -> usize {
44        self.k_mean + self.k_noise
45    }
46
47    pub(crate) fn wiggle_end(self) -> usize {
48        self.k_mean + self.k_noise + self.kwiggle
49    }
50
51    pub(crate) fn validate_theta_len(self, theta_len: usize, context: &str) -> Result<(), String> {
52        let needed = self.total();
53        if theta_len < needed {
54            return Err(GamlssError::DimensionMismatch {
55                reason: format!(
56                    "{context} theta too short: got {}, need at least {}",
57                    theta_len, needed
58                ),
59            }
60            .into());
61        }
62        Ok(())
63    }
64
65    pub(crate) fn mean_from(self, theta: &Array1<f64>) -> Array1<f64> {
66        theta.slice(s![0..self.k_mean]).to_owned()
67    }
68
69    pub(crate) fn noise_from(self, theta: &Array1<f64>) -> Array1<f64> {
70        theta
71            .slice(s![self.noise_start()..self.noise_end()])
72            .to_owned()
73    }
74
75    pub(crate) fn wiggle_from(self, theta: &Array1<f64>) -> Array1<f64> {
76        theta
77            .slice(s![self.wiggle_start()..self.wiggle_end()])
78            .to_owned()
79    }
80}
81
82#[derive(Clone, Copy)]
83pub(crate) struct GamlssBetaLayout {
84    pub(crate) pt: usize,
85    pub(crate) pls: usize,
86    pub(crate) pw: usize,
87}
88
89impl GamlssBetaLayout {
90    pub(crate) fn withwiggle(pt: usize, pls: usize, pw: usize) -> Self {
91        Self { pt, pls, pw }
92    }
93
94    pub(crate) fn total(self) -> usize {
95        self.pt + self.pls + self.pw
96    }
97
98    pub(crate) fn split_three(
99        self,
100        flat: &Array1<f64>,
101        context: &str,
102    ) -> Result<(Array1<f64>, Array1<f64>, Array1<f64>), String> {
103        if flat.len() != self.total() {
104            return Err(GamlssError::DimensionMismatch {
105                reason: format!(
106                    "{context} length mismatch: got {}, expected {}",
107                    flat.len(),
108                    self.total()
109                ),
110            }
111            .into());
112        }
113        Ok((
114            flat.slice(s![0..self.pt]).to_owned(),
115            flat.slice(s![self.pt..self.pt + self.pls]).to_owned(),
116            flat.slice(s![self.pt + self.pls..self.total()]).to_owned(),
117        ))
118    }
119}
120
121#[derive(Clone, Debug)]
122pub struct FamilyMetadata {
123    pub name: &'static str,
124    pub parameternames: &'static [&'static str],
125    pub parameter_links: &'static [ParameterLink],
126}
127
128pub(crate) const DEFAULT_GAUGE_PRIORITY: u8 = 100;
129
130pub(crate) const LINK_WIGGLE_GAUGE_PRIORITY: u8 = 80;
131
132pub(crate) fn initial_log_lambdas_orzeros(
133    block: &ParameterBlockInput,
134) -> Result<Array1<f64>, String> {
135    let k = block.penalties.len();
136    let lambdas = block
137        .initial_log_lambdas
138        .clone()
139        .unwrap_or_else(|| Array1::<f64>::zeros(k));
140    if lambdas.len() != k {
141        return Err(GamlssError::DimensionMismatch {
142            reason: format!(
143                "initial_log_lambdas length mismatch: got {}, expected {}",
144                lambdas.len(),
145                k
146            ),
147        }
148        .into());
149    }
150    Ok(lambdas)
151}
152
153pub(crate) fn build_two_block_exact_joint_setup(
154    data: ArrayView2<'_, f64>,
155    meanspec: &TermCollectionSpec,
156    noisespec: &TermCollectionSpec,
157    mean_penalties: usize,
158    noise_penalties: usize,
159    extra_rho0: &[f64],
160    rho0_override: Option<&Array1<f64>>,
161    kappa_options: &SpatialLengthScaleOptimizationOptions,
162) -> ExactJointHyperSetup {
163    // GAMLSS-specific part: assemble the rho seed in [mean | noise | extra]
164    // penalty order, honoring a caller override when it matches the layout.
165    let rho_dim = mean_penalties + noise_penalties + extra_rho0.len();
166    let mut rho0vec = Array1::<f64>::zeros(rho_dim);
167    if let Some(rho0) = rho0_override.filter(|rho0| rho0.len() == rho_dim) {
168        rho0vec.assign(rho0);
169    } else {
170        for (i, &rho_init) in extra_rho0.iter().enumerate() {
171            rho0vec[mean_penalties + noise_penalties + i] = rho_init;
172        }
173    }
174
175    // Generic part: per-block log(kappa) seed/bounds and exact-joint assembly,
176    // with the two linear predictors (mean, noise) in theta order.
177    build_location_scale_exact_joint_setup(data, &[meanspec, noisespec], rho0vec, kappa_options)
178}
179
180pub(crate) fn gaussian_location_scalewarm_start(
181    y: &Array1<f64>,
182    weights: &Array1<f64>,
183    mu_block: &ParameterBlockSpec,
184    log_sigma_block: &ParameterBlockSpec,
185    ridge_floor: f64,
186    mean_beta_hint: Option<&Array1<f64>>,
187    noise_beta_hint: Option<&Array1<f64>>,
188) -> Result<(Array1<f64>, Array1<f64>, f64), String> {
189    let betamu = if let Some(beta) = mean_beta_hint {
190        beta.clone()
191    } else {
192        solve_penalizedweighted_projection(
193            &mu_block.design,
194            &mu_block.offset,
195            y,
196            weights,
197            &mu_block.penalties,
198            &mu_block.initial_log_lambdas,
199            ridge_floor,
200        )?
201    };
202    let mut mu_hat = mu_block.solver_design().matrixvectormultiply(&betamu);
203    mu_hat += mu_block.solver_offset();
204    let mut weighted_ss = 0.0;
205    let mut weight_sum = 0.0;
206    for i in 0..y.len() {
207        let wi = weights[i].max(0.0);
208        let resid = y[i] - mu_hat[i];
209        weighted_ss += wi * resid * resid;
210        weight_sum += wi;
211    }
212    if !weighted_ss.is_finite() || !weight_sum.is_finite() || weight_sum <= 0.0 {
213        return Err(
214            "gaussian location-scale warm start could not estimate residual scale".to_string(),
215        );
216    }
217    // Warm-start σ̂ must clear the logb floor so the inverse link
218    //   η = log(σ − b)
219    // is finite. Use a relative cushion above b so the warm-start is in the
220    // smooth interior of the link domain.
221    let sigma_hat = (weighted_ss / weight_sum)
222        .sqrt()
223        .max(LOGB_SIGMA_FLOOR * 1.5);
224    let beta_log_sigma = if let Some(beta) = noise_beta_hint {
225        beta.clone()
226    } else {
227        let eta_sigma = (sigma_hat - LOGB_SIGMA_FLOOR).ln();
228        let sigma_target = Array1::from_elem(y.len(), eta_sigma);
229        solve_penalizedweighted_projection(
230            &log_sigma_block.design,
231            &log_sigma_block.offset,
232            &sigma_target,
233            weights,
234            &log_sigma_block.penalties,
235            &log_sigma_block.initial_log_lambdas,
236            ridge_floor,
237        )?
238    };
239    Ok((betamu, beta_log_sigma, sigma_hat))
240}
241
242/// Total output count for every two-block location-scale family in this
243/// module (mu/log_sigma or threshold/log_sigma). The wiggle variants add a
244/// third zero-channel block but still drive only two output channels.
245pub(crate) const LOCATION_SCALE_N_OUTPUTS: usize = 2;
246
247/// Construct a fully wired location-scale parameter block.
248///
249/// This is the **only** way to build a LocationScale `ParameterBlockSpec` in
250/// this module — by construction the `AdditiveBlockJacobian` callback is
251/// always installed, so the channel-aware identifiability audit cannot be
252/// silently bypassed by a future `build_blocks` impl that forgets to wire
253/// the callback at the tail (re-introducing #319).
254///
255/// `own_output` is the zero-based output channel this block drives
256/// (e.g. 0 for `mu`/`threshold`, 1 for `log_sigma`). `n_family_outputs` is
257/// fixed at [`LOCATION_SCALE_N_OUTPUTS`] for every two-block family here
258/// but is exposed so the helper composes cleanly with any future
259/// k-block extension.
260pub(crate) fn build_location_scale_block(
261    name: impl Into<String>,
262    design: DesignMatrix,
263    offset: Array1<f64>,
264    penalties: Vec<PenaltyMatrix>,
265    nullspace_dims: Vec<usize>,
266    initial_log_lambdas: Array1<f64>,
267    initial_beta: Option<Array1<f64>>,
268    own_output: usize,
269    n_family_outputs: usize,
270    caller: &str,
271) -> Result<ParameterBlockSpec, String> {
272    if own_output >= n_family_outputs {
273        return Err(format!(
274            "{caller}: own_output={own_output} >= n_family_outputs={n_family_outputs}"
275        ));
276    }
277    let mut spec = ParameterBlockSpec {
278        name: name.into(),
279        design,
280        offset,
281        penalties,
282        nullspace_dims,
283        initial_log_lambdas,
284        initial_beta,
285        gauge_priority: 100,
286        jacobian_callback: None,
287        stacked_design: None,
288        stacked_offset: None,
289    };
290    let dense = spec.effective_design(caller)?;
291    spec.jacobian_callback = Some(std::sync::Arc::new(AdditiveBlockJacobian {
292        design: dense,
293        own_output,
294        n_family_outputs,
295    }));
296    Ok(spec)
297}
298
299/// Construct the wiggle block that accompanies a two-block location-scale
300/// family. The wiggle modulates the inverse link nonlinearly and
301/// contributes no linear effective Jacobian — the installed callback
302/// therefore exposes a zero `(n × p_w)` design under
303/// `n_family_outputs = LOCATION_SCALE_N_OUTPUTS`.
304pub(crate) fn build_location_scale_wiggle_block(
305    name: impl Into<String>,
306    design: DesignMatrix,
307    offset: Array1<f64>,
308    penalties: Vec<PenaltyMatrix>,
309    nullspace_dims: Vec<usize>,
310    initial_log_lambdas: Array1<f64>,
311    initial_beta: Option<Array1<f64>>,
312    n_rows: usize,
313) -> Result<ParameterBlockSpec, String> {
314    let p_w = design.ncols();
315    let mut spec = ParameterBlockSpec {
316        name: name.into(),
317        design,
318        offset,
319        penalties,
320        nullspace_dims,
321        initial_log_lambdas,
322        initial_beta,
323        gauge_priority: 100,
324        jacobian_callback: None,
325        stacked_design: None,
326        stacked_offset: None,
327    };
328    spec.jacobian_callback = Some(std::sync::Arc::new(AdditiveBlockJacobian {
329        design: ndarray::Array2::<f64>::zeros((n_rows, p_w)),
330        own_output: 0,
331        n_family_outputs: LOCATION_SCALE_N_OUTPUTS,
332    }));
333    Ok(spec)
334}
335
336pub(crate) fn prepared_gaussian_log_sigma_design(
337    mu_design: &DesignMatrix,
338    log_sigma_design: &DesignMatrix,
339) -> Result<DesignMatrix, String> {
340    if mu_design.nrows() != log_sigma_design.nrows() {
341        return Err(GamlssError::DimensionMismatch {
342            reason: format!(
343                "gaussian log-sigma design row mismatch: mean rows={}, log_sigma rows={}",
344                mu_design.nrows(),
345                log_sigma_design.nrows()
346            ),
347        }
348        .into());
349    }
350    // Gaussian location-scale remains identifiable even when μ and log σ use
351    // the same covariate basis:
352    //
353    //   L(μ, η) = 0.5 * Σ_i [ (y_i - μ_i)^2 exp(-2η_i) + 2η_i ],
354    //   μ = X_μ β_μ,  η = X_σ β_σ.
355    //
356    // Shared columns are not a frame mismatch. β_μ and β_σ enter through
357    // different sufficient statistics (residual and residual²), so replacing
358    // X_σ with (I - P_{X_μ}) X_σ would impose an extra constraint and can
359    // erase real heteroscedastic signal when the two blocks share a basis.
360    Ok(log_sigma_design.clone())
361}
362
363pub(crate) fn identified_binomial_log_sigma_design(
364    threshold_design: &TermCollectionDesign,
365    log_sigma_design: &TermCollectionDesign,
366    weights: &Array1<f64>,
367) -> Result<DesignMatrix, String> {
368    let non_intercept_start = log_sigma_design
369        .intercept_range
370        .end
371        .min(log_sigma_design.design.ncols());
372    let transform = build_scale_deviation_transform_design(
373        &threshold_design.design,
374        &log_sigma_design.design,
375        weights,
376        non_intercept_start,
377    )?;
378    build_scale_deviation_operator(
379        threshold_design.design.clone(),
380        log_sigma_design.design.clone(),
381        &transform,
382    )
383}
384
385pub(crate) fn identity_penalty(dim: usize) -> Array2<f64> {
386    let mut penalty = Array2::<f64>::zeros((dim, dim));
387    for i in 0..dim {
388        penalty[[i, i]] = 1.0;
389    }
390    penalty
391}
392
393/// Orthogonal projector `P₀ = U₀U₀ᵀ` onto the joint null space of the supplied
394/// penalty blocks over a `dim`-column coefficient space.
395///
396/// Used as the log-σ *shrinkage* penalty for the Gaussian location-scale scale
397/// block. The smooth's own wiggliness penalty already governs its range space
398/// (the curvature directions REML trades off against fit); its null space —
399/// the constant + low-order polynomial log-σ trend that carries the dominant
400/// heteroscedastic signal — is left unpenalized and is only weakly identified
401/// in the coupled (μ, log σ) likelihood, which lets the inner Newton wander
402/// (#1073's "flat/ill-conditioned surface"). A *full-space* identity ridge
403/// fixed that instability but DOUBLE-penalized the range space: REML then drove
404/// the shrinkage λ up, crushing the genuine heteroscedastic curve back to a
405/// constant σ (the underfit this issue reports). Penalizing the null space
406/// ALONE keeps the weakly-identified polynomial trend from blowing up without
407/// touching the wiggliness directions the smooth penalty already controls —
408/// exactly mgcv's `select = TRUE` null-space penalty.
409///
410/// When the supplied penalties already span the whole space (null space empty),
411/// the projector is the zero matrix and the shrinkage term is inert; when there
412/// are no penalties at all (e.g. a purely parametric log-σ design), the null
413/// space is everything and this returns the identity — recovering the previous
414/// full-space ridge exactly where it was the right thing to do.
415pub(crate) fn penalty_nullspace_projector(penalties: &[PenaltyMatrix], dim: usize) -> Array2<f64> {
416    use gam_linalg::faer_ndarray::FaerEigh;
417    use faer::Side;
418
419    if dim == 0 {
420        return Array2::<f64>::zeros((0, 0));
421    }
422    // Combined penalty S = Σ_k S_k over the dim-column scale space. Each block
423    // penalty is already expressed on this space (the scale design's columns).
424    let mut combined = Array2::<f64>::zeros((dim, dim));
425    for pen in penalties {
426        let dense = pen.to_dense();
427        assert_eq!(
428            dense.nrows(),
429            dim,
430            "scale penalty block dim {} != scale design cols {dim}",
431            dense.nrows()
432        );
433        if dense.nrows() == dim && dense.ncols() == dim {
434            combined += &dense;
435        }
436    }
437    // Symmetrize defensively (eigendecomposition assumes self-adjoint input).
438    let combined_sym = 0.5 * (&combined + &combined.t());
439    let (eigvals, eigvecs) = match combined_sym.eigh(Side::Lower) {
440        Ok(decomp) => decomp,
441        // A failed decomposition (degenerate / non-finite) should not silently
442        // drop the stabilizing shrinkage; fall back to the full-space ridge,
443        // which is the conservative (always-positive-definite) choice.
444        Err(_) => return identity_penalty(dim),
445    };
446    // Null space = eigenvectors whose eigenvalue is ≈ 0 relative to the largest.
447    // The combined wiggliness penalty's range-space eigenvalues are O(1) after
448    // basis normalization, so a relative floor cleanly separates the genuine
449    // null directions (constant / low-order polynomial) from the penalized
450    // curvature directions.
451    let max_eig = eigvals.iter().cloned().fold(0.0_f64, f64::max);
452    let tol = (max_eig * 1e-8).max(1e-12);
453    let mut projector = Array2::<f64>::zeros((dim, dim));
454    for (j, &lambda) in eigvals.iter().enumerate() {
455        if lambda <= tol {
456            let v = eigvecs.column(j);
457            // Accumulate v vᵀ into the projector.
458            for a in 0..dim {
459                let va = v[a];
460                if va == 0.0 {
461                    continue;
462                }
463                for b in 0..dim {
464                    projector[[a, b]] += va * v[b];
465                }
466            }
467        }
468    }
469    projector
470}
471
472pub(crate) fn append_binomial_log_sigma_shrinkage_penalty_design(
473    design: &mut TermCollectionDesign,
474) {
475    let p = design.design.ncols();
476    design
477        .penalties
478        .push(BlockwisePenalty::new(0..p, identity_penalty(p)));
479    // Identity penalty penalizes the full space → nullspace dimension is 0.
480    design.nullspace_dims.push(0);
481    design.penaltyinfo.push(PenaltyBlockInfo {
482        global_index: design.penaltyinfo.len(),
483        termname: Some("log_sigma_shrinkage".to_string()),
484        penalty: PenaltyInfo {
485            source: PenaltySource::Other("shrinkage".to_string()),
486            original_index: 0,
487            active: true,
488            effective_rank: p,
489            dropped_reason: None,
490            nullspace_dim_hint: 0,
491            normalization_scale: 1.0,
492            kronecker_factors: None,
493        },
494    });
495}
496
497/// Build the (mean, log-σ) parameter-block pair for a Gaussian location-scale
498/// family. Shared verbatim by the non-wiggle and wiggle Gaussian builders so the
499/// scale-block construction — prepared log-σ design, the REML-selected full-span
500/// shrinkage penalty on the scale nullspace, and the joint Gaussian warm start —
501/// lives in exactly one place. Callers supply the per-block log-λ vectors sliced
502/// from their own layout (two-block vs with-wiggle) and append any extra blocks.
503pub(crate) fn build_gaussian_mean_and_scale_blocks(
504    y: &Array1<f64>,
505    weights: &Array1<f64>,
506    mean_design: &TermCollectionDesign,
507    noise_design: &TermCollectionDesign,
508    mean_offset: &Array1<f64>,
509    noise_offset: &Array1<f64>,
510    mean_log_lambdas: Array1<f64>,
511    noise_log_lambdas: Array1<f64>,
512    mean_beta_hint: Option<Array1<f64>>,
513    noise_beta_hint: Option<Array1<f64>>,
514    context: &str,
515) -> Result<(ParameterBlockSpec, ParameterBlockSpec), String> {
516    let mut meanspec = build_location_scale_block(
517        "mu",
518        mean_design.design.clone(),
519        mean_offset.clone(),
520        mean_design.penalties_as_penalty_matrix(),
521        mean_design.nullspace_dims.clone(),
522        mean_log_lambdas,
523        mean_beta_hint,
524        0,
525        LOCATION_SCALE_N_OUTPUTS,
526        &format!("{context}: mu"),
527    )?;
528    let prepared_noise_design =
529        prepared_gaussian_log_sigma_design(&mean_design.design, &noise_design.design)?;
530    let p_noise = prepared_noise_design.ncols();
531    let mut log_sigma_penalty_matrices = noise_design.penalties_as_penalty_matrix();
532    // Shrinkage penalty on the scale block's *null space only* (mgcv
533    // `select = TRUE`): it stabilizes the weakly-identified constant/polynomial
534    // log-σ trend without double-penalizing the wiggliness directions the
535    // smooth penalty already governs. A full-space identity here over-shrinks
536    // the genuine heteroscedastic curve back to a constant σ (#1073).
537    let shrinkage = penalty_nullspace_projector(&log_sigma_penalty_matrices, p_noise);
538    // The rank of an orthogonal projector equals its trace (P = P² for a projector,
539    // so trace(P) = trace(P²) = ||P||_F² = sum of squared singular values = rank).
540    // The diagonal-threshold test `diag[i] > 0.5` used previously was wrong: for a
541    // rank-d projector onto a low-dimensional subspace (e.g. d=2 null directions of
542    // a TP spline with p=10 columns), each diagonal entry is O(d/p) << 0.5, so the
543    // threshold always returned 0 — misreporting the shrinkage penalty as having
544    // zero penalized dimensions. Trace-based rank is exact for a symmetric
545    // idempotent matrix (rounded to the nearest integer to absorb floating-point
546    // rounding in the eigendecomposition).
547    let shrinkage_rank = (0..p_noise).map(|i| shrinkage[[i, i]]).sum::<f64>().round() as usize;
548    log_sigma_penalty_matrices.push(PenaltyMatrix::Dense(shrinkage));
549    let mut log_sigma_nullspace_dims = noise_design.nullspace_dims.clone();
550    // The null-space projector penalizes a rank-`shrinkage_rank` subspace, so
551    // the remaining unpenalized directions number `p_noise − shrinkage_rank`.
552    log_sigma_nullspace_dims.push(p_noise.saturating_sub(shrinkage_rank));
553    let mut noisespec = build_location_scale_block(
554        "log_sigma",
555        prepared_noise_design,
556        noise_offset.clone(),
557        log_sigma_penalty_matrices,
558        log_sigma_nullspace_dims,
559        noise_log_lambdas,
560        noise_beta_hint,
561        1,
562        LOCATION_SCALE_N_OUTPUTS,
563        &format!("{context}: log_sigma"),
564    )?;
565    if meanspec.initial_beta.is_none() || noisespec.initial_beta.is_none() {
566        let (betamu0, beta_ls0, _) = gaussian_location_scalewarm_start(
567            y,
568            weights,
569            &meanspec,
570            &noisespec,
571            1e-10,
572            meanspec.initial_beta.as_ref(),
573            noisespec.initial_beta.as_ref(),
574        )?;
575        if meanspec.initial_beta.is_none() {
576            meanspec.initial_beta = Some(betamu0);
577        }
578        if noisespec.initial_beta.is_none() {
579            noisespec.initial_beta = Some(beta_ls0);
580        }
581    }
582    Ok((meanspec, noisespec))
583}
584
585/// Build the (threshold, log-σ) parameter-block pair for a Binomial
586/// location-scale family. Shared by the non-wiggle and wiggle Binomial builders;
587/// mirrors [`build_gaussian_mean_and_scale_blocks`] but with the binomial-
588/// identified log-σ design, the link-aware joint warm start, and the same
589/// REML-selected full-span scale shrinkage penalty.
590pub(crate) fn build_binomial_threshold_and_scale_blocks(
591    y: &Array1<f64>,
592    weights: &Array1<f64>,
593    link_kind: &InverseLink,
594    mean_design: &TermCollectionDesign,
595    noise_design: &TermCollectionDesign,
596    mean_offset: &Array1<f64>,
597    noise_offset: &Array1<f64>,
598    mean_log_lambdas: Array1<f64>,
599    noise_log_lambdas: Array1<f64>,
600    mean_beta_hint: Option<Array1<f64>>,
601    noise_beta_hint: Option<Array1<f64>>,
602    context: &str,
603) -> Result<(ParameterBlockSpec, ParameterBlockSpec), String> {
604    let identifiednoise_design =
605        identified_binomial_log_sigma_design(mean_design, noise_design, weights)?;
606    let p_noise = identifiednoise_design.ncols();
607    let mut log_sigma_penalty_matrices: Vec<PenaltyMatrix> =
608        noise_design.penalties_as_penalty_matrix();
609    log_sigma_penalty_matrices.push(PenaltyMatrix::Dense(identity_penalty(p_noise)));
610    let mut thresholdspec = build_location_scale_block(
611        "threshold",
612        mean_design.design.clone(),
613        mean_offset.clone(),
614        mean_design.penalties_as_penalty_matrix(),
615        vec![],
616        mean_log_lambdas,
617        mean_beta_hint,
618        0,
619        LOCATION_SCALE_N_OUTPUTS,
620        &format!("{context}: threshold"),
621    )?;
622    let mut log_sigmaspec = build_location_scale_block(
623        "log_sigma",
624        identifiednoise_design,
625        noise_offset.clone(),
626        log_sigma_penalty_matrices,
627        vec![],
628        noise_log_lambdas,
629        noise_beta_hint,
630        1,
631        LOCATION_SCALE_N_OUTPUTS,
632        &format!("{context}: log_sigma"),
633    )?;
634    if thresholdspec.initial_beta.is_none() || log_sigmaspec.initial_beta.is_none() {
635        let (beta_t0, beta_ls0) = binomial_location_scalewarm_start(
636            y,
637            weights,
638            link_kind,
639            &thresholdspec,
640            &log_sigmaspec,
641            thresholdspec.initial_beta.as_ref(),
642            log_sigmaspec.initial_beta.as_ref(),
643        )?;
644        if thresholdspec.initial_beta.is_none() {
645            thresholdspec.initial_beta = Some(beta_t0);
646        }
647        if log_sigmaspec.initial_beta.is_none() {
648            log_sigmaspec.initial_beta = Some(beta_ls0);
649        }
650    }
651    Ok((thresholdspec, log_sigmaspec))
652}
653
654/// Convert a wiggle block's `PenaltySpec`s into the `PenaltyMatrix` list the
655/// location-scale wiggle block expects. Shared by the Gaussian and Binomial
656/// wiggle builders, which previously inlined the identical match.
657pub(crate) fn wiggle_block_penalty_matrices(
658    wiggle_block: &ParameterBlockInput,
659) -> Vec<PenaltyMatrix> {
660    let p_wiggle = wiggle_block.design.ncols();
661    wiggle_block
662        .penalties
663        .iter()
664        .map(|spec| match spec {
665            crate::model_types::PenaltySpec::Block {
666                local, col_range, ..
667            } => PenaltyMatrix::Blockwise {
668                local: local.clone(),
669                col_range: col_range.clone(),
670                total_dim: p_wiggle,
671            },
672            crate::model_types::PenaltySpec::Dense(m)
673            | crate::model_types::PenaltySpec::DenseWithMean { matrix: m, .. } => {
674                PenaltyMatrix::Dense(m.clone())
675            }
676        })
677        .collect()
678}
679
680pub(crate) fn binomial_location_scale_link_eta_from_probability(
681    link_kind: &InverseLink,
682    probability: f64,
683) -> Result<f64, String> {
684    let target = probability.clamp(1e-6, 1.0 - 1e-6);
685    match link_kind {
686        InverseLink::Standard(StandardLink::Logit) => Ok((target / (1.0 - target)).ln()),
687        InverseLink::Standard(StandardLink::Probit) => standard_normal_quantile(target)
688            .map_err(|err| format!("failed to invert probit warm-start probability: {err}")),
689        InverseLink::Standard(StandardLink::CLogLog) => Ok((-((1.0 - target).ln())).ln()),
690        other => Err(GamlssError::UnsupportedConfiguration { reason: format!(
691            "binomial location-scale warm start requires logit, probit, or cloglog link, got {other:?}"
692        ) }.into()),
693    }
694}
695
696pub(crate) fn weighted_binomial_prevalence(
697    y: &Array1<f64>,
698    weights: &Array1<f64>,
699) -> Result<f64, String> {
700    if y.len() != weights.len() {
701        return Err(GamlssError::DimensionMismatch { reason: format!(
702            "binomial location-scale warm start dimension mismatch: y has length {}, weights have length {}",
703            y.len(),
704            weights.len()
705        ) }.into());
706    }
707    let mut weight_sum = 0.0;
708    let mut success_sum = 0.0;
709    for (&yi, &wi) in y.iter().zip(weights.iter()) {
710        if !yi.is_finite() {
711            return Err(GamlssError::NonFinite {
712                reason: format!(
713                    "binomial location-scale warm start encountered non-finite response {yi}"
714                ),
715            }
716            .into());
717        }
718        let weight = floor_positiveweight(wi, MIN_WEIGHT);
719        if weight > 0.0 {
720            weight_sum += weight;
721            success_sum += weight * yi;
722        }
723    }
724    if !weight_sum.is_finite() || weight_sum <= 0.0 {
725        return Err(
726            "binomial location-scale warm start requires positive total weight".to_string(),
727        );
728    }
729    Ok(success_sum / weight_sum)
730}
731
732pub(crate) fn project_constant_eta_into_block(
733    block: &ParameterBlockSpec,
734    weights: &Array1<f64>,
735    eta: f64,
736) -> Result<Array1<f64>, String> {
737    let target_eta = Array1::from_elem(block.design.nrows(), eta);
738    solve_penalizedweighted_projection(
739        &block.design,
740        &block.offset,
741        &target_eta,
742        weights,
743        &block.penalties,
744        &block.initial_log_lambdas,
745        1e-10,
746    )
747}
748
749// Deterministic warm start for the binomial location-scale model. This stays
750// out of the optimizer: it projects a prevalence-matched threshold and neutral
751// log-sigma value into the actual penalized block spaces.
752pub(crate) fn binomial_location_scalewarm_start(
753    y: &Array1<f64>,
754    weights: &Array1<f64>,
755    link_kind: &InverseLink,
756    threshold_block: &ParameterBlockSpec,
757    log_sigma_block: &ParameterBlockSpec,
758    mean_beta_hint: Option<&Array1<f64>>,
759    noise_beta_hint: Option<&Array1<f64>>,
760) -> Result<(Array1<f64>, Array1<f64>), String> {
761    if let (Some(mean_beta), Some(noise_beta)) = (mean_beta_hint, noise_beta_hint) {
762        return Ok((mean_beta.clone(), noise_beta.clone()));
763    }
764
765    let beta_threshold = match mean_beta_hint {
766        Some(beta) => beta.clone(),
767        None => {
768            let prevalence = weighted_binomial_prevalence(y, weights)?;
769            let eta = binomial_location_scale_link_eta_from_probability(link_kind, prevalence)?;
770            project_constant_eta_into_block(threshold_block, weights, eta)?
771        }
772    };
773    let beta_log_sigma = match noise_beta_hint {
774        Some(beta) => beta.clone(),
775        None => project_constant_eta_into_block(log_sigma_block, weights, 0.0)?,
776    };
777    Ok((beta_threshold, beta_log_sigma))
778}
779
780#[derive(Clone)]
781pub(crate) struct BinomialMeanWiggleSpec {
782    pub y: Array1<f64>,
783    pub weights: Array1<f64>,
784    pub link_kind: InverseLink,
785    pub wiggle_knots: Array1<f64>,
786    pub wiggle_degree: usize,
787    pub eta_block: ParameterBlockInput,
788    pub wiggle_block: ParameterBlockInput,
789}
790
791#[derive(Clone)]
792pub struct GaussianLocationScaleTermSpec {
793    pub y: Array1<f64>,
794    pub weights: Array1<f64>,
795    pub meanspec: TermCollectionSpec,
796    pub log_sigmaspec: TermCollectionSpec,
797    pub mean_offset: Array1<f64>,
798    pub log_sigma_offset: Array1<f64>,
799}
800
801#[derive(Clone)]
802pub struct GaussianLocationScaleWiggleTermSpec {
803    pub y: Array1<f64>,
804    pub weights: Array1<f64>,
805    pub meanspec: TermCollectionSpec,
806    pub log_sigmaspec: TermCollectionSpec,
807    pub mean_offset: Array1<f64>,
808    pub log_sigma_offset: Array1<f64>,
809    pub wiggle_knots: Array1<f64>,
810    pub wiggle_degree: usize,
811    pub wiggle_block: ParameterBlockInput,
812}
813
814#[derive(Clone)]
815pub struct BinomialLocationScaleTermSpec {
816    pub y: Array1<f64>,
817    pub weights: Array1<f64>,
818    pub link_kind: InverseLink,
819    pub thresholdspec: TermCollectionSpec,
820    pub log_sigmaspec: TermCollectionSpec,
821    pub threshold_offset: Array1<f64>,
822    pub log_sigma_offset: Array1<f64>,
823}
824
825#[derive(Clone)]
826pub struct BinomialLocationScaleWiggleTermSpec {
827    pub y: Array1<f64>,
828    pub weights: Array1<f64>,
829    pub link_kind: InverseLink,
830    pub thresholdspec: TermCollectionSpec,
831    pub log_sigmaspec: TermCollectionSpec,
832    pub threshold_offset: Array1<f64>,
833    pub log_sigma_offset: Array1<f64>,
834    pub wiggle_knots: Array1<f64>,
835    pub wiggle_degree: usize,
836    pub wiggle_block: ParameterBlockInput,
837}
838
839#[derive(Clone, Debug)]
840pub struct BlockwiseTermFitResult {
841    pub fit: UnifiedFitResult,
842    pub meanspec_resolved: TermCollectionSpec,
843    pub noisespec_resolved: TermCollectionSpec,
844    pub mean_design: TermCollectionDesign,
845    pub noise_design: TermCollectionDesign,
846}
847
848pub(crate) struct BlockwiseTermFitResultParts {
849    pub fit: UnifiedFitResult,
850    pub meanspec_resolved: TermCollectionSpec,
851    pub noisespec_resolved: TermCollectionSpec,
852    pub mean_design: TermCollectionDesign,
853    pub noise_design: TermCollectionDesign,
854}
855
856pub struct BlockwiseTermWiggleFitResult {
857    pub fit: BlockwiseTermFitResult,
858    pub wiggle_knots: Array1<f64>,
859    pub wiggle_degree: usize,
860}
861
862pub struct BinomialMeanWiggleTermFitResult {
863    pub fit: UnifiedFitResult,
864    pub resolvedspec: TermCollectionSpec,
865    pub design: TermCollectionDesign,
866    pub wiggle_knots: Array1<f64>,
867    pub wiggle_degree: usize,
868    /// Standard-basis warp coefficients `β_w = Z·γ` for the saved-model predict
869    /// runtime, when the frozen-basis de-aliasing engaged (#1596). The fit's own
870    /// coefficients stay in the reduced, identifiable `γ` coordinate; this is the
871    /// out-of-band full-width lift consumed by `beta_link_wiggle`.
872    pub saved_warp_beta: Option<Vec<f64>>,
873}
874
875pub(crate) struct BlockwiseTermWiggleFitResultParts {
876    pub fit: BlockwiseTermFitResult,
877    pub wiggle_knots: Array1<f64>,
878    pub wiggle_degree: usize,
879}
880
881pub(crate) fn validate_term_collection_design(
882    label: &str,
883    design: &TermCollectionDesign,
884) -> Result<(), String> {
885    let p = design.design.ncols();
886    let n = design.design.nrows();
887    for rows in exact_design_row_chunks(n, p) {
888        let chunk = design
889            .design
890            .try_row_chunk(rows)
891            .map_err(|e| format!("{label}.design row chunk materialization failed: {e}"))?;
892        validate_all_finite_estimation(&format!("{label}.design"), chunk.iter().copied())
893            .map_err(|e| e.to_string())?;
894    }
895    if design.nullspace_dims.len() != design.penalties.len() {
896        return Err(GamlssError::DimensionMismatch {
897            reason: format!(
898                "{label}.nullspace_dims length mismatch: got {}, expected {}",
899                design.nullspace_dims.len(),
900                design.penalties.len()
901            ),
902        }
903        .into());
904    }
905    if design.penaltyinfo.len() != design.penalties.len() {
906        return Err(GamlssError::DimensionMismatch {
907            reason: format!(
908                "{label}.penaltyinfo length mismatch: got {}, expected {}",
909                design.penaltyinfo.len(),
910                design.penalties.len()
911            ),
912        }
913        .into());
914    }
915    for (idx, bp) in design.penalties.iter().enumerate() {
916        validate_all_finite_estimation(
917            &format!("{label}.penalties[{idx}]"),
918            bp.local.iter().copied(),
919        )
920        .map_err(|e| e.to_string())?;
921        if bp.col_range.end > p {
922            return Err(GamlssError::DimensionMismatch {
923                reason: format!(
924                    "{label}.penalties[{idx}] col_range {}..{} exceeds design width {}",
925                    bp.col_range.start, bp.col_range.end, p
926                ),
927            }
928            .into());
929        }
930    }
931    if let Some(bounds) = design.coefficient_lower_bounds.as_ref() {
932        if bounds.len() != p {
933            return Err(GamlssError::ConstraintViolation {
934                reason: format!(
935                    "{label}.coefficient_lower_bounds length mismatch: got {}, expected {p}",
936                    bounds.len()
937                ),
938            }
939            .into());
940        }
941        for (idx, &bound) in bounds.iter().enumerate() {
942            if !(bound.is_finite() || bound == f64::NEG_INFINITY) {
943                return Err(GamlssError::NonFinite { reason: format!(
944                    "{label}.coefficient_lower_bounds[{idx}] must be finite or -inf, got {bound}",
945                ) }.into());
946            }
947        }
948    }
949    if let Some(constraints) = design.linear_constraints.as_ref() {
950        validate_all_finite_estimation(
951            &format!("{label}.linear_constraints.a"),
952            constraints.a.iter().copied(),
953        )
954        .map_err(|e| e.to_string())?;
955        validate_all_finite_estimation(
956            &format!("{label}.linear_constraints.b"),
957            constraints.b.iter().copied(),
958        )
959        .map_err(|e| e.to_string())?;
960        if constraints.a.ncols() != p {
961            return Err(GamlssError::DimensionMismatch {
962                reason: format!(
963                    "{label}.linear_constraints.a column mismatch: got {}, expected {p}",
964                    constraints.a.ncols()
965                ),
966            }
967            .into());
968        }
969        if constraints.a.nrows() != constraints.b.len() {
970            return Err(GamlssError::DimensionMismatch {
971                reason: format!(
972                    "{label}.linear_constraints row mismatch: a has {}, b has {}",
973                    constraints.a.nrows(),
974                    constraints.b.len()
975                ),
976            }
977            .into());
978        }
979    }
980    if design.intercept_range.start > design.intercept_range.end || design.intercept_range.end > p {
981        return Err(GamlssError::ConstraintViolation {
982            reason: format!(
983                "{label}.intercept_range out of bounds: {:?} for {} columns",
984                design.intercept_range, p
985            ),
986        }
987        .into());
988    }
989    Ok(())
990}
991
992impl BlockwiseTermFitResult {
993    pub(crate) fn try_from_parts(parts: BlockwiseTermFitResultParts) -> Result<Self, String> {
994        let BlockwiseTermFitResultParts {
995            fit,
996            meanspec_resolved,
997            noisespec_resolved,
998            mean_design,
999            noise_design,
1000        } = parts;
1001
1002        fit.validate_numeric_finiteness()
1003            .map_err(|e| format!("{e}"))?;
1004        if fit.block_states.len() < 2 {
1005            return Err(GamlssError::DimensionMismatch {
1006                reason: format!(
1007                    "BlockwiseTermFitResult requires at least 2 block states, got {}",
1008                    fit.block_states.len()
1009                ),
1010            }
1011            .into());
1012        }
1013        validate_term_collection_design("blockwise_term.mean_design", &mean_design)?;
1014        validate_term_collection_design("blockwise_term.noise_design", &noise_design)?;
1015        if mean_design.design.nrows() != noise_design.design.nrows() {
1016            return Err(GamlssError::DimensionMismatch {
1017                reason: format!(
1018                    "BlockwiseTermFitResult row mismatch: mean_design={}, noise_design={}",
1019                    mean_design.design.nrows(),
1020                    noise_design.design.nrows()
1021                ),
1022            }
1023            .into());
1024        }
1025        if fit.block_states[0].beta.len() != mean_design.design.ncols() {
1026            return Err(GamlssError::DimensionMismatch {
1027                reason: format!(
1028                    "BlockwiseTermFitResult mean beta length mismatch: got {}, expected {}",
1029                    fit.block_states[0].beta.len(),
1030                    mean_design.design.ncols()
1031                ),
1032            }
1033            .into());
1034        }
1035        if fit.block_states[1].beta.len() != noise_design.design.ncols() {
1036            return Err(GamlssError::DimensionMismatch {
1037                reason: format!(
1038                    "BlockwiseTermFitResult noise beta length mismatch: got {}, expected {}",
1039                    fit.block_states[1].beta.len(),
1040                    noise_design.design.ncols()
1041                ),
1042            }
1043            .into());
1044        }
1045        if fit.block_states[0].eta.len() != mean_design.design.nrows() {
1046            return Err(GamlssError::DimensionMismatch {
1047                reason: format!(
1048                    "BlockwiseTermFitResult mean eta length mismatch: got {}, expected {}",
1049                    fit.block_states[0].eta.len(),
1050                    mean_design.design.nrows()
1051                ),
1052            }
1053            .into());
1054        }
1055        if fit.block_states[1].eta.len() != noise_design.design.nrows() {
1056            return Err(GamlssError::DimensionMismatch {
1057                reason: format!(
1058                    "BlockwiseTermFitResult noise eta length mismatch: got {}, expected {}",
1059                    fit.block_states[1].eta.len(),
1060                    noise_design.design.nrows()
1061                ),
1062            }
1063            .into());
1064        }
1065
1066        Ok(Self {
1067            fit,
1068            meanspec_resolved,
1069            noisespec_resolved,
1070            mean_design,
1071            noise_design,
1072        })
1073    }
1074
1075    pub(crate) fn validate_numeric_finiteness(&self) -> Result<(), String> {
1076        Self::try_from_parts(BlockwiseTermFitResultParts {
1077            fit: self.fit.clone(),
1078            meanspec_resolved: self.meanspec_resolved.clone(),
1079            noisespec_resolved: self.noisespec_resolved.clone(),
1080            mean_design: self.mean_design.clone(),
1081            noise_design: self.noise_design.clone(),
1082        })
1083        .map(|_| ())
1084    }
1085}
1086
1087impl BlockwiseTermWiggleFitResult {
1088    pub(crate) fn try_from_parts(parts: BlockwiseTermWiggleFitResultParts) -> Result<Self, String> {
1089        let BlockwiseTermWiggleFitResultParts {
1090            fit,
1091            wiggle_knots,
1092            wiggle_degree,
1093        } = parts;
1094
1095        fit.validate_numeric_finiteness()
1096            .map_err(|e| e.to_string())?;
1097        if fit.fit.block_states.len() < 3 {
1098            return Err(GamlssError::DimensionMismatch {
1099                reason: format!(
1100                    "BlockwiseTermWiggleFitResult requires at least 3 block states, got {}",
1101                    fit.fit.block_states.len()
1102                ),
1103            }
1104            .into());
1105        }
1106        if wiggle_knots.is_empty() {
1107            return Err(GamlssError::UnsupportedConfiguration {
1108                reason: "BlockwiseTermWiggleFitResult requires non-empty wiggle_knots".to_string(),
1109            }
1110            .into());
1111        }
1112        validate_all_finite_estimation(
1113            "blockwise_term_wiggle.wiggle_knots",
1114            wiggle_knots.iter().copied(),
1115        )
1116        .map_err(|e| e.to_string())?;
1117
1118        Ok(Self {
1119            fit,
1120            wiggle_knots,
1121            wiggle_degree,
1122        })
1123    }
1124}
1125
1126pub struct BinomialLocationScaleFitResult {
1127    pub fit: BlockwiseTermFitResult,
1128    pub wiggle_knots: Option<Array1<f64>>,
1129    pub wiggle_degree: Option<usize>,
1130    pub beta_link_wiggle: Option<Vec<f64>>,
1131}
1132
1133pub struct GaussianLocationScaleFitResult {
1134    pub fit: BlockwiseTermFitResult,
1135    pub wiggle_knots: Option<Array1<f64>>,
1136    pub wiggle_degree: Option<usize>,
1137    pub beta_link_wiggle: Option<Vec<f64>>,
1138    /// Response standardization factor applied internally during fitting.
1139    ///
1140    /// The Gaussian location-scale path fits on `y / response_scale` so the
1141    /// fixed log-σ soft floor `LOGB_SIGMA_FLOOR = 0.01` is *operationally*
1142    /// scale-relative (1 % of the response spread) rather than absolute,
1143    /// keeping κ = dlogσ/dη ≈ 1 across the realistic σ range and informing the
1144    /// scale block like gamlss. The returned coefficient `blocks`, `beta`, and
1145    /// link-wiggle knots/coefficients are already mapped back to **raw response
1146    /// units** (the Location/Mean block scaled by `response_scale`, the Scale
1147    /// block intercept shifted by `+ln(response_scale)`), so downstream
1148    /// reconstruction `μ = X_mean·β` comes out in raw units with no further
1149    /// rescaling.
1150    ///
1151    /// The σ reconstruction, however, **must scale the floor too** to stay
1152    /// response-scale-equivariant (#884):
1153    ///
1154    /// ```text
1155    /// σ = response_scale·LOGB_SIGMA_FLOOR + exp(X_scale·β)
1156    ///   = response_scale·(LOGB_SIGMA_FLOOR + exp(η_internal)).
1157    /// ```
1158    ///
1159    /// The intercept shift carries only the `exp(η)` term; reconstructing with a
1160    /// raw `LOGB_SIGMA_FLOOR` instead of `response_scale·LOGB_SIGMA_FLOOR` leaves
1161    /// the non-equivariant residual `LOGB_SIGMA_FLOOR·(1 − response_scale)`.
1162    ///
1163    /// This field records the factor that was applied for transparency,
1164    /// covariance bookkeeping, and the equivariant σ-floor reconstruction; it is
1165    /// `1.0` when no standardization was needed (degenerate constant response).
1166    pub response_scale: f64,
1167}
1168
1169/// Fit the binomial mean link-wiggle model. Returns the γ-space fit and, when
1170/// the frozen-basis de-aliasing engaged (#1596), the standard-basis warp
1171/// coefficients `β_w = Z·γ` for the saved-model predict runtime.
1172pub(crate) fn fit_binomial_mean_wiggle(
1173    spec: BinomialMeanWiggleSpec,
1174    options: &BlockwiseFitOptions,
1175) -> Result<(UnifiedFitResult, Option<Vec<f64>>), String> {
1176    let n = spec.y.len();
1177    validate_len_match("weights vs y", n, spec.weights.len())?;
1178    validateweights(&spec.weights, "fit_binomial_mean_wiggle")?;
1179    validate_binomial_response(&spec.y, "fit_binomial_mean_wiggle")?;
1180    validate_blockrows("eta", n, &spec.eta_block)?;
1181    validate_blockrows("wiggle", n, &spec.wiggle_block)?;
1182    if matches!(
1183        spec.link_kind,
1184        InverseLink::Standard(StandardLink::Identity)
1185    ) {
1186        return Err(GamlssError::UnsupportedConfiguration {
1187            reason: "fit_binomial_mean_wiggle does not support identity link".to_string(),
1188        }
1189        .into());
1190    }
1191    gam_terms::inference::formula_dsl::require_binomial_inverse_link_supports_joint_wiggle(
1192        &spec.link_kind,
1193        "fit_binomial_mean_wiggle",
1194    )?;
1195    if spec.wiggle_degree < 2 {
1196        return Err(GamlssError::ConstraintViolation {
1197            reason: format!(
1198                "fit_binomial_mean_wiggle: wiggle_degree must be >= 2, got {}",
1199                spec.wiggle_degree
1200            ),
1201        }
1202        .into());
1203    }
1204    let minimum_knots = minimum_monotone_wiggle_knot_count(spec.wiggle_degree)?;
1205    if spec.wiggle_knots.len() < minimum_knots {
1206        return Err(GamlssError::DimensionMismatch { reason: format!(
1207            "fit_binomial_mean_wiggle: wiggle_knots length {} is too short for degree {} (need at least {})",
1208            spec.wiggle_knots.len(),
1209            spec.wiggle_degree,
1210            minimum_knots
1211        ) }.into());
1212    }
1213
1214    // ----- Frozen-basis Gauss-Newton link-warp fit (#1596) -----
1215    //
1216    // The warp basis `B(η)` is frozen at the current index `η̂` so that
1217    // `q = η + B(η̂)·β_w` is linear in `(β_η, β_w)` (`∂q/∂η = 1`). To keep the
1218    // mean block `X` full and identifiable we fit the warp through the
1219    // observation-space residualized design `B⊥ = (I - P_X)B(η̂)`. We re-freeze
1220    // at the refit `η̂` across a few outer iterations until it stops moving.
1221    let x_dense: Array2<f64> = spec.eta_block.design.to_dense();
1222    let pilot_eta: Array1<f64> = {
1223        let pilot_beta = spec.eta_block.initial_beta.clone().ok_or_else(|| {
1224            "fit_binomial_mean_wiggle: eta block carries no pilot β to seed the \
1225             frozen-basis warp index"
1226                .to_string()
1227        })?;
1228        if x_dense.ncols() != pilot_beta.len() {
1229            return Err(GamlssError::DimensionMismatch {
1230                reason: format!(
1231                    "fit_binomial_mean_wiggle: eta design has {} columns but pilot β has {} \
1232                     coefficients",
1233                    x_dense.ncols(),
1234                    pilot_beta.len()
1235                ),
1236            }
1237            .into());
1238        }
1239        let mut eta = x_dense.dot(&pilot_beta);
1240        if spec.eta_block.offset.len() == eta.len() {
1241            eta += &spec.eta_block.offset;
1242        }
1243        eta
1244    };
1245
1246    // Original (full-width) warp penalties / nullspace metadata, captured before
1247    // `spec.wiggle_block` is consumed. The residualized block keeps the same
1248    // coefficient coordinate and therefore the same penalties.
1249    let wiggle_penalties_full = spec.wiggle_block.penalties.clone();
1250    let wiggle_log_lambdas = spec.wiggle_block.initial_log_lambdas.clone();
1251    let eta_block_input = spec.eta_block.clone();
1252
1253    let family = BinomialMeanWiggleFamily {
1254        y: spec.y,
1255        weights: spec.weights,
1256        link_kind: spec.link_kind,
1257        wiggle_knots: spec.wiggle_knots,
1258        wiggle_degree: spec.wiggle_degree,
1259        policy: gam_runtime::resource::ResourcePolicy::default_library(),
1260        frozen_warp_design: None,
1261    };
1262
1263    // Build the de-aliased warp block at a frozen index.  The identifiable
1264    // warp is the part of `B(η̂)` outside the mean column space:
1265    //
1266    //     B⊥ = (I - P_X) B = B - X A,     A = (XᵀX)^+ XᵀB.
1267    //
1268    // The previous implementation used `Z = null(XᵀB)` and fitted `B Z`.
1269    // That is too strong: when the warp basis has no coefficient combination
1270    // exactly orthogonal to `X` (for example a two-column flexible link beside
1271    // an intercept+slope mean), it drops every warp coefficient even though
1272    // the nonlinear columns of `B(η̂)` have a nonzero residual after projection
1273    // onto `X`.  Residualizing in observation space removes only the truly
1274    // mean-aliased component and leaves the curved, identifiable link-shape
1275    // signal available to the joint solve.
1276    //
1277    // The returned `A` is used after fitting: because the inner problem used
1278    // `Xβ + (B - XA)γ`, while prediction reconstructs the saved warp as
1279    // `Xβ_saved + Bγ`, we save `β_saved = β - Aγ`.
1280    let build_dealiased = |frozen: &Array1<f64>| -> Result<
1281        (
1282            ParameterBlockInput,
1283            Array2<f64>,
1284            Array2<f64>,
1285            std::sync::Arc<Array2<f64>>,
1286        ),
1287        String,
1288    > {
1289        use faer::Side;
1290        use gam_linalg::faer_ndarray::FaerEigh;
1291
1292        let b_full = family.wiggle_design(frozen.view())?;
1293        let xtx = x_dense.t().dot(&x_dense);
1294        let xtb = x_dense.t().dot(&b_full);
1295        let (evals, evecs) = xtx
1296            .eigh(Side::Lower)
1297            .map_err(|e| format!("frozen-basis warp de-aliasing mean QR failed: {e}"))?;
1298        let max_eval = evals.iter().map(|v| v.abs()).fold(0.0_f64, f64::max);
1299        let cutoff = 1.0e3
1300            * f64::EPSILON
1301            * (xtx.nrows().max(1) as f64)
1302            * max_eval.max(1.0);
1303        let mut alias = Array2::<f64>::zeros((x_dense.ncols(), b_full.ncols()));
1304        for k in 0..evals.len() {
1305            let lam = evals[k];
1306            if !lam.is_finite() || lam.abs() <= cutoff {
1307                continue;
1308            }
1309            let uk = evecs.column(k);
1310            let uk_xtb = uk.t().dot(&xtb);
1311            for i in 0..alias.nrows() {
1312                for j in 0..alias.ncols() {
1313                    alias[[i, j]] += uk[i] * uk_xtb[j] / lam;
1314                }
1315            }
1316        }
1317        let bda = &b_full - &x_dense.dot(&alias);
1318        let max_b = b_full.iter().map(|v| v.abs()).fold(0.0_f64, f64::max);
1319        let max_resid = bda.iter().map(|v| v.abs()).fold(0.0_f64, f64::max);
1320        let resid_tol = 1.0e3
1321            * f64::EPSILON
1322            * (bda.nrows().max(bda.ncols()).max(1) as f64)
1323            * max_b.max(1.0);
1324        if max_resid <= resid_tol {
1325            return Err("frozen-basis warp de-aliasing left no identifiable warp \
1326                        direction (the mean block already spans the warp in \
1327                        observation space)"
1328                .to_string());
1329        }
1330        let penalties: Vec<crate::model_types::PenaltySpec> = wiggle_penalties_full
1331            .iter()
1332            .map(|p| {
1333                let s = penalty_spec_to_dense(p, b_full.ncols())?;
1334                Ok(crate::model_types::PenaltySpec::Dense(s))
1335            })
1336            .collect::<Result<_, String>>()?;
1337        let q = bda.ncols();
1338        let block = ParameterBlockInput {
1339            design: DesignMatrix::Dense(gam_linalg::matrix::DenseDesignMatrix::from(bda.clone())),
1340            offset: Array1::zeros(frozen.len()),
1341            penalties,
1342            nullspace_dims: vec![],
1343            initial_log_lambdas: wiggle_log_lambdas.clone(),
1344            initial_beta: Some(Array1::zeros(q)),
1345        };
1346        Ok((block, Array2::<f64>::eye(q), alias, std::sync::Arc::new(bda)))
1347    };
1348
1349    // True iff the warped link `q = η + B(η)·β_w` is strictly increasing across
1350    // `[lo, hi]` (`dq/dη = 1 + B'(η)·β_w > 0`), evaluated on a dense grid.
1351    let link_monotone = |beta_w: &[f64], lo: f64, hi: f64| -> Result<bool, String> {
1352        if !(lo.is_finite() && hi.is_finite()) || hi <= lo {
1353            return Ok(true);
1354        }
1355        let grid = Array1::linspace(lo, hi, 257);
1356        let d1 = crate::wiggle::monotone_wiggle_basis_with_derivative_order(
1357            grid.view(),
1358            &family.wiggle_knots,
1359            family.wiggle_degree,
1360            1,
1361        )?;
1362        let beta = Array1::from_vec(beta_w.to_vec());
1363        if d1.ncols() != beta.len() {
1364            return Ok(false);
1365        }
1366        let dq = d1.dot(&beta) + 1.0;
1367        Ok(dq.iter().all(|v| *v > 0.0))
1368    };
1369
1370    // Outer Gauss-Newton / backfitting loop over the frozen warp index. One
1371    // iteration already recovers the warp; a few more align `η̂` with the refit
1372    // `η` so the in-sample warp matches what `predict` reconstructs.
1373    const MAX_FROZEN_OUTER: usize = 6;
1374    const FROZEN_ETA_TOL: f64 = 1e-5;
1375    let mut frozen_eta = pilot_eta;
1376    let mut last_fit: Option<UnifiedFitResult> = None;
1377    let mut last_reduction: Option<Array2<f64>> = None;
1378    let mut last_alias: Option<Array2<f64>> = None;
1379    for _outer in 0..MAX_FROZEN_OUTER {
1380        let (wiggle_block, z, alias, bda) = build_dealiased(&frozen_eta)?;
1381        let blocks = vec![
1382            eta_block_input.clone().intospec("eta")?,
1383            wiggle_block.intospec("wiggle")?,
1384        ];
1385        let mut fam = family.clone();
1386        fam.frozen_warp_design = Some(bda);
1387        let fit = fit_custom_family(&fam, &blocks, options).map_err(|e| e.to_string())?;
1388        let new_eta = fit
1389            .block_states
1390            .get(BinomialMeanWiggleFamily::BLOCK_ETA)
1391            .map(|state| state.eta.clone())
1392            .filter(|eta| eta.len() == frozen_eta.len())
1393            .ok_or_else(|| {
1394                "fit_binomial_mean_wiggle: frozen-basis refit did not expose a fitted eta block"
1395                    .to_string()
1396            })?;
1397        // The inner fit uses the residualized warp design.  `z` is currently the
1398        // identity because residualization, not coefficient dropping, removes the
1399        // mean-aliased component; it is still threaded through the existing
1400        // saved-warp path so prediction reconstructs from the full I-spline
1401        // basis.
1402        last_reduction = Some(z.clone());
1403        last_alias = Some(alias.clone());
1404        let scale = 1.0
1405            + frozen_eta
1406                .iter()
1407                .map(|v: &f64| v.abs())
1408                .fold(0.0_f64, f64::max);
1409        let delta = new_eta
1410            .iter()
1411            .zip(frozen_eta.iter())
1412            .map(|(a, b)| (a - b).abs())
1413            .fold(0.0_f64, f64::max);
1414        last_fit = Some(fit);
1415        if delta <= FROZEN_ETA_TOL * scale {
1416            break;
1417        }
1418        frozen_eta = new_eta;
1419    }
1420    let mut fit = last_fit.ok_or_else(|| {
1421        "fit_binomial_mean_wiggle: frozen-basis outer loop produced no fit".to_string()
1422    })?;
1423    // Widen the reduced warp coefficient `γ` to the standard I-spline basis
1424    // `β_w = Z·γ` for the saved-model predict runtime (which reconstructs the
1425    // warp from the full-width basis).
1426    let warp_beta_from = |fit: &UnifiedFitResult, z: &Array2<f64>| -> Option<Vec<f64>> {
1427        fit.block_states
1428            .get(BinomialMeanWiggleFamily::BLOCK_WIGGLE)
1429            .filter(|state| state.beta.len() == z.ncols())
1430            .map(|state| z.dot(&state.beta).to_vec())
1431    };
1432    let mut saved_warp_beta = match last_reduction.as_ref() {
1433        Some(z) => warp_beta_from(&fit, z),
1434        None => None,
1435    };
1436
1437    // Monotone-link guarantee (#1596). The reduced curvature fit is
1438    // unconstrained, so the REML-selected warp can turn the link non-monotone
1439    // (non-invertible) over the fitted predictor range. The warp NESTS the base
1440    // link at λ → ∞ (`γ → 0`, `dq/dη ≡ 1`), so escalating the warp's smoothing
1441    // always reaches a strictly-increasing link while preserving as much of the
1442    // learned curvature as monotonicity allows. We fixed-λ re-fit at the
1443    // converged frozen index with a geometrically increasing penalty until the
1444    // link is monotone — a monotonicity-driven smoothing floor, not a hard
1445    // active-set constraint (which the coupled `Z·γ ≥ 0` inequality would
1446    // reintroduce). The base link itself is the λ → ∞ fixed point, so this
1447    // terminates.
1448    if last_reduction.is_some() {
1449        let lo = frozen_eta.iter().copied().fold(f64::INFINITY, f64::min);
1450        let hi = frozen_eta.iter().copied().fold(f64::NEG_INFINITY, f64::max);
1451        let already_monotone = saved_warp_beta
1452            .as_ref()
1453            .map(|bw| link_monotone(bw, lo, hi))
1454            .transpose()?
1455            .unwrap_or(true);
1456        if !already_monotone {
1457            let eta_penalty_count = eta_block_input.penalties.len();
1458            let reml_log = fit.log_lambdas.clone();
1459            const MAX_MONO_STEPS: usize = 16;
1460            const MONO_LOG_LAMBDA_STEP: f64 = 0.75;
1461            for step in 1..=MAX_MONO_STEPS {
1462                let bump = MONO_LOG_LAMBDA_STEP * step as f64;
1463                let (mut wb, z2, alias2, bda2) = build_dealiased(&frozen_eta)?;
1464                let wlen = wb.penalties.len();
1465                let wiggle_base: Array1<f64> = if reml_log.len() >= eta_penalty_count + wlen {
1466                    reml_log
1467                        .slice(s![eta_penalty_count..eta_penalty_count + wlen])
1468                        .to_owned()
1469                } else {
1470                    Array1::zeros(wlen)
1471                };
1472                wb.initial_log_lambdas = Some(wiggle_base.mapv(|v| v + bump));
1473                let mut eta_in = eta_block_input.clone();
1474                if reml_log.len() >= eta_penalty_count && eta_penalty_count > 0 {
1475                    eta_in.initial_log_lambdas =
1476                        Some(reml_log.slice(s![0..eta_penalty_count]).to_owned());
1477                }
1478                let blocks = vec![eta_in.intospec("eta")?, wb.intospec("wiggle")?];
1479                let mut fam = family.clone();
1480                fam.frozen_warp_design = Some(bda2);
1481                let refit = crate::custom_family::fit_custom_family_fixed_log_lambdas(
1482                    &fam, &blocks, options, None, 0, None, true,
1483                )
1484                .map_err(|e| e.to_string())?;
1485                let refit_beta = warp_beta_from(&refit, &z2);
1486                let monotone = refit_beta
1487                    .as_ref()
1488                    .map(|bw| link_monotone(bw, lo, hi))
1489                    .transpose()?
1490                    .unwrap_or(true);
1491                if monotone || step == MAX_MONO_STEPS {
1492                    fit = refit;
1493                    saved_warp_beta = refit_beta;
1494                    last_alias = Some(alias2);
1495                    break;
1496                }
1497            }
1498            // Final certification: the λ → ∞ base link is monotone, so this
1499            // should hold; surface loudly if a pathological basis defeats it.
1500            let final_monotone = saved_warp_beta
1501                .as_ref()
1502                .map(|bw| link_monotone(bw, lo, hi))
1503                .transpose()?
1504                .unwrap_or(true);
1505            if !final_monotone {
1506                return Err("binomial flexible link could not be smoothed to a monotone \
1507                            (invertible) link over the fitted predictor range"
1508                    .to_string());
1509            }
1510        }
1511    }
1512    if let (Some(alias), Some(beta_w)) = (last_alias.as_ref(), saved_warp_beta.as_ref()) {
1513        let gamma = Array1::from_vec(beta_w.clone());
1514        if alias.ncols() == gamma.len() && alias.nrows() == eta_block_input.design.ncols() {
1515            let shift = alias.dot(&gamma);
1516            if let Some(block) = fit.blocks.get_mut(BinomialMeanWiggleFamily::BLOCK_ETA) {
1517                if block.beta.len() == shift.len() {
1518                    block.beta -= &shift;
1519                }
1520            }
1521            if let Some(state) = fit
1522                .block_states
1523                .get_mut(BinomialMeanWiggleFamily::BLOCK_ETA)
1524            {
1525                if state.beta.len() == shift.len() {
1526                    state.beta -= &shift;
1527                    state.eta = x_dense.dot(&state.beta) + &eta_block_input.offset;
1528                }
1529            }
1530            if fit.beta.len() >= shift.len() {
1531                for i in 0..shift.len() {
1532                    fit.beta[i] -= shift[i];
1533                }
1534            }
1535        }
1536    }
1537    Ok((fit, saved_warp_beta))
1538}
1539
1540/// Densify a wiggle-block penalty spec to its full `p×p` matrix for the
1541/// `ZᵀSZ` de-aliasing transform (#1596). The link-warp block carries only
1542/// `Dense`/`DenseWithMean` difference (and optional ridge) penalties.
1543fn penalty_spec_to_dense(
1544    spec: &crate::model_types::PenaltySpec,
1545    p: usize,
1546) -> Result<Array2<f64>, String> {
1547    use crate::model_types::PenaltySpec;
1548    match spec {
1549        PenaltySpec::Dense(m) | PenaltySpec::DenseWithMean { matrix: m, .. } => {
1550            if m.nrows() != p || m.ncols() != p {
1551                return Err(format!(
1552                    "frozen-basis warp penalty must be {p}x{p}, got {}x{}",
1553                    m.nrows(),
1554                    m.ncols()
1555                ));
1556            }
1557            Ok(m.clone())
1558        }
1559        PenaltySpec::Block {
1560            local, col_range, ..
1561        } => {
1562            let mut full = Array2::<f64>::zeros((p, p));
1563            if col_range.end > p || local.nrows() != col_range.len() {
1564                return Err("frozen-basis warp penalty block range out of bounds".to_string());
1565            }
1566            full.slice_mut(s![col_range.clone(), col_range.clone()])
1567                .assign(local);
1568            Ok(full)
1569        }
1570    }
1571}
1572
1573pub(crate) trait LocationScaleFamilyBuilder {
1574    type Family: CustomFamily + Clone + Send + Sync + 'static;
1575
1576    fn meanspec(&self) -> &TermCollectionSpec;
1577    fn noisespec(&self) -> &TermCollectionSpec;
1578
1579    fn build_blocks(
1580        &self,
1581        theta: &Array1<f64>,
1582        mean_design: &TermCollectionDesign,
1583        noise_design: &TermCollectionDesign,
1584        mean_beta_hint: Option<Array1<f64>>,
1585        noise_beta_hint: Option<Array1<f64>>,
1586    ) -> Result<Vec<ParameterBlockSpec>, String>;
1587
1588    fn build_family(
1589        &self,
1590        mean_design: &TermCollectionDesign,
1591        noise_design: &TermCollectionDesign,
1592    ) -> Self::Family;
1593
1594    fn extract_primary_betas(
1595        &self,
1596        fit: &UnifiedFitResult,
1597    ) -> Result<(Array1<f64>, Array1<f64>), String>;
1598
1599    fn mean_penalty_count(&self, mean_design: &TermCollectionDesign) -> usize {
1600        mean_design.penalties.len()
1601    }
1602
1603    fn noise_penalty_count(&self, noise_design: &TermCollectionDesign) -> usize {
1604        noise_design.penalties.len()
1605    }
1606
1607    fn exact_spatial_joint_supported(&self) -> bool {
1608        false
1609    }
1610
1611    fn require_exact_spatial_joint(&self) -> bool {
1612        false
1613    }
1614
1615    fn exact_spatial_seed_risk_profile(&self) -> crate::seeding::SeedRiskProfile {
1616        crate::seeding::SeedRiskProfile::GeneralizedLinear
1617    }
1618
1619    fn extra_rho0(&self) -> Result<Array1<f64>, String> {
1620        Ok(Array1::zeros(0))
1621    }
1622
1623    fn build_psiderivative_blocks(
1624        &self,
1625        arr: ndarray::ArrayView2<'_, f64>,
1626        term_spec: &TermCollectionSpec,
1627        term_spec2: &TermCollectionSpec,
1628        term_design: &TermCollectionDesign,
1629        term_design2: &TermCollectionDesign,
1630    ) -> Result<Vec<Vec<CustomFamilyBlockPsiDerivative>>, String>;
1631}
1632
1633pub(crate) fn fit_location_scale_terms<B: LocationScaleFamilyBuilder>(
1634    data: ndarray::ArrayView2<'_, f64>,
1635    builder: B,
1636    options: &BlockwiseFitOptions,
1637    kappa_options: &SpatialLengthScaleOptimizationOptions,
1638) -> Result<BlockwiseTermFitResult, String> {
1639    // Large-n location-scale fits keep the caller's explicit Hessian request.
1640    // The unified REML evaluator chooses a dense or matrix-free exact
1641    // representation from the realized (n, p, K) work model, so there is no
1642    // large-scale downgrade to BFGS here.
1643
1644    let mut mean_beta_hint: Option<Array1<f64>> = None;
1645    let mut noise_beta_hint: Option<Array1<f64>> = None;
1646    let extra_rho0 = builder.extra_rho0()?;
1647
1648    let mean_boot_design =
1649        build_term_collection_design(data, builder.meanspec()).map_err(|e| e.to_string())?;
1650    let noise_boot_design =
1651        build_term_collection_design(data, builder.noisespec()).map_err(|e| e.to_string())?;
1652    let mean_bootspec = freeze_term_collection_from_design(builder.meanspec(), &mean_boot_design)
1653        .map_err(|e| e.to_string())?;
1654    let noise_bootspec =
1655        freeze_term_collection_from_design(builder.noisespec(), &noise_boot_design)
1656            .map_err(|e| e.to_string())?;
1657
1658    let require_exact_spatial_joint = builder.require_exact_spatial_joint();
1659    let analytic_joint_derivatives_check = if builder.exact_spatial_joint_supported() {
1660        builder
1661            .build_psiderivative_blocks(
1662                data,
1663                &mean_bootspec,
1664                &noise_bootspec,
1665                &mean_boot_design,
1666                &noise_boot_design,
1667            )
1668            .map(|_| ())
1669    } else {
1670        Err(
1671            "analytic spatial psi derivatives are unavailable for this location-scale family"
1672                .to_string(),
1673        )
1674    };
1675    let analytic_joint_derivatives_available = analytic_joint_derivatives_check.is_ok();
1676    if require_exact_spatial_joint {
1677        analytic_joint_derivatives_check.map_err(|err| {
1678            format!("exact two-block spatial path requires analytic psi derivatives: {err}")
1679        })?;
1680    }
1681    let mean_penalty_count = builder.mean_penalty_count(&mean_boot_design);
1682    let noise_penalty_count = builder.noise_penalty_count(&noise_boot_design);
1683
1684    // Honor an explicit user-supplied `length_scale=X` on every spatial term
1685    // in both the mean and noise blocks: when every term is κ-locked (no
1686    // anisotropy, no per-axis ψ contrasts), the joint-spatial outer optimizer
1687    // has nothing to optimize. Routing through it anyway wraps the full
1688    // two-block coefficient solve inside an unnecessary outer loop where
1689    // each evaluation runs the inner Newton from scratch. This is the same
1690    // short-circuit the Bernoulli marginal-slope entry point performs at
1691    // bernoulli_marginal_slope.rs:16432-16442; mirroring it here makes the
1692    // GAMLSS path skip straight to the `(!enabled || log_kappa_dim == 0)`
1693    // fast path in `optimize_spatial_length_scale_exact_joint`.
1694    let mut effective_kappa_options = kappa_options.clone();
1695    if effective_kappa_options.enabled
1696        && gam_terms::smooth::all_spatial_terms_kappa_fixed(&mean_bootspec)
1697        && gam_terms::smooth::all_spatial_terms_kappa_fixed(&noise_bootspec)
1698    {
1699        log::info!(
1700            "[GAMLSS spatial] disabling κ/ψ optimization: every spatial term in \
1701             both blocks has an explicit length_scale and no anisotropy; \
1702             user-supplied kernel scale is fixed"
1703        );
1704        effective_kappa_options.enabled = false;
1705    }
1706    let kappa_options: &SpatialLengthScaleOptimizationOptions = &effective_kappa_options;
1707
1708    // Macro to invoke the exact-joint spatial optimizer with shared closures.
1709    // The exact path evaluates the full profiled/Laplace objective over
1710    // theta = [rho, psi] with the real joint Hessian required by NewtonTR/ARC.
1711    macro_rules! run_exact_joint_spatial {
1712        () => {{
1713            let joint_setup = build_two_block_exact_joint_setup(
1714                data,
1715                builder.meanspec(),
1716                builder.noisespec(),
1717                mean_penalty_count,
1718                noise_penalty_count,
1719                extra_rho0.as_slice().unwrap_or(&[]),
1720                None,
1721                kappa_options,
1722            );
1723            let mean_terms = spatial_length_scale_term_indices(builder.meanspec());
1724            let noise_terms = spatial_length_scale_term_indices(builder.noisespec());
1725            let mean_beta_hint_cell = std::cell::RefCell::new(mean_beta_hint.clone());
1726            let noise_beta_hint_cell = std::cell::RefCell::new(noise_beta_hint.clone());
1727            let hyper_warm_start_cell =
1728                std::cell::RefCell::new(None::<CustomFamilyWarmStart>);
1729            // Two-block GAMLSS/location-scale joint likelihoods have a
1730            // β-dependent cross-block Hessian (the (μ,log σ) / (t,log σ)
1731            // off-diagonal blocks involve residual/response scalars that
1732            // shift when β moves). The Wood-Fasiolo structural property
1733            // `H^{-1/2} B_k H^{-1/2} ≽ 0` plus parameter-independent
1734            // nullspace — the mathematical basis for EFS convergence —
1735            // fails here, so EFS/HybridEFS must be excluded at plan time
1736            // rather than retried as a silent first attempt that stalls
1737            // for hundreds of seconds before the runner falls back.
1738            let gamlss_disable_fixed_point = true;
1739            let outer_policy = {
1740                // GAMLSS spatial path: psi_dim = log_kappa_dim + auxiliary_dim,
1741                // matching the (theta_dim - rho_dim) decomposition the
1742                // optimizer uses internally. Build realized ParameterBlockSpecs
1743                // at the seed rho so the family's own cost model — which
1744                // multiplies coefficient-gradient / coefficient-Hessian
1745                // per-row cost by the joint outer-coordinate dimension and
1746                // total p — produces honest `predicted_*_work` estimates.
1747                // Previously this fed `predicted_*_work: 0` to the planner,
1748                // which then ungated dense outer Hessian work that costs
1749                // hundreds of seconds per eval at large scale (see
1750                // `OuterDerivativePolicy::OUTER_HESSIAN_WORK_BUDGET`).
1751                let theta_seed = joint_setup.theta0();
1752                let rho_dim = joint_setup.rho_dim();
1753                let psi_dim = theta_seed.len() - rho_dim;
1754                let rho_seed = theta_seed.slice(s![..rho_dim]).to_owned();
1755                let policy_blocks_res = builder.build_blocks(
1756                    &rho_seed,
1757                    &mean_boot_design,
1758                    &noise_boot_design,
1759                    mean_beta_hint_cell.borrow().clone(),
1760                    noise_beta_hint_cell.borrow().clone(),
1761                );
1762                let mut policy = match policy_blocks_res {
1763                    Ok(policy_blocks) => {
1764                        let policy_family =
1765                            builder.build_family(&mean_boot_design, &noise_boot_design);
1766                        crate::custom_family::CustomFamily::outer_derivative_policy(
1767                            &policy_family,
1768                            &policy_blocks,
1769                            psi_dim,
1770                            options,
1771                        )
1772                    }
1773                    Err(err) => {
1774                        // Block construction at the seed should not fail for
1775                        // any in-tree family, but if it does, fall back to a
1776                        // policy that names the capability honestly and
1777                        // declines to predict cost. Setting work to
1778                        // `u128::MAX` routes the planner through gradient-only
1779                        // BFGS (the universal Hessian-work budget is
1780                        // saturating, so a sentinel is fine here).
1781                        log::warn!(
1782                            "[GAMLSS spatial] failed to realize policy blocks at seed rho ({err}); \
1783                             routing outer optimizer through gradient-only BFGS"
1784                        );
1785                        let capability = if analytic_joint_derivatives_available {
1786                            crate::custom_family::ExactOuterDerivativeOrder::Second
1787                        } else {
1788                            crate::custom_family::ExactOuterDerivativeOrder::First
1789                        };
1790                        crate::custom_family::OuterDerivativePolicy {
1791                            capability,
1792                            predicted_gradient_work: u128::MAX,
1793                            predicted_hessian_work: u128::MAX,
1794                            // No GAMLSS family today overrides its
1795                            // outer-only `_with_options` hooks to consume
1796                            // `outer_score_subsample`; staged-κ would
1797                            // build pilot masks the family then ignores.
1798                            subsample_capable: false,
1799                        }
1800                    }
1801                };
1802                if !analytic_joint_derivatives_available {
1803                    // Capability must not exceed what the analytic derivatives
1804                    // path can supply — the macro's hyper evaluator returns
1805                    // an error otherwise.
1806                    policy.capability =
1807                        crate::custom_family::ExactOuterDerivativeOrder::First;
1808                }
1809                policy
1810            };
1811            optimize_spatial_length_scale_exact_joint(
1812                data,
1813                &[builder.meanspec().clone(), builder.noisespec().clone()],
1814                &[mean_terms, noise_terms],
1815                kappa_options,
1816                &joint_setup,
1817                builder.exact_spatial_seed_risk_profile(),
1818                analytic_joint_derivatives_available,
1819                analytic_joint_derivatives_available,
1820                gamlss_disable_fixed_point,
1821                None,
1822                outer_policy,
1823                |theta, specs: &[TermCollectionSpec], designs: &[TermCollectionDesign]| {
1824                    assert_eq!(
1825                        specs.len(),
1826                        2,
1827                        "joint spatial closure expects exactly two block specs (mean, noise); got {}",
1828                        specs.len(),
1829                    );
1830                    assert_eq!(
1831                        designs.len(),
1832                        2,
1833                        "joint spatial closure expects exactly two block designs (mean, noise); got {}",
1834                        designs.len(),
1835                    );
1836                    let rho = theta.slice(s![..joint_setup.rho_dim()]).to_owned();
1837                    let fit = {
1838                        let blocks = builder.build_blocks(
1839                            &rho,
1840                            &designs[0],
1841                            &designs[1],
1842                            mean_beta_hint_cell.borrow().clone(),
1843                            noise_beta_hint_cell.borrow().clone(),
1844                        )?;
1845                        if mean_beta_hint_cell.borrow().is_none()
1846                            && let Some(beta) = blocks.first().and_then(|block| block.initial_beta.clone())
1847                        {
1848                            *mean_beta_hint_cell.borrow_mut() = Some(beta);
1849                        }
1850                        if noise_beta_hint_cell.borrow().is_none()
1851                            && let Some(beta) =
1852                                blocks.get(1).and_then(|block| block.initial_beta.clone())
1853                        {
1854                            *noise_beta_hint_cell.borrow_mut() = Some(beta);
1855                        }
1856                        let family = builder.build_family(&designs[0], &designs[1]);
1857                        // Branch on whether the κ optimizer drives rho.
1858                        //
1859                        // * `log_kappa_dim() > 0 && kappa_options.enabled` ⇒
1860                        //   the outer (ρ, ψ) optimizer is active and
1861                        //   passes each candidate ρ to this closure;
1862                        //   the inner fit must hold log-lambdas fixed
1863                        //   at the supplied ρ so the outer derivative
1864                        //   has a well-defined directional gradient.
1865                        //
1866                        // * Otherwise (κ disabled via the locked-κ
1867                        //   short-circuit, or no spatial terms at all)
1868                        //   the fast path in
1869                        //   `optimize_spatial_length_scale_exact_joint`
1870                        //   calls this closure exactly once at
1871                        //   `theta = theta0`; ρ must still be optimized
1872                        //   from data because the user never pinned it.
1873                        //   `fit_custom_family` performs the joint
1874                        //   ρ + coefficient REML fit at the user's
1875                        //   (now-fixed) kernel scale, which is the
1876                        //   intended behaviour when `length_scale=…` is
1877                        //   set on every spatial term.
1878                        if joint_setup.log_kappa_dim() > 0 && kappa_options.enabled {
1879                            let warm_start = hyper_warm_start_cell.borrow().clone();
1880                            fit_custom_family_fixed_log_lambdas(
1881                                &family,
1882                                &blocks,
1883                                options,
1884                                warm_start.as_ref(),
1885                                0,
1886                                None,
1887                                true,
1888                            )?
1889                        } else {
1890                            fit_custom_family(&family, &blocks, options)?
1891                        }
1892                    };
1893                    let (mean_beta, noise_beta) = builder.extract_primary_betas(&fit)?;
1894                    mean_beta_hint = Some(mean_beta);
1895                    noise_beta_hint = Some(noise_beta);
1896                    *mean_beta_hint_cell.borrow_mut() = mean_beta_hint.clone();
1897                    *noise_beta_hint_cell.borrow_mut() = noise_beta_hint.clone();
1898                    Ok(fit)
1899                },
1900                |theta,
1901                 specs: &[TermCollectionSpec],
1902                 designs: &[TermCollectionDesign],
1903                 eval_mode,
1904                 row_set: &crate::row_kernel::RowSet| {
1905                    use gam_problem::EvalMode;
1906                    if !analytic_joint_derivatives_available {
1907                        return Err(
1908                            "analytic spatial psi derivatives are unavailable for this exact two-block path"
1909                                .to_string(),
1910                        );
1911                    }
1912                    let rho = theta.slice(s![..joint_setup.rho_dim()]).to_owned();
1913                    let blocks = builder.build_blocks(
1914                        &rho,
1915                        &designs[0],
1916                        &designs[1],
1917                        mean_beta_hint_cell.borrow().clone(),
1918                        noise_beta_hint_cell.borrow().clone(),
1919                    )?;
1920                    if mean_beta_hint_cell.borrow().is_none()
1921                        && let Some(beta) = blocks.first().and_then(|block| block.initial_beta.clone())
1922                    {
1923                        *mean_beta_hint_cell.borrow_mut() = Some(beta);
1924                    }
1925                    if noise_beta_hint_cell.borrow().is_none()
1926                        && let Some(beta) = blocks.get(1).and_then(|block| block.initial_beta.clone())
1927                    {
1928                        *noise_beta_hint_cell.borrow_mut() = Some(beta);
1929                    }
1930                    let family = builder.build_family(&designs[0], &designs[1]);
1931                    let psiderivative_blocks = if matches!(eval_mode, EvalMode::ValueOnly) {
1932                        // Cost-only line-search probes need only the profiled
1933                        // likelihood at the already-realized θ. The ψ
1934                        // derivative payloads are O(n) to assemble and are
1935                        // consumed solely by the IFT gradient/Hessian path;
1936                        // building them for every backtracking probe made the
1937                        // Gaussian location-scale path scale with sample size
1938                        // even when the optimizer discarded the derivative
1939                        // pieces. Pass empty blocks so the shared evaluator
1940                        // performs the same fixed-design inner REML solve
1941                        // without derivative-only setup work.
1942                        (0..specs.len()).map(|_| Vec::new()).collect()
1943                    } else {
1944                        builder.build_psiderivative_blocks(
1945                            data,
1946                            &specs[0],
1947                            &specs[1],
1948                            &designs[0],
1949                            &designs[1],
1950                        )?
1951                    };
1952                    let warm_start = hyper_warm_start_cell.borrow().clone();
1953                    // Forward the κ-staging row set to the family by installing it
1954                    // on the canonical `outer_score_subsample` option. Inner-PIRLS
1955                    // and final covariance still run on full data (the per-row
1956                    // weight is consulted only by outer-only paths inside the
1957                    // family). When the staging schedule is full-data the option
1958                    // stays `None` and the call is equivalent to the prior path.
1959                    let eval_options = match row_set {
1960                        crate::row_kernel::RowSet::All => {
1961                            std::borrow::Cow::Borrowed(options)
1962                        }
1963                        crate::row_kernel::RowSet::Subsample {
1964                            rows,
1965                            n_full,
1966                        } => {
1967                            let subsample = crate::outer_subsample::
1968                                OuterScoreSubsample::from_weighted_rows(
1969                                    (**rows).clone(),
1970                                    *n_full,
1971                                    *n_full as u64,
1972                                );
1973                            let mut cloned = options.clone();
1974                            cloned.outer_score_subsample =
1975                                Some(std::sync::Arc::new(subsample));
1976                            std::borrow::Cow::Owned(cloned)
1977                        }
1978                    };
1979                    let eval = evaluate_custom_family_joint_hyper(
1980                        &family,
1981                        &blocks,
1982                        eval_options.as_ref(),
1983                        &rho,
1984                        &psiderivative_blocks,
1985                        warm_start.as_ref(),
1986                        eval_mode,
1987                    )?;
1988                    *hyper_warm_start_cell.borrow_mut() = Some(eval.warm_start.clone());
1989                    if !eval.inner_converged {
1990                        return Err(
1991                            "exact two-block spatial inner solve did not converge".to_string(),
1992                        );
1993                    }
1994                    if matches!(eval_mode, EvalMode::ValueGradientHessian)
1995                        && !eval.outer_hessian.is_analytic()
1996                    {
1997                        return Err(
1998                            "exact two-block spatial objective requires a full joint [rho, psi] hessian"
1999                                .to_string(),
2000                        );
2001                    }
2002                    Ok((eval.objective, eval.gradient, eval.outer_hessian))
2003                },
2004                |theta, specs: &[TermCollectionSpec], designs: &[TermCollectionDesign]| {
2005                    if !analytic_joint_derivatives_available {
2006                        return Err(
2007                            "analytic spatial psi derivatives are unavailable for this exact two-block path"
2008                                .to_string(),
2009                        );
2010                    }
2011                    let rho = theta.slice(s![..joint_setup.rho_dim()]).to_owned();
2012                    let blocks = builder.build_blocks(
2013                        &rho,
2014                        &designs[0],
2015                        &designs[1],
2016                        mean_beta_hint_cell.borrow().clone(),
2017                        noise_beta_hint_cell.borrow().clone(),
2018                    )?;
2019                    if mean_beta_hint_cell.borrow().is_none()
2020                        && let Some(beta) = blocks.first().and_then(|block| block.initial_beta.clone())
2021                    {
2022                        *mean_beta_hint_cell.borrow_mut() = Some(beta);
2023                    }
2024                    if noise_beta_hint_cell.borrow().is_none()
2025                        && let Some(beta) = blocks.get(1).and_then(|block| block.initial_beta.clone())
2026                    {
2027                        *noise_beta_hint_cell.borrow_mut() = Some(beta);
2028                    }
2029                    let family = builder.build_family(&designs[0], &designs[1]);
2030                    let psiderivative_blocks = builder.build_psiderivative_blocks(
2031                        data,
2032                        &specs[0],
2033                        &specs[1],
2034                        &designs[0],
2035                        &designs[1],
2036                    )?;
2037                    let warm_start = hyper_warm_start_cell.borrow().clone();
2038                    let eval = evaluate_custom_family_joint_hyper_efs(
2039                        &family,
2040                        &blocks,
2041                        options,
2042                        &rho,
2043                        &psiderivative_blocks,
2044                        warm_start.as_ref(),
2045                    )?;
2046                    *hyper_warm_start_cell.borrow_mut() = Some(eval.warm_start.clone());
2047                    if !eval.inner_converged {
2048                        return Err(
2049                            "exact two-block spatial EFS inner solve did not converge".to_string(),
2050                        );
2051                    }
2052                    Ok(eval.efs_eval)
2053                },
2054                |_beta: &Array1<f64>| Ok(gam_solve::rho_optimizer::SeedOutcome::NoSlot),
2055            )
2056        }};
2057    }
2058
2059    let mut solved = run_exact_joint_spatial!()
2060        .map_err(|err| format!("exact two-block spatial optimization failed: {err}"))?;
2061
2062    let expected_noise_penalty_count = builder.noise_penalty_count(&solved.designs[1]);
2063    let actual_noise_penalty_count = solved.designs[1].penalties.len();
2064    if expected_noise_penalty_count > actual_noise_penalty_count {
2065        if expected_noise_penalty_count != actual_noise_penalty_count + 1 {
2066            return Err(GamlssError::UnsupportedConfiguration {
2067                reason: format!(
2068                    "location-scale result noise design expected {} penalties after augmentation, got {} before augmentation",
2069                    expected_noise_penalty_count, actual_noise_penalty_count
2070                ),
2071            }
2072            .into());
2073        }
2074        append_binomial_log_sigma_shrinkage_penalty_design(&mut solved.designs[1]);
2075    }
2076
2077    BlockwiseTermFitResult::try_from_parts(BlockwiseTermFitResultParts {
2078        fit: solved.fit,
2079        meanspec_resolved: solved.resolved_specs.remove(0),
2080        noisespec_resolved: solved.resolved_specs.remove(0),
2081        mean_design: solved.designs.remove(0),
2082        noise_design: solved.designs.remove(0),
2083    })
2084}
2085
2086pub(crate) struct GaussianLocationScaleTermBuilder {
2087    pub(crate) y: Array1<f64>,
2088    pub(crate) weights: Array1<f64>,
2089    pub(crate) meanspec: TermCollectionSpec,
2090    pub(crate) noisespec: TermCollectionSpec,
2091    pub(crate) mean_offset: Array1<f64>,
2092    pub(crate) noise_offset: Array1<f64>,
2093}
2094
2095impl LocationScaleFamilyBuilder for GaussianLocationScaleTermBuilder {
2096    type Family = GaussianLocationScaleFamily;
2097
2098    fn meanspec(&self) -> &TermCollectionSpec {
2099        &self.meanspec
2100    }
2101
2102    fn noisespec(&self) -> &TermCollectionSpec {
2103        &self.noisespec
2104    }
2105
2106    fn noise_penalty_count(&self, noise_design: &TermCollectionDesign) -> usize {
2107        // Mirror the Binomial location-scale path: the log-sigma (scale)
2108        // block carries an extra nullspace shrinkage penalty so its
2109        // polynomial nullspace (constant log-sigma, plus the linear term for
2110        // tp/Duchon bases) is not left unpenalized. Without it, outer REML
2111        // optimizes lambda_sigma on a flat/ill-conditioned surface, which can
2112        // flatten the scale envelope (bad Pearson/CRPS/PIT/NLL) and diverge
2113        // the coupled inner Newton (log_sigma residual blows up, beta ->
2114        // infinity). The strength of this shrinkage is REML-selected.
2115        noise_design.penalties.len() + 1
2116    }
2117
2118    fn exact_spatial_joint_supported(&self) -> bool {
2119        true
2120    }
2121
2122    fn exact_spatial_seed_risk_profile(&self) -> crate::seeding::SeedRiskProfile {
2123        crate::seeding::SeedRiskProfile::GaussianLocationScale
2124    }
2125
2126    fn build_blocks(
2127        &self,
2128        theta: &Array1<f64>,
2129        mean_design: &TermCollectionDesign,
2130        noise_design: &TermCollectionDesign,
2131        mean_beta_hint: Option<Array1<f64>>,
2132        noise_beta_hint: Option<Array1<f64>>,
2133    ) -> Result<Vec<ParameterBlockSpec>, String> {
2134        let layout = GamlssLambdaLayout::two_block(
2135            mean_design.penalties.len(),
2136            self.noise_penalty_count(noise_design),
2137        );
2138        layout.validate_theta_len(theta.len(), "gaussian location-scale")?;
2139        let (meanspec, noisespec) = build_gaussian_mean_and_scale_blocks(
2140            &self.y,
2141            &self.weights,
2142            mean_design,
2143            noise_design,
2144            &self.mean_offset,
2145            &self.noise_offset,
2146            layout.mean_from(theta),
2147            layout.noise_from(theta),
2148            mean_beta_hint,
2149            noise_beta_hint,
2150            "GaussianLocationScale::build_blocks",
2151        )?;
2152        Ok(vec![meanspec, noisespec])
2153    }
2154
2155    fn build_family(
2156        &self,
2157        mean_design: &TermCollectionDesign,
2158        noise_design: &TermCollectionDesign,
2159    ) -> Self::Family {
2160        let preparednoise_design =
2161            prepared_gaussian_log_sigma_design(&mean_design.design, &noise_design.design)
2162                .expect("prepared Gaussian log-sigma design should match block construction");
2163        GaussianLocationScaleFamily {
2164            y: self.y.clone(),
2165            weights: self.weights.clone(),
2166            mu_design: Some(mean_design.design.clone()),
2167            log_sigma_design: Some(preparednoise_design),
2168            policy: gam_runtime::resource::ResourcePolicy::default_library(),
2169            cached_row_scalars: std::sync::RwLock::new(None),
2170        }
2171    }
2172
2173    fn extract_primary_betas(
2174        &self,
2175        fit: &UnifiedFitResult,
2176    ) -> Result<(Array1<f64>, Array1<f64>), String> {
2177        let mean_beta = fit
2178            .block_states
2179            .get(GaussianLocationScaleFamily::BLOCK_MU)
2180            .ok_or_else(|| "missing Gaussian mu block state".to_string())?
2181            .beta
2182            .clone();
2183        let noise_beta = fit
2184            .block_states
2185            .get(GaussianLocationScaleFamily::BLOCK_LOG_SIGMA)
2186            .ok_or_else(|| "missing Gaussian log_sigma block state".to_string())?
2187            .beta
2188            .clone();
2189        Ok((mean_beta, noise_beta))
2190    }
2191
2192    fn build_psiderivative_blocks(
2193        &self,
2194        data: ndarray::ArrayView2<'_, f64>,
2195        meanspec_resolved: &TermCollectionSpec,
2196        noisespec_resolved: &TermCollectionSpec,
2197        mean_design: &TermCollectionDesign,
2198        noise_design: &TermCollectionDesign,
2199    ) -> Result<Vec<Vec<CustomFamilyBlockPsiDerivative>>, String> {
2200        let mean_derivs =
2201            build_block_spatial_psi_derivatives(data, meanspec_resolved, mean_design)?
2202                .ok_or_else(|| "missing Gaussian mean spatial psi derivatives".to_string())?;
2203        let noise_derivs =
2204            build_block_spatial_psi_derivatives(data, noisespec_resolved, noise_design)?
2205                .ok_or_else(|| "missing Gaussian log-sigma spatial psi derivatives".to_string())?;
2206        Ok(vec![mean_derivs, noise_derivs])
2207    }
2208}
2209
2210pub(crate) struct GaussianLocationScaleWiggleTermBuilder {
2211    pub(crate) y: Array1<f64>,
2212    pub(crate) weights: Array1<f64>,
2213    pub(crate) meanspec: TermCollectionSpec,
2214    pub(crate) noisespec: TermCollectionSpec,
2215    pub(crate) mean_offset: Array1<f64>,
2216    pub(crate) noise_offset: Array1<f64>,
2217    pub(crate) wiggle_knots: Array1<f64>,
2218    pub(crate) wiggle_degree: usize,
2219    pub(crate) wiggle_block: ParameterBlockInput,
2220}
2221
2222impl LocationScaleFamilyBuilder for GaussianLocationScaleWiggleTermBuilder {
2223    type Family = GaussianLocationScaleWiggleFamily;
2224
2225    fn meanspec(&self) -> &TermCollectionSpec {
2226        &self.meanspec
2227    }
2228
2229    fn noisespec(&self) -> &TermCollectionSpec {
2230        &self.noisespec
2231    }
2232
2233    fn noise_penalty_count(&self, noise_design: &TermCollectionDesign) -> usize {
2234        // Same nullspace log-sigma shrinkage penalty as the non-wiggle
2235        // Gaussian builder; see GaussianLocationScaleTermBuilder.
2236        noise_design.penalties.len() + 1
2237    }
2238
2239    fn exact_spatial_joint_supported(&self) -> bool {
2240        true
2241    }
2242
2243    fn exact_spatial_seed_risk_profile(&self) -> crate::seeding::SeedRiskProfile {
2244        crate::seeding::SeedRiskProfile::GaussianLocationScale
2245    }
2246
2247    fn require_exact_spatial_joint(&self) -> bool {
2248        true
2249    }
2250
2251    fn extra_rho0(&self) -> Result<Array1<f64>, String> {
2252        initial_log_lambdas_orzeros(&self.wiggle_block)
2253    }
2254
2255    fn build_blocks(
2256        &self,
2257        theta: &Array1<f64>,
2258        mean_design: &TermCollectionDesign,
2259        noise_design: &TermCollectionDesign,
2260        mean_beta_hint: Option<Array1<f64>>,
2261        noise_beta_hint: Option<Array1<f64>>,
2262    ) -> Result<Vec<ParameterBlockSpec>, String> {
2263        let layout = GamlssLambdaLayout::withwiggle(
2264            mean_design.penalties.len(),
2265            self.noise_penalty_count(noise_design),
2266            self.wiggle_block.penalties.len(),
2267        );
2268        layout.validate_theta_len(theta.len(), "gaussian location-scale wiggle")?;
2269        let (mut meanspec, mut noisespec) = build_gaussian_mean_and_scale_blocks(
2270            &self.y,
2271            &self.weights,
2272            mean_design,
2273            noise_design,
2274            &self.mean_offset,
2275            &self.noise_offset,
2276            layout.mean_from(theta),
2277            layout.noise_from(theta),
2278            mean_beta_hint,
2279            noise_beta_hint,
2280            "GaussianLocationScaleWiggle::build_blocks",
2281        )?;
2282        // Keep the dynamic full-width wiggle basis safe from a canonical-gauge
2283        // column drop: route the shared level/intercept alias onto the
2284        // column-reducible mean and log-sigma blocks by giving them a lower
2285        // gauge priority than the wiggle block's fixed 100 (see the binomial
2286        // wiggle path and `build_location_scale_wiggle_block`).
2287        meanspec.gauge_priority = LINK_WIGGLE_GAUGE_PRIORITY;
2288        noisespec.gauge_priority = LINK_WIGGLE_GAUGE_PRIORITY;
2289        let n_rows = meanspec.design.nrows();
2290        let wigglespec = build_location_scale_wiggle_block(
2291            "wiggle",
2292            self.wiggle_block.design.clone(),
2293            self.wiggle_block.offset.clone(),
2294            wiggle_block_penalty_matrices(&self.wiggle_block),
2295            self.wiggle_block.nullspace_dims.clone(),
2296            layout.wiggle_from(theta),
2297            self.wiggle_block.initial_beta.clone(),
2298            n_rows,
2299        )?;
2300        Ok(vec![meanspec, noisespec, wigglespec])
2301    }
2302
2303    fn build_family(
2304        &self,
2305        mean_design: &TermCollectionDesign,
2306        noise_design: &TermCollectionDesign,
2307    ) -> Self::Family {
2308        let preparednoise_design =
2309            prepared_gaussian_log_sigma_design(&mean_design.design, &noise_design.design).expect(
2310                "prepared Gaussian log-sigma design should match wiggle block construction",
2311            );
2312        GaussianLocationScaleWiggleFamily {
2313            y: self.y.clone(),
2314            weights: self.weights.clone(),
2315            mu_design: Some(mean_design.design.clone()),
2316            log_sigma_design: Some(preparednoise_design),
2317            wiggle_knots: self.wiggle_knots.clone(),
2318            wiggle_degree: self.wiggle_degree,
2319            policy: gam_runtime::resource::ResourcePolicy::default_library(),
2320            cached_row_scalars: std::sync::RwLock::new(None),
2321        }
2322    }
2323
2324    fn extract_primary_betas(
2325        &self,
2326        fit: &UnifiedFitResult,
2327    ) -> Result<(Array1<f64>, Array1<f64>), String> {
2328        let mean_beta = fit
2329            .block_states
2330            .get(GaussianLocationScaleWiggleFamily::BLOCK_MU)
2331            .ok_or_else(|| "missing Gaussian wiggle mu block state".to_string())?
2332            .beta
2333            .clone();
2334        let noise_beta = fit
2335            .block_states
2336            .get(GaussianLocationScaleWiggleFamily::BLOCK_LOG_SIGMA)
2337            .ok_or_else(|| "missing Gaussian wiggle log_sigma block state".to_string())?
2338            .beta
2339            .clone();
2340        Ok((mean_beta, noise_beta))
2341    }
2342
2343    fn build_psiderivative_blocks(
2344        &self,
2345        data: ndarray::ArrayView2<'_, f64>,
2346        meanspec_resolved: &TermCollectionSpec,
2347        noisespec_resolved: &TermCollectionSpec,
2348        mean_design: &TermCollectionDesign,
2349        noise_design: &TermCollectionDesign,
2350    ) -> Result<Vec<Vec<CustomFamilyBlockPsiDerivative>>, String> {
2351        let mean_derivs =
2352            build_block_spatial_psi_derivatives(data, meanspec_resolved, mean_design)?.ok_or_else(
2353                || "missing Gaussian wiggle mean spatial psi derivatives".to_string(),
2354            )?;
2355        let noise_derivs =
2356            build_block_spatial_psi_derivatives(data, noisespec_resolved, noise_design)?
2357                .ok_or_else(|| {
2358                    "missing Gaussian wiggle log-sigma spatial psi derivatives".to_string()
2359                })?;
2360        Ok(vec![mean_derivs, noise_derivs, Vec::new()])
2361    }
2362}
2363
2364pub(crate) struct BinomialLocationScaleTermBuilder {
2365    pub(crate) y: Array1<f64>,
2366    pub(crate) weights: Array1<f64>,
2367    pub(crate) link_kind: InverseLink,
2368    pub(crate) meanspec: TermCollectionSpec,
2369    pub(crate) noisespec: TermCollectionSpec,
2370    pub(crate) mean_offset: Array1<f64>,
2371    pub(crate) noise_offset: Array1<f64>,
2372}
2373
2374impl LocationScaleFamilyBuilder for BinomialLocationScaleTermBuilder {
2375    type Family = BinomialLocationScaleFamily;
2376
2377    fn meanspec(&self) -> &TermCollectionSpec {
2378        &self.meanspec
2379    }
2380
2381    fn noisespec(&self) -> &TermCollectionSpec {
2382        &self.noisespec
2383    }
2384
2385    fn exact_spatial_joint_supported(&self) -> bool {
2386        true
2387    }
2388
2389    fn require_exact_spatial_joint(&self) -> bool {
2390        true
2391    }
2392
2393    fn noise_penalty_count(&self, noise_design: &TermCollectionDesign) -> usize {
2394        noise_design.penalties.len() + 1
2395    }
2396
2397    fn build_blocks(
2398        &self,
2399        theta: &Array1<f64>,
2400        mean_design: &TermCollectionDesign,
2401        noise_design: &TermCollectionDesign,
2402        mean_beta_hint: Option<Array1<f64>>,
2403        noise_beta_hint: Option<Array1<f64>>,
2404    ) -> Result<Vec<ParameterBlockSpec>, String> {
2405        let layout = GamlssLambdaLayout::two_block(
2406            mean_design.penalties.len(),
2407            self.noise_penalty_count(noise_design),
2408        );
2409        layout.validate_theta_len(theta.len(), "binomial location-scale")?;
2410        let (thresholdspec, log_sigmaspec) = build_binomial_threshold_and_scale_blocks(
2411            &self.y,
2412            &self.weights,
2413            &self.link_kind,
2414            mean_design,
2415            noise_design,
2416            &self.mean_offset,
2417            &self.noise_offset,
2418            layout.mean_from(theta),
2419            layout.noise_from(theta),
2420            mean_beta_hint,
2421            noise_beta_hint,
2422            "BinomialLocationScale::build_blocks",
2423        )?;
2424        Ok(vec![thresholdspec, log_sigmaspec])
2425    }
2426
2427    fn build_family(
2428        &self,
2429        mean_design: &TermCollectionDesign,
2430        noise_design: &TermCollectionDesign,
2431    ) -> Self::Family {
2432        let identifiednoise_design =
2433            identified_binomial_log_sigma_design(mean_design, noise_design, &self.weights)
2434                .expect("identified binomial log-sigma design");
2435        BinomialLocationScaleFamily {
2436            y: self.y.clone(),
2437            weights: self.weights.clone(),
2438            link_kind: self.link_kind.clone(),
2439            threshold_design: Some(mean_design.design.clone()),
2440            log_sigma_design: Some(identifiednoise_design),
2441            policy: gam_runtime::resource::ResourcePolicy::default_library(),
2442        }
2443    }
2444
2445    fn extract_primary_betas(
2446        &self,
2447        fit: &UnifiedFitResult,
2448    ) -> Result<(Array1<f64>, Array1<f64>), String> {
2449        let mean_beta = fit
2450            .block_states
2451            .get(BinomialLocationScaleFamily::BLOCK_T)
2452            .ok_or_else(|| "missing Binomial threshold block state".to_string())?
2453            .beta
2454            .clone();
2455        let noise_beta = fit
2456            .block_states
2457            .get(BinomialLocationScaleFamily::BLOCK_LOG_SIGMA)
2458            .ok_or_else(|| "missing Binomial log_sigma block state".to_string())?
2459            .beta
2460            .clone();
2461        Ok((mean_beta, noise_beta))
2462    }
2463
2464    fn build_psiderivative_blocks(
2465        &self,
2466        data: ndarray::ArrayView2<'_, f64>,
2467        meanspec_resolved: &TermCollectionSpec,
2468        noisespec_resolved: &TermCollectionSpec,
2469        mean_design: &TermCollectionDesign,
2470        noise_design: &TermCollectionDesign,
2471    ) -> Result<Vec<Vec<CustomFamilyBlockPsiDerivative>>, String> {
2472        let mean_derivs =
2473            build_block_spatial_psi_derivatives(data, meanspec_resolved, mean_design)?
2474                .ok_or_else(|| "missing threshold spatial psi derivatives".to_string())?;
2475        let noise_derivs =
2476            build_block_spatial_psi_derivatives(data, noisespec_resolved, noise_design)?
2477                .ok_or_else(|| "missing log_sigma spatial psi derivatives".to_string())?;
2478        Ok(vec![mean_derivs, noise_derivs])
2479    }
2480}
2481
2482pub(crate) struct BinomialLocationScaleWiggleTermBuilder {
2483    pub(crate) y: Array1<f64>,
2484    pub(crate) weights: Array1<f64>,
2485    pub(crate) link_kind: InverseLink,
2486    pub(crate) meanspec: TermCollectionSpec,
2487    pub(crate) noisespec: TermCollectionSpec,
2488    pub(crate) mean_offset: Array1<f64>,
2489    pub(crate) noise_offset: Array1<f64>,
2490    pub(crate) wiggle_knots: Array1<f64>,
2491    pub(crate) wiggle_degree: usize,
2492    pub(crate) wiggle_block: ParameterBlockInput,
2493}
2494
2495impl LocationScaleFamilyBuilder for BinomialLocationScaleWiggleTermBuilder {
2496    type Family = BinomialLocationScaleWiggleFamily;
2497
2498    fn meanspec(&self) -> &TermCollectionSpec {
2499        &self.meanspec
2500    }
2501
2502    fn noisespec(&self) -> &TermCollectionSpec {
2503        &self.noisespec
2504    }
2505
2506    fn exact_spatial_joint_supported(&self) -> bool {
2507        true
2508    }
2509
2510    fn require_exact_spatial_joint(&self) -> bool {
2511        true
2512    }
2513
2514    fn extra_rho0(&self) -> Result<Array1<f64>, String> {
2515        initial_log_lambdas_orzeros(&self.wiggle_block)
2516    }
2517
2518    fn noise_penalty_count(&self, noise_design: &TermCollectionDesign) -> usize {
2519        noise_design.penalties.len() + 1
2520    }
2521
2522    fn build_blocks(
2523        &self,
2524        theta: &Array1<f64>,
2525        mean_design: &TermCollectionDesign,
2526        noise_design: &TermCollectionDesign,
2527        mean_beta_hint: Option<Array1<f64>>,
2528        noise_beta_hint: Option<Array1<f64>>,
2529    ) -> Result<Vec<ParameterBlockSpec>, String> {
2530        let layout = GamlssLambdaLayout::withwiggle(
2531            mean_design.penalties.len(),
2532            self.noise_penalty_count(noise_design),
2533            self.wiggle_block.penalties.len(),
2534        );
2535        layout.validate_theta_len(theta.len(), "wiggle location-scale")?;
2536        let (mut thresholdspec, mut log_sigmaspec) = build_binomial_threshold_and_scale_blocks(
2537            &self.y,
2538            &self.weights,
2539            &self.link_kind,
2540            mean_design,
2541            noise_design,
2542            &self.mean_offset,
2543            &self.noise_offset,
2544            layout.mean_from(theta),
2545            layout.noise_from(theta),
2546            mean_beta_hint,
2547            noise_beta_hint,
2548            "BinomialLocationScaleWiggle::build_blocks",
2549        )?;
2550        // The dynamic monotone wiggle basis is regenerated at full raw width
2551        // every inner iteration and asserts `x.ncols() == spec.design.ncols()`
2552        // in `block_geometry`, so it cannot tolerate a canonical-gauge column
2553        // drop. The level/intercept direction the I-spline shares with the
2554        // threshold block must therefore be routed onto the threshold (and the
2555        // log-sigma) block, whose static designs are column-reducible and
2556        // lifted back via the canonical per-block transform `T`. Give both
2557        // non-wiggle blocks a lower gauge priority than the wiggle block (which
2558        // `build_location_scale_wiggle_block` fixes at 100) so the shared-level
2559        // alias drop lands on them and leaves the dynamic wiggle basis full
2560        // width — mirroring the binomial mean-wiggle path.
2561        thresholdspec.gauge_priority = LINK_WIGGLE_GAUGE_PRIORITY;
2562        log_sigmaspec.gauge_priority = LINK_WIGGLE_GAUGE_PRIORITY;
2563        let n_rows = thresholdspec.design.nrows();
2564        let wigglespec = build_location_scale_wiggle_block(
2565            "wiggle",
2566            self.wiggle_block.design.clone(),
2567            self.wiggle_block.offset.clone(),
2568            wiggle_block_penalty_matrices(&self.wiggle_block),
2569            vec![],
2570            layout.wiggle_from(theta),
2571            self.wiggle_block.initial_beta.clone(),
2572            n_rows,
2573        )?;
2574        Ok(vec![thresholdspec, log_sigmaspec, wigglespec])
2575    }
2576
2577    fn build_family(
2578        &self,
2579        mean_design: &TermCollectionDesign,
2580        noise_design: &TermCollectionDesign,
2581    ) -> Self::Family {
2582        let identifiednoise_design =
2583            identified_binomial_log_sigma_design(mean_design, noise_design, &self.weights)
2584                .expect("identified binomial log-sigma design should match block construction");
2585        BinomialLocationScaleWiggleFamily {
2586            y: self.y.clone(),
2587            weights: self.weights.clone(),
2588            link_kind: self.link_kind.clone(),
2589            threshold_design: Some(mean_design.design.clone()),
2590            log_sigma_design: Some(identifiednoise_design),
2591            wiggle_knots: self.wiggle_knots.clone(),
2592            wiggle_degree: self.wiggle_degree,
2593            policy: gam_runtime::resource::ResourcePolicy::default_library(),
2594        }
2595    }
2596
2597    fn extract_primary_betas(
2598        &self,
2599        fit: &UnifiedFitResult,
2600    ) -> Result<(Array1<f64>, Array1<f64>), String> {
2601        let mean_beta = fit
2602            .block_states
2603            .get(BinomialLocationScaleWiggleFamily::BLOCK_T)
2604            .ok_or_else(|| "missing Binomial wiggle threshold block state".to_string())?
2605            .beta
2606            .clone();
2607        let noise_beta = fit
2608            .block_states
2609            .get(BinomialLocationScaleWiggleFamily::BLOCK_LOG_SIGMA)
2610            .ok_or_else(|| "missing Binomial wiggle log_sigma block state".to_string())?
2611            .beta
2612            .clone();
2613        Ok((mean_beta, noise_beta))
2614    }
2615
2616    fn build_psiderivative_blocks(
2617        &self,
2618        data: ndarray::ArrayView2<'_, f64>,
2619        meanspec_resolved: &TermCollectionSpec,
2620        noisespec_resolved: &TermCollectionSpec,
2621        mean_design: &TermCollectionDesign,
2622        noise_design: &TermCollectionDesign,
2623    ) -> Result<Vec<Vec<CustomFamilyBlockPsiDerivative>>, String> {
2624        let mean_derivs =
2625            build_block_spatial_psi_derivatives(data, meanspec_resolved, mean_design)?
2626                .ok_or_else(|| "missing threshold spatial psi derivatives".to_string())?;
2627        let noise_derivs =
2628            build_block_spatial_psi_derivatives(data, noisespec_resolved, noise_design)?
2629                .ok_or_else(|| "missing log_sigma spatial psi derivatives".to_string())?;
2630        // The wiggle block has no direct spatial design matrix of its own in the
2631        // term builder. Spatial psi moves the wiggle family only through the
2632        // realized threshold/log-sigma designs, which in turn perturb q0 and the
2633        // realized wiggle basis B(q0). The exact joint wiggle psi hooks consume
2634        // those threshold/log-sigma derivative payloads and reconstruct the full
2635        // flattened likelihood-side [rho, psi] calculus internally, so the
2636        // wiggle block intentionally contributes no direct CustomFamilyBlockPsiDerivative
2637        // entries here.
2638        Ok(vec![mean_derivs, noise_derivs, Vec::new()])
2639    }
2640}
2641
2642pub(crate) fn fit_gaussian_location_scale_terms(
2643    data: ndarray::ArrayView2<'_, f64>,
2644    spec: GaussianLocationScaleTermSpec,
2645    options: &BlockwiseFitOptions,
2646    kappa_options: &SpatialLengthScaleOptimizationOptions,
2647) -> Result<BlockwiseTermFitResult, String> {
2648    validate_gaussian_location_scale_termspec(data, &spec, "fit_gaussian_location_scale_terms")?;
2649    fit_location_scale_terms(
2650        data,
2651        GaussianLocationScaleTermBuilder {
2652            y: spec.y,
2653            weights: spec.weights,
2654            meanspec: spec.meanspec,
2655            noisespec: spec.log_sigmaspec,
2656            mean_offset: spec.mean_offset,
2657            noise_offset: spec.log_sigma_offset,
2658        },
2659        options,
2660        kappa_options,
2661    )
2662}
2663
2664pub(crate) fn fit_gaussian_location_scalewiggle_terms(
2665    data: ndarray::ArrayView2<'_, f64>,
2666    spec: GaussianLocationScaleWiggleTermSpec,
2667    options: &BlockwiseFitOptions,
2668    kappa_options: &SpatialLengthScaleOptimizationOptions,
2669) -> Result<BlockwiseTermFitResult, String> {
2670    validate_gaussian_location_scalewiggle_termspec(
2671        data,
2672        &spec,
2673        "fit_gaussian_location_scalewiggle_terms",
2674    )?;
2675    fit_location_scale_terms(
2676        data,
2677        GaussianLocationScaleWiggleTermBuilder {
2678            y: spec.y,
2679            weights: spec.weights,
2680            meanspec: spec.meanspec,
2681            noisespec: spec.log_sigmaspec,
2682            mean_offset: spec.mean_offset,
2683            noise_offset: spec.log_sigma_offset,
2684            wiggle_knots: spec.wiggle_knots,
2685            wiggle_degree: spec.wiggle_degree,
2686            wiggle_block: spec.wiggle_block,
2687        },
2688        options,
2689        kappa_options,
2690    )
2691}
2692
2693pub(crate) fn select_gaussian_location_scale_link_wiggle_basis_from_pilot(
2694    pilot: &BlockwiseTermFitResult,
2695    wiggle_cfg: &WiggleBlockConfig,
2696    wiggle_penalty_orders: &[usize],
2697) -> Result<SelectedWiggleBasis, String> {
2698    let q_seed = pilot
2699        .fit
2700        .block_states
2701        .first()
2702        .ok_or_else(|| "pilot Gaussian wiggle fit is missing mean block".to_string())?
2703        .eta
2704        .view();
2705    select_wiggle_basis_from_seed(q_seed, wiggle_cfg, wiggle_penalty_orders)
2706}
2707
2708pub(crate) fn fit_gaussian_location_scale_terms_with_selected_wiggle(
2709    data: ndarray::ArrayView2<'_, f64>,
2710    spec: GaussianLocationScaleTermSpec,
2711    selected_wiggle_basis: SelectedWiggleBasis,
2712    options: &BlockwiseFitOptions,
2713    kappa_options: &SpatialLengthScaleOptimizationOptions,
2714) -> Result<BlockwiseTermWiggleFitResult, String> {
2715    let SelectedWiggleBasis {
2716        knots: wiggle_knots,
2717        degree: wiggle_degree,
2718        block: wiggle_block,
2719        ..
2720    } = selected_wiggle_basis;
2721    let solved = fit_gaussian_location_scalewiggle_terms(
2722        data,
2723        GaussianLocationScaleWiggleTermSpec {
2724            y: spec.y,
2725            weights: spec.weights,
2726            meanspec: spec.meanspec,
2727            log_sigmaspec: spec.log_sigmaspec,
2728            mean_offset: spec.mean_offset,
2729            log_sigma_offset: spec.log_sigma_offset,
2730            wiggle_knots: wiggle_knots.clone(),
2731            wiggle_degree,
2732            wiggle_block,
2733        },
2734        options,
2735        kappa_options,
2736    )?;
2737
2738    BlockwiseTermWiggleFitResult::try_from_parts(BlockwiseTermWiggleFitResultParts {
2739        fit: solved,
2740        wiggle_knots,
2741        wiggle_degree,
2742    })
2743}
2744
2745pub(crate) fn fit_binomial_location_scale_terms(
2746    data: ndarray::ArrayView2<'_, f64>,
2747    spec: BinomialLocationScaleTermSpec,
2748    options: &BlockwiseFitOptions,
2749    kappa_options: &SpatialLengthScaleOptimizationOptions,
2750) -> Result<BlockwiseTermFitResult, String> {
2751    validate_binomial_location_scale_termspec(data, &spec, "fit_binomial_location_scale_terms")?;
2752    fit_location_scale_terms(
2753        data,
2754        BinomialLocationScaleTermBuilder {
2755            y: spec.y,
2756            weights: spec.weights,
2757            link_kind: spec.link_kind,
2758            meanspec: spec.thresholdspec,
2759            noisespec: spec.log_sigmaspec,
2760            mean_offset: spec.threshold_offset,
2761            noise_offset: spec.log_sigma_offset,
2762        },
2763        options,
2764        kappa_options,
2765    )
2766}
2767
2768pub(crate) fn fit_binomial_location_scalewiggle_terms(
2769    data: ndarray::ArrayView2<'_, f64>,
2770    spec: BinomialLocationScaleWiggleTermSpec,
2771    options: &BlockwiseFitOptions,
2772    kappa_options: &SpatialLengthScaleOptimizationOptions,
2773) -> Result<BlockwiseTermFitResult, String> {
2774    validate_binomial_location_scalewiggle_termspec(
2775        data,
2776        &spec,
2777        "fit_binomial_location_scalewiggle_terms",
2778    )?;
2779    fit_location_scale_terms(
2780        data,
2781        BinomialLocationScaleWiggleTermBuilder {
2782            y: spec.y,
2783            weights: spec.weights,
2784            link_kind: spec.link_kind,
2785            meanspec: spec.thresholdspec,
2786            noisespec: spec.log_sigmaspec,
2787            mean_offset: spec.threshold_offset,
2788            noise_offset: spec.log_sigma_offset,
2789            wiggle_knots: spec.wiggle_knots,
2790            wiggle_degree: spec.wiggle_degree,
2791            wiggle_block: spec.wiggle_block,
2792        },
2793        options,
2794        kappa_options,
2795    )
2796}
2797
2798pub(crate) fn select_binomial_location_scale_link_wiggle_basis_from_pilot(
2799    pilot: &BlockwiseTermFitResult,
2800    wiggle_cfg: &WiggleBlockConfig,
2801    wiggle_penalty_orders: &[usize],
2802) -> Result<SelectedWiggleBasis, String> {
2803    let eta_t = pilot
2804        .fit
2805        .block_states
2806        .first()
2807        .ok_or_else(|| "pilot fit is missing threshold block".to_string())?
2808        .eta
2809        .view();
2810    let eta_ls = pilot
2811        .fit
2812        .block_states
2813        .get(1)
2814        .ok_or_else(|| "pilot fit is missing log_sigma block".to_string())?
2815        .eta
2816        .view();
2817    let sigma = eta_ls.mapv(safe_exp);
2818    let q_seed = Array1::from_iter(eta_t.iter().zip(sigma.iter()).map(|(&t, &s)| -t / s));
2819    select_wiggle_basis_from_seed(q_seed.view(), wiggle_cfg, wiggle_penalty_orders)
2820}
2821
2822pub(crate) fn fit_binomial_location_scale_terms_with_selected_wiggle(
2823    data: ndarray::ArrayView2<'_, f64>,
2824    spec: BinomialLocationScaleTermSpec,
2825    selected_wiggle_basis: SelectedWiggleBasis,
2826    options: &BlockwiseFitOptions,
2827    kappa_options: &SpatialLengthScaleOptimizationOptions,
2828) -> Result<BlockwiseTermWiggleFitResult, String> {
2829    let SelectedWiggleBasis {
2830        knots: wiggle_knots,
2831        degree: wiggle_degree,
2832        block: wiggle_block,
2833        ..
2834    } = selected_wiggle_basis;
2835    let solved = fit_binomial_location_scalewiggle_terms(
2836        data,
2837        BinomialLocationScaleWiggleTermSpec {
2838            y: spec.y,
2839            weights: spec.weights,
2840            link_kind: spec.link_kind,
2841            thresholdspec: spec.thresholdspec,
2842            log_sigmaspec: spec.log_sigmaspec,
2843            threshold_offset: spec.threshold_offset,
2844            log_sigma_offset: spec.log_sigma_offset,
2845            wiggle_knots: wiggle_knots.clone(),
2846            wiggle_degree,
2847            wiggle_block,
2848        },
2849        options,
2850        kappa_options,
2851    )?;
2852
2853    BlockwiseTermWiggleFitResult::try_from_parts(BlockwiseTermWiggleFitResultParts {
2854        fit: solved,
2855        wiggle_knots,
2856        wiggle_degree,
2857    })
2858}
2859
2860pub(crate) fn select_binomial_mean_link_wiggle_basis_from_pilot(
2861    pilot_design: &TermCollectionDesign,
2862    pilot_fit: &UnifiedFitResult,
2863    wiggle_cfg: &WiggleBlockConfig,
2864    wiggle_penalty_orders: &[usize],
2865) -> Result<SelectedWiggleBasis, String> {
2866    let q_seed = pilot_design.design.dot(&pilot_fit.beta);
2867    select_wiggle_basis_from_seed(q_seed.view(), wiggle_cfg, wiggle_penalty_orders)
2868}
2869
2870pub(crate) fn fit_binomial_mean_wiggle_terms_with_selected_basis(
2871    data: ndarray::ArrayView2<'_, f64>,
2872    pilot_spec: &TermCollectionSpec,
2873    pilot_design: &TermCollectionDesign,
2874    pilot_fit: &UnifiedFitResult,
2875    y: &Array1<f64>,
2876    weights: &Array1<f64>,
2877    link_kind: InverseLink,
2878    selected_wiggle_basis: SelectedWiggleBasis,
2879    options: &BlockwiseFitOptions,
2880    kappa_options: &SpatialLengthScaleOptimizationOptions,
2881) -> Result<BinomialMeanWiggleTermFitResult, String> {
2882    const RHO_BOUND: f64 = 12.0;
2883
2884    validate_term_weights(
2885        data,
2886        y.len(),
2887        weights,
2888        "fit_binomial_mean_wiggle_terms_with_selected_basis",
2889    )?;
2890    validate_binomial_response(y, "fit_binomial_mean_wiggle_terms_with_selected_basis")?;
2891
2892    // Large-n binomial mean-wiggle fits keep the caller's explicit Hessian
2893    // request. The unified evaluator chooses the scalable exact representation
2894    // (dense for small work, operator HVP for large work) instead of routing to
2895    // gradient-only BFGS by observation count.
2896
2897    let SelectedWiggleBasis {
2898        knots: wiggle_knots,
2899        degree: wiggle_degree,
2900        block: wiggle_block,
2901        ..
2902    } = selected_wiggle_basis;
2903
2904    let spatial_terms = spatial_length_scale_term_indices(pilot_spec);
2905    if spatial_terms.is_empty() {
2906        let (fit, saved_warp_beta) = fit_binomial_mean_wiggle(
2907            BinomialMeanWiggleSpec {
2908                y: y.clone(),
2909                weights: weights.clone(),
2910                link_kind,
2911                wiggle_knots: wiggle_knots.clone(),
2912                wiggle_degree,
2913                eta_block: ParameterBlockInput {
2914                    design: pilot_design.design.clone(),
2915                    offset: Array1::zeros(y.len()),
2916                    penalties: pilot_design
2917                        .penalties
2918                        .iter()
2919                        .map(crate::model_types::PenaltySpec::from_blockwise_ref)
2920                        .collect(),
2921                    nullspace_dims: vec![],
2922                    initial_log_lambdas: Some(
2923                        pilot_fit
2924                            .lambdas
2925                            .mapv(|v| v.max(WARMSTART_LOG_LAMBDA_FLOOR).ln()),
2926                    ),
2927                    initial_beta: Some(pilot_fit.beta.clone()),
2928                },
2929                wiggle_block,
2930            },
2931            options,
2932        )?;
2933        return Ok(BinomialMeanWiggleTermFitResult {
2934            fit,
2935            resolvedspec: pilot_spec.clone(),
2936            design: pilot_design.clone(),
2937            wiggle_knots,
2938            wiggle_degree,
2939            saved_warp_beta,
2940        });
2941    }
2942
2943    let dims_per_term = spatial_dims_per_term(pilot_spec, &spatial_terms);
2944    let log_kappa0 =
2945        SpatialLogKappaCoords::from_length_scales_aniso(pilot_spec, &spatial_terms, kappa_options)
2946            .reseed_from_data(data, pilot_spec, &spatial_terms, kappa_options);
2947    let log_kappa_lower = SpatialLogKappaCoords::lower_bounds_aniso_from_data(
2948        data,
2949        pilot_spec,
2950        &spatial_terms,
2951        &dims_per_term,
2952        kappa_options,
2953    );
2954    let log_kappa_upper = SpatialLogKappaCoords::upper_bounds_aniso_from_data(
2955        data,
2956        pilot_spec,
2957        &spatial_terms,
2958        &dims_per_term,
2959        kappa_options,
2960    );
2961    // Project seed onto bounds; spec.length_scale is a hint, not a constraint.
2962    let log_kappa0 = log_kappa0.clamp_to_bounds(&log_kappa_lower, &log_kappa_upper);
2963
2964    let eta_penalty_count = pilot_design.penalties.len();
2965    let wiggle_penalty_count = initial_log_lambdas_orzeros(&wiggle_block)?.len();
2966    let rho_dim = eta_penalty_count + wiggle_penalty_count;
2967    let baseline_resolvedspec = log_kappa0
2968        .apply_tospec(pilot_spec, &spatial_terms)
2969        .map_err(|e| e.to_string())?;
2970    let baseline_design =
2971        build_term_collection_design(data, &baseline_resolvedspec).map_err(|e| e.to_string())?;
2972    let baseline_fit = fit_binomial_mean_wiggle(
2973        BinomialMeanWiggleSpec {
2974            y: y.clone(),
2975            weights: weights.clone(),
2976            link_kind: link_kind.clone(),
2977            wiggle_knots: wiggle_knots.clone(),
2978            wiggle_degree,
2979            eta_block: ParameterBlockInput {
2980                design: baseline_design.design.clone(),
2981                offset: Array1::zeros(y.len()),
2982                penalties: baseline_design
2983                    .penalties
2984                    .iter()
2985                    .map(crate::model_types::PenaltySpec::from_blockwise_ref)
2986                    .collect(),
2987                nullspace_dims: vec![],
2988                initial_log_lambdas: Some(
2989                    pilot_fit
2990                        .lambdas
2991                        .mapv(|v| v.max(WARMSTART_LOG_LAMBDA_FLOOR).ln()),
2992                ),
2993                initial_beta: Some(pilot_fit.beta.clone()),
2994            },
2995            wiggle_block: wiggle_block.clone(),
2996        },
2997        options,
2998    )?
2999    .0;
3000    let baseline_log_lambdas = baseline_fit
3001        .lambdas
3002        .mapv(|v| v.max(WARMSTART_LOG_LAMBDA_FLOOR).ln());
3003    if baseline_log_lambdas.len() != rho_dim {
3004        return Err(GamlssError::DimensionMismatch {
3005            reason: format!(
3006                "baseline binomial mean-wiggle fit returned {} log-lambdas, expected {rho_dim}",
3007                baseline_log_lambdas.len()
3008            ),
3009        }
3010        .into());
3011    }
3012    let baseline_eta_beta = baseline_fit
3013        .block_states
3014        .get(BinomialMeanWiggleFamily::BLOCK_ETA)
3015        .ok_or_else(|| "baseline binomial mean-wiggle fit missing eta block".to_string())?
3016        .beta
3017        .clone();
3018    let baseline_wiggle_beta = Some(
3019        baseline_fit
3020            .block_states
3021            .get(BinomialMeanWiggleFamily::BLOCK_WIGGLE)
3022            .ok_or_else(|| "baseline binomial mean-wiggle fit missing wiggle block".to_string())?
3023            .beta
3024            .clone(),
3025    );
3026    let theta_dim = rho_dim + log_kappa0.len();
3027    let mut theta0 = Array1::<f64>::zeros(theta_dim);
3028    theta0
3029        .slice_mut(s![0..rho_dim])
3030        .assign(&baseline_log_lambdas);
3031    theta0
3032        .slice_mut(s![rho_dim..theta_dim])
3033        .assign(log_kappa0.as_array());
3034
3035    let mut lower = Array1::<f64>::from_elem(theta_dim, -RHO_BOUND);
3036    let mut upper = Array1::<f64>::from_elem(theta_dim, RHO_BOUND);
3037    lower
3038        .slice_mut(s![rho_dim..theta_dim])
3039        .assign(log_kappa_lower.as_array());
3040    upper
3041        .slice_mut(s![rho_dim..theta_dim])
3042        .assign(log_kappa_upper.as_array());
3043
3044    let pilot_spec_cloned = pilot_spec.clone();
3045    let pilot_beta = baseline_eta_beta;
3046    let wiggle_design = wiggle_block.design.clone();
3047    let wiggle_offset = wiggle_block.offset.clone();
3048    let wiggle_penalties = wiggle_block.penalties.clone();
3049    let wiggle_initial_beta = baseline_wiggle_beta;
3050    let wiggle_knots_cloned = wiggle_knots.clone();
3051    let y_cloned = y.clone();
3052    let weights_cloned = weights.clone();
3053    let link_kind_cloned = link_kind.clone();
3054    let outer_family = BinomialMeanWiggleFamily {
3055        y: y_cloned.clone(),
3056        weights: weights_cloned.clone(),
3057        link_kind: link_kind_cloned.clone(),
3058        wiggle_knots: wiggle_knots_cloned.clone(),
3059        wiggle_degree,
3060        policy: gam_runtime::resource::ResourcePolicy::default_library(),
3061        // The spatial joint-κ path keeps the dynamic warp basis (#1596 frozen
3062        // basis applies to the non-spatial `fit_binomial_mean_wiggle` loop).
3063        frozen_warp_design: None,
3064    };
3065    let screening_cap = Arc::new(AtomicUsize::new(0));
3066    let mut outer_options = options.clone();
3067    outer_options.screening_max_inner_iterations = Some(Arc::clone(&screening_cap));
3068    struct MeanWiggleOuterState {
3069        pub(crate) warm_cache: Option<crate::custom_family::CustomFamilyWarmStart>,
3070        pub(crate) last_eval: Option<(
3071            Array1<f64>,
3072            f64,
3073            Array1<f64>,
3074            gam_problem::HessianResult,
3075            crate::custom_family::CustomFamilyWarmStart,
3076        )>,
3077    }
3078
3079    let build_realized_blocks = |theta: &Array1<f64>| -> Result<
3080        (
3081            TermCollectionSpec,
3082            TermCollectionDesign,
3083            Vec<ParameterBlockSpec>,
3084            Vec<CustomFamilyBlockPsiDerivative>,
3085        ),
3086        String,
3087    > {
3088        let log_kappa =
3089            SpatialLogKappaCoords::from_theta_tail_with_dims(theta, rho_dim, dims_per_term.clone());
3090        let resolvedspec = log_kappa
3091            .apply_tospec(&pilot_spec_cloned, &spatial_terms)
3092            .map_err(|e| e.to_string())?;
3093        let design =
3094            build_term_collection_design(data, &resolvedspec).map_err(|e| e.to_string())?;
3095        let eta_derivs = build_block_spatial_psi_derivatives(data, &resolvedspec, &design)?
3096            .ok_or_else(|| {
3097                "missing eta spatial psi derivatives for binomial mean wiggle".to_string()
3098            })?;
3099        let blocks = vec![
3100            ParameterBlockSpec {
3101                name: "eta".to_string(),
3102                design: design.design.clone(),
3103                offset: Array1::zeros(y_cloned.len()),
3104                penalties: design.penalties_as_penalty_matrix(),
3105                nullspace_dims: vec![],
3106                initial_log_lambdas: theta.slice(s![0..eta_penalty_count]).to_owned(),
3107                initial_beta: Some(pilot_beta.clone()),
3108                // Lower gauge priority on the static eta design: it yields the
3109                // shared level/intercept direction to the dynamic full-width
3110                // wiggle I-spline block (see fit_binomial_mean_wiggle).
3111                gauge_priority: LINK_WIGGLE_GAUGE_PRIORITY,
3112                jacobian_callback: None,
3113                stacked_design: None,
3114                stacked_offset: None,
3115            },
3116            ParameterBlockSpec {
3117                name: "wiggle".to_string(),
3118                design: wiggle_design.clone(),
3119                offset: wiggle_offset.clone(),
3120                penalties: {
3121                    let p_wiggle = wiggle_design.ncols();
3122                    wiggle_penalties
3123                        .iter()
3124                        .map(|spec| match spec {
3125                            crate::model_types::PenaltySpec::Block {
3126                                local, col_range, ..
3127                            } => PenaltyMatrix::Blockwise {
3128                                local: local.clone(),
3129                                col_range: col_range.clone(),
3130                                total_dim: p_wiggle,
3131                            },
3132                            crate::model_types::PenaltySpec::Dense(m)
3133                            | crate::model_types::PenaltySpec::DenseWithMean {
3134                                matrix: m, ..
3135                            } => PenaltyMatrix::Dense(m.clone()),
3136                        })
3137                        .collect()
3138                },
3139                nullspace_dims: vec![],
3140                initial_log_lambdas: theta.slice(s![eta_penalty_count..rho_dim]).to_owned(),
3141                initial_beta: wiggle_initial_beta.clone(),
3142                gauge_priority: DEFAULT_GAUGE_PRIORITY,
3143                jacobian_callback: None,
3144                stacked_design: None,
3145                stacked_offset: None,
3146            },
3147        ];
3148        Ok((resolvedspec, design, blocks, eta_derivs))
3149    };
3150
3151    let build_eval = |theta: &Array1<f64>,
3152                      warm_cache: Option<&crate::custom_family::CustomFamilyWarmStart>,
3153                      need_hessian: bool|
3154     -> Result<
3155        (
3156            crate::custom_family::CustomFamilyJointHyperResult,
3157            TermCollectionSpec,
3158            TermCollectionDesign,
3159        ),
3160        String,
3161    > {
3162        let (resolvedspec, design, blocks, eta_derivs) = build_realized_blocks(theta)?;
3163        let eval = evaluate_custom_family_joint_hyper(
3164            &outer_family,
3165            &blocks,
3166            &outer_options,
3167            &theta.slice(s![0..rho_dim]).to_owned(),
3168            &[eta_derivs, Vec::new()],
3169            warm_cache,
3170            if need_hessian {
3171                gam_problem::EvalMode::ValueGradientHessian
3172            } else {
3173                gam_problem::EvalMode::ValueAndGradient
3174            },
3175        )?;
3176        Ok((eval, resolvedspec, design))
3177    };
3178
3179    let build_efs = |theta: &Array1<f64>,
3180                     warm_cache: Option<&crate::custom_family::CustomFamilyWarmStart>|
3181     -> Result<crate::custom_family::CustomFamilyJointHyperEfsResult, String> {
3182        let (_, _, blocks, eta_derivs) = build_realized_blocks(theta)?;
3183        evaluate_custom_family_joint_hyper_efs(
3184            &outer_family,
3185            &blocks,
3186            &outer_options,
3187            &theta.slice(s![0..rho_dim]).to_owned(),
3188            &[eta_derivs, Vec::new()],
3189            warm_cache,
3190        )
3191        .map_err(|e| e.to_string())
3192    };
3193
3194    use crate::model_types::EstimationError;
3195    use gam_solve::rho_optimizer::OuterEvalOrder;
3196    use gam_problem::{DeclaredHessianForm, Derivative, OuterEval};
3197
3198    // Exact first-order AND second-order [rho, psi] calculus is available
3199    // for all inverse links via the shared jet formulas plus the generic
3200    // exact-Newton D_βH / D²_βH closures routed through
3201    // evaluate_custom_family_joint_hyper -> joint_outer_evaluate ->
3202    // BorrowedJointDerivProvider. This enables the analytic-Hessian outer
3203    // plan for REML optimization instead of the downgraded gradient-only
3204    // outer strategies.
3205    //
3206    // Spatial log-kappa coordinates are ψ (design-moving) dimensions because
3207    // they rebuild the spatial basis and penalties at each outer proposal.
3208    let analytic_outer_hessian_available = true;
3209    let mut seed_heuristic = theta0.to_vec();
3210    for value in &mut seed_heuristic[..rho_dim] {
3211        *value = value.exp();
3212    }
3213    let problem = gam_solve::rho_optimizer::OuterProblem::new(theta_dim)
3214        .with_gradient(Derivative::Analytic)
3215        .with_hessian(if analytic_outer_hessian_available {
3216            DeclaredHessianForm::Either
3217        } else {
3218            DeclaredHessianForm::Unavailable
3219        })
3220        .with_psi_dim(theta_dim - rho_dim)
3221        .with_tolerance(options.outer_tol)
3222        .with_max_iter(options.outer_max_iter)
3223        .with_bounds(lower.clone(), upper.clone())
3224        .with_initial_rho(theta0.clone())
3225        .with_seed_config(crate::seeding::SeedConfig {
3226            max_seeds: 4,
3227            seed_budget: 2,
3228            risk_profile: crate::seeding::SeedRiskProfile::GeneralizedLinear,
3229            num_auxiliary_trailing: theta_dim - rho_dim,
3230            ..Default::default()
3231        })
3232        .with_screening_cap(Arc::clone(&screening_cap))
3233        .with_rho_bound(12.0)
3234        .with_heuristic_lambdas(seed_heuristic);
3235
3236    let eval_outer = |state: &mut MeanWiggleOuterState,
3237                      theta: &Array1<f64>,
3238                      order: OuterEvalOrder|
3239     -> Result<OuterEval, EstimationError> {
3240        if let Some((cached_theta, cached_cost, cached_grad, cached_hess, cached_warm)) =
3241            &state.last_eval
3242            && cached_theta == theta
3243            && (!matches!(order, OuterEvalOrder::ValueGradientHessian)
3244                || matches!(
3245                    cached_hess,
3246                    gam_problem::HessianResult::Analytic(_)
3247                        | gam_problem::HessianResult::Operator(_)
3248                ))
3249        {
3250            state.warm_cache = Some(cached_warm.clone());
3251            return Ok(OuterEval {
3252                cost: *cached_cost,
3253                gradient: cached_grad.clone(),
3254                hessian: cached_hess.clone(),
3255                inner_beta_hint: None,
3256            });
3257        }
3258        let need_hessian = matches!(order, OuterEvalOrder::ValueGradientHessian)
3259            && analytic_outer_hessian_available;
3260        let (eval, _, _) = build_eval(theta, state.warm_cache.as_ref(), need_hessian)
3261            .map_err(EstimationError::InvalidInput)?;
3262        if !eval.inner_converged {
3263            state.warm_cache = Some(eval.warm_start);
3264            crate::bail_invalid_estim!(
3265                "binomial mean-wiggle exact spatial inner solve did not converge"
3266            );
3267        }
3268        let hessian_result = eval.outer_hessian.clone();
3269        state.last_eval = Some((
3270            theta.clone(),
3271            eval.objective,
3272            eval.gradient.clone(),
3273            eval.outer_hessian.clone(),
3274            eval.warm_start.clone(),
3275        ));
3276        state.warm_cache = Some(eval.warm_start);
3277        Ok(OuterEval {
3278            cost: eval.objective,
3279            gradient: eval.gradient,
3280            hessian: hessian_result,
3281            inner_beta_hint: None,
3282        })
3283    };
3284
3285    let mut obj = problem.build_objective_with_screening_proxy(
3286        MeanWiggleOuterState {
3287            warm_cache: None,
3288            last_eval: None,
3289        },
3290        |state: &mut MeanWiggleOuterState, theta: &Array1<f64>| {
3291            if let Some((cached_theta, cached_cost, _, _, cached_warm)) = &state.last_eval
3292                && cached_theta == theta
3293            {
3294                state.warm_cache = Some(cached_warm.clone());
3295                return Ok(*cached_cost);
3296            }
3297            let (eval, _, _) = build_eval(theta, state.warm_cache.as_ref(), false)
3298                .map_err(EstimationError::InvalidInput)?;
3299            if !eval.inner_converged {
3300                state.warm_cache = Some(eval.warm_start);
3301                crate::bail_invalid_estim!(
3302                    "binomial mean-wiggle exact spatial cost inner solve did not converge"
3303                        .to_string(),
3304                );
3305            }
3306            state.warm_cache = Some(eval.warm_start);
3307            Ok(eval.objective)
3308        },
3309        |state: &mut MeanWiggleOuterState, theta: &Array1<f64>| {
3310            eval_outer(
3311                state,
3312                theta,
3313                if analytic_outer_hessian_available {
3314                    OuterEvalOrder::ValueGradientHessian
3315                } else {
3316                    OuterEvalOrder::ValueAndGradient
3317                },
3318            )
3319        },
3320        |state: &mut MeanWiggleOuterState, theta: &Array1<f64>, order: OuterEvalOrder| {
3321            eval_outer(state, theta, order)
3322        },
3323        Some(|state: &mut MeanWiggleOuterState| {
3324            state.warm_cache = None;
3325            state.last_eval = None;
3326        }),
3327        Some(|state: &mut MeanWiggleOuterState, theta: &Array1<f64>| {
3328            let eval = build_efs(theta, state.warm_cache.as_ref())
3329                .map_err(EstimationError::InvalidInput)?;
3330            if !eval.inner_converged {
3331                state.warm_cache = Some(eval.warm_start);
3332                crate::bail_invalid_estim!(
3333                    "binomial mean-wiggle exact spatial EFS inner solve did not converge"
3334                        .to_string(),
3335                );
3336            }
3337            state.warm_cache = Some(eval.warm_start);
3338            Ok(eval.efs_eval)
3339        }),
3340        // Seed-screening ranking proxy (#969). The cost closure above
3341        // hard-errors on a non-converged inner solve — correct for
3342        // line-search costs, but under the screening cap (wired into the
3343        // outer options and installed by the cascade) the inner solve is
3344        // truncated BY DESIGN, so screening through it rejects every seed
3345        // — the all-seeds-rejected front-door genus. Screening only RANKS
3346        // candidates: the truncated solve's penalized objective is the
3347        // ranking signal; convergence is demanded of the selected seed's
3348        // full-budget fit, not of capped probes.
3349        |state: &mut MeanWiggleOuterState, theta: &Array1<f64>| {
3350            if let Some((cached_theta, cached_cost, _, _, cached_warm)) = &state.last_eval
3351                && cached_theta == theta
3352            {
3353                state.warm_cache = Some(cached_warm.clone());
3354                return Ok(*cached_cost);
3355            }
3356            let (eval, _, _) = build_eval(theta, state.warm_cache.as_ref(), false)
3357                .map_err(EstimationError::InvalidInput)?;
3358            state.warm_cache = Some(eval.warm_start);
3359            Ok(eval.objective)
3360        },
3361    );
3362
3363    let outer = problem
3364        .run(&mut obj, "binomial mean wiggle exact spatial hyper")
3365        .map_err(|e| e.to_string())?;
3366    if !outer.converged {
3367        return Err(GamlssError::NumericalFailure { reason: format!(
3368            "binomial mean wiggle exact spatial hyper did not converge after {} iterations (final_objective={:.6e}, final_grad_norm={})",
3369            outer.iterations,
3370            outer.final_value,
3371            outer.final_grad_norm_report(),
3372        ) }.into());
3373    }
3374    let theta_star = outer.rho;
3375
3376    let log_kappa =
3377        SpatialLogKappaCoords::from_theta_tail_with_dims(&theta_star, rho_dim, dims_per_term);
3378    let resolvedspec = log_kappa
3379        .apply_tospec(&pilot_spec_cloned, &spatial_terms)
3380        .map_err(|e| e.to_string())?;
3381    let design = build_term_collection_design(data, &resolvedspec).map_err(|e| e.to_string())?;
3382    let resolvedspec =
3383        freeze_term_collection_from_design(&resolvedspec, &design).map_err(|e| e.to_string())?;
3384    let fit = fit_binomial_mean_wiggle(
3385        BinomialMeanWiggleSpec {
3386            y: y_cloned,
3387            weights: weights_cloned,
3388            link_kind: link_kind_cloned,
3389            wiggle_knots: wiggle_knots.clone(),
3390            wiggle_degree,
3391            eta_block: ParameterBlockInput {
3392                design: design.design.clone(),
3393                offset: Array1::zeros(y.len()),
3394                penalties: design
3395                    .penalties
3396                    .iter()
3397                    .map(crate::model_types::PenaltySpec::from_blockwise_ref)
3398                    .collect(),
3399                nullspace_dims: vec![],
3400                initial_log_lambdas: Some(theta_star.slice(s![0..eta_penalty_count]).to_owned()),
3401                initial_beta: Some(pilot_beta),
3402            },
3403            wiggle_block: ParameterBlockInput {
3404                design: wiggle_design,
3405                offset: wiggle_offset,
3406                penalties: wiggle_penalties,
3407                nullspace_dims: vec![],
3408                initial_log_lambdas: Some(
3409                    theta_star.slice(s![eta_penalty_count..rho_dim]).to_owned(),
3410                ),
3411                initial_beta: wiggle_initial_beta,
3412            },
3413        },
3414        options,
3415    )?;
3416    let (fit, saved_warp_beta) = fit;
3417
3418    Ok(BinomialMeanWiggleTermFitResult {
3419        fit,
3420        resolvedspec,
3421        design,
3422        wiggle_knots,
3423        wiggle_degree,
3424        saved_warp_beta,
3425    })
3426}