Skip to main content

gam_models/gamlss/gaussian/
wiggle.rs

1// Real concern-organized submodule of the gamlss family stack.
2// Cross-module items are re-exported flat through the parent (`gamlss.rs`),
3// so `use super::*;` makes the sibling-concern symbols this module references
4// resolve through the parent namespace.
5use super::*;
6
7pub(crate) struct GaussianLocationScaleWiggleGeometry {
8    pub(crate) basis: Array2<f64>,
9    pub(crate) basis_d1: Array2<f64>,
10    pub(crate) basis_d2: Array2<f64>,
11    pub(crate) basis_d3: Array2<f64>,
12    pub(crate) dq_dq0: Array1<f64>,
13    pub(crate) d2q_dq02: Array1<f64>,
14    pub(crate) d3q_dq03: Array1<f64>,
15    pub(crate) d4q_dq04: Array1<f64>,
16}
17
18/// Per-row pieces of the 3-block Gaussian location-scale-wiggle joint
19/// Hessian. Both the dense path and the matrix-free workspace share these
20/// row coefficients; only the assembly differs.
21pub(crate) struct GaussianLocationScaleWiggleHessianRowPieces {
22    pub(crate) coeff_mm: Array1<f64>,
23    pub(crate) coeff_ml: Array1<f64>,
24    pub(crate) coeff_ll: Array1<f64>,
25    pub(crate) coeff_mw_b: Array1<f64>,
26    pub(crate) coeff_mw_d: Array1<f64>,
27    pub(crate) coeff_lw_b: Array1<f64>,
28    pub(crate) coeff_ww: Array1<f64>,
29    pub(crate) basis: Array2<f64>,
30    pub(crate) basis_d1: Array2<f64>,
31}
32
33impl GaussianLocationScaleWiggleHessianRowPieces {
34    pub(crate) fn assemble_dense(
35        &self,
36        xmu: &Array2<f64>,
37        x_ls: &Array2<f64>,
38    ) -> Result<Array2<f64>, String> {
39        let h_mm = xt_diag_x_dense(xmu, &self.coeff_mm)?;
40        let h_ml = xt_diag_y_dense(xmu, &self.coeff_ml, x_ls)?;
41        let h_ll = xt_diag_x_dense(x_ls, &self.coeff_ll)?;
42        let h_mw = xt_diag_y_dense(xmu, &self.coeff_mw_b, &self.basis)?
43            + &xt_diag_y_dense(xmu, &self.coeff_mw_d, &self.basis_d1)?;
44        let h_lw = xt_diag_y_dense(x_ls, &self.coeff_lw_b, &self.basis)?;
45        let h_ww = xt_diag_x_dense(&self.basis, &self.coeff_ww)?;
46        Ok(gaussian_pack_wiggle_joint_symmetrichessian(
47            &h_mm, &h_ml, &h_mw, &h_ll, &h_lw, &h_ww,
48        ))
49    }
50}
51
52pub struct GaussianLocationScaleWiggleFamily {
53    pub y: Array1<f64>,
54    pub weights: Array1<f64>,
55    pub mu_design: Option<DesignMatrix>,
56    pub log_sigma_design: Option<DesignMatrix>,
57    pub wiggle_knots: Array1<f64>,
58    pub wiggle_degree: usize,
59    /// Resource policy threaded into PsiDesignMap construction (and any other
60    /// per-call materialization decision) made during exact-Newton joint psi
61    /// derivative evaluation. Defaults to `ResourcePolicy::default_library()`
62    /// when the family is built without an explicit policy.
63    pub policy: gam_runtime::resource::ResourcePolicy,
64    pub(crate) cached_row_scalars:
65        std::sync::RwLock<Option<(f64, f64, f64, f64, f64, f64, Arc<GaussianJointRowScalars>)>>,
66}
67
68impl Clone for GaussianLocationScaleWiggleFamily {
69    fn clone(&self) -> Self {
70        Self {
71            y: self.y.clone(),
72            weights: self.weights.clone(),
73            mu_design: self.mu_design.clone(),
74            log_sigma_design: self.log_sigma_design.clone(),
75            wiggle_knots: self.wiggle_knots.clone(),
76            wiggle_degree: self.wiggle_degree,
77            policy: self.policy.clone(),
78            cached_row_scalars: std::sync::RwLock::new(
79                self.cached_row_scalars
80                    .read()
81                    .expect("lock poisoned")
82                    .clone(),
83            ),
84        }
85    }
86}
87
88impl GaussianLocationScaleWiggleFamily {
89    pub const BLOCK_MU: usize = 0;
90    pub const BLOCK_LOG_SIGMA: usize = 1;
91    pub const BLOCK_WIGGLE: usize = 2;
92
93    pub fn parameternames() -> &'static [&'static str] {
94        &["mu", "log_sigma", "wiggle"]
95    }
96
97    pub fn parameter_links() -> &'static [ParameterLink] {
98        &[
99            ParameterLink::Identity,
100            ParameterLink::Log,
101            ParameterLink::Wiggle,
102        ]
103    }
104
105    pub fn metadata() -> FamilyMetadata {
106        FamilyMetadata {
107            name: "gaussian_location_scalewiggle",
108            parameternames: Self::parameternames(),
109            parameter_links: Self::parameter_links(),
110        }
111    }
112
113    pub(crate) fn exact_joint_supported(&self) -> bool {
114        self.mu_design.is_some() && self.log_sigma_design.is_some()
115    }
116
117    pub(crate) fn wiggle_basiswith_options(
118        &self,
119        q0: ArrayView1<'_, f64>,
120        options: BasisOptions,
121    ) -> Result<Array2<f64>, String> {
122        monotone_wiggle_basis_with_derivative_order(
123            q0,
124            &self.wiggle_knots,
125            self.wiggle_degree,
126            options.derivative_order,
127        )
128    }
129
130    pub(crate) fn wiggle_design(&self, q0: ArrayView1<'_, f64>) -> Result<Array2<f64>, String> {
131        self.wiggle_basiswith_options(q0, BasisOptions::value())
132    }
133
134    pub(crate) fn wiggle_dq_dq0(
135        &self,
136        q0: ArrayView1<'_, f64>,
137        beta_link_wiggle: ArrayView1<'_, f64>,
138    ) -> Result<Array1<f64>, String> {
139        let d1 = self.wiggle_basiswith_options(q0, BasisOptions::first_derivative())?;
140        if d1.ncols() != beta_link_wiggle.len() {
141            return Err(GamlssError::DimensionMismatch { reason: format!(
142                "wiggle derivative/beta mismatch: basis has {} columns but beta_link_wiggle has {} coefficients",
143                d1.ncols(),
144                beta_link_wiggle.len()
145            ) }.into());
146        }
147        Ok(d1.dot(&beta_link_wiggle) + 1.0)
148    }
149
150    pub(crate) fn wiggle_d2q_dq02(
151        &self,
152        q0: ArrayView1<'_, f64>,
153        beta_link_wiggle: ArrayView1<'_, f64>,
154    ) -> Result<Array1<f64>, String> {
155        let d2 = self.wiggle_basiswith_options(q0, BasisOptions::second_derivative())?;
156        if d2.ncols() != beta_link_wiggle.len() {
157            return Err(GamlssError::DimensionMismatch { reason: format!(
158                "wiggle second-derivative/beta mismatch: basis has {} columns but beta_link_wiggle has {} coefficients",
159                d2.ncols(),
160                beta_link_wiggle.len()
161            ) }.into());
162        }
163        Ok(d2.dot(&beta_link_wiggle))
164    }
165
166    pub(crate) fn wiggle_d3basis_constrained(
167        &self,
168        q0: ArrayView1<'_, f64>,
169    ) -> Result<Array2<f64>, String> {
170        monotone_wiggle_basis_with_derivative_order(q0, &self.wiggle_knots, self.wiggle_degree, 3)
171    }
172
173    pub(crate) fn wiggle_d3q_dq03(
174        &self,
175        q0: ArrayView1<'_, f64>,
176        beta_link_wiggle: ArrayView1<'_, f64>,
177    ) -> Result<Array1<f64>, String> {
178        let d3 = self.wiggle_d3basis_constrained(q0)?;
179        if d3.ncols() != beta_link_wiggle.len() {
180            return Err(GamlssError::DimensionMismatch { reason: format!(
181                "wiggle third-derivative/beta mismatch: basis has {} columns but beta_link_wiggle has {} coefficients",
182                d3.ncols(),
183                beta_link_wiggle.len()
184            ) }.into());
185        }
186        Ok(d3.dot(&beta_link_wiggle))
187    }
188
189    pub(crate) fn wiggle_d4q_dq04(
190        &self,
191        q0: ArrayView1<'_, f64>,
192        beta_link_wiggle: ArrayView1<'_, f64>,
193    ) -> Result<Array1<f64>, String> {
194        let d4 = monotone_wiggle_basis_with_derivative_order(
195            q0,
196            &self.wiggle_knots,
197            self.wiggle_degree,
198            4,
199        )?;
200        if d4.ncols() != beta_link_wiggle.len() {
201            return Err(GamlssError::DimensionMismatch { reason: format!(
202                "wiggle fourth-derivative/beta mismatch: basis has {} columns but beta_link_wiggle has {} coefficients",
203                d4.ncols(),
204                beta_link_wiggle.len()
205            ) }.into());
206        }
207        Ok(d4.dot(&beta_link_wiggle))
208    }
209
210    pub(crate) fn wiggle_geometry(
211        &self,
212        q0: ArrayView1<'_, f64>,
213        beta_link_wiggle: ArrayView1<'_, f64>,
214    ) -> Result<GaussianLocationScaleWiggleGeometry, String> {
215        let basis = self.wiggle_design(q0)?;
216        let basis_d1 = self.wiggle_basiswith_options(q0, BasisOptions::first_derivative())?;
217        let basis_d2 = self.wiggle_basiswith_options(q0, BasisOptions::second_derivative())?;
218        let basis_d3 = self.wiggle_d3basis_constrained(q0)?;
219        let dq_dq0 = self.wiggle_dq_dq0(q0, beta_link_wiggle)?;
220        let d2q_dq02 = self.wiggle_d2q_dq02(q0, beta_link_wiggle)?;
221        let d3q_dq03 = self.wiggle_d3q_dq03(q0, beta_link_wiggle)?;
222        let d4q_dq04 = self.wiggle_d4q_dq04(q0, beta_link_wiggle)?;
223        Ok(GaussianLocationScaleWiggleGeometry {
224            basis,
225            basis_d1,
226            basis_d2,
227            basis_d3,
228            dq_dq0,
229            d2q_dq02,
230            d3q_dq03,
231            d4q_dq04,
232        })
233    }
234
235    pub(crate) fn get_or_compute_row_scalars(
236        &self,
237        q: &Array1<f64>,
238        eta_ls: &Array1<f64>,
239    ) -> Result<Arc<GaussianJointRowScalars>, String> {
240        Ok(Arc::new(gaussian_jointrow_scalars(
241            &self.y,
242            q,
243            eta_ls,
244            &self.weights,
245        )?))
246    }
247
248    pub(crate) fn dense_block_designs(
249        &self,
250    ) -> Result<(Cow<'_, Array2<f64>>, Cow<'_, Array2<f64>>), String> {
251        dense_locscale_block_designs_cached(
252            self.mu_design.as_ref(),
253            self.log_sigma_design.as_ref(),
254            "GaussianLocationScaleWiggleFamily",
255            "GaussianLocationScaleWiggle",
256            "mu",
257            &self.policy.material_policy(),
258        )
259    }
260    pub(crate) fn dense_block_designs_fromspecs<'a>(
261        &self,
262        specs: &'a [ParameterBlockSpec],
263    ) -> Result<(Cow<'a, Array2<f64>>, Cow<'a, Array2<f64>>), String> {
264        dense_locscale_block_designs_fromspecs(
265            specs,
266            3,
267            "GaussianLocationScaleWiggleFamily",
268            "GaussianLocationScaleWiggle",
269            Self::BLOCK_MU,
270            Self::BLOCK_LOG_SIGMA,
271            "mu",
272            &self.policy.material_policy(),
273        )
274    }
275
276    pub(crate) fn exact_joint_dense_block_designs<'a>(
277        &'a self,
278        specs: Option<&'a [ParameterBlockSpec]>,
279    ) -> Result<Option<(Cow<'a, Array2<f64>>, Cow<'a, Array2<f64>>)>, String> {
280        if self.exact_joint_supported() {
281            return self.dense_block_designs().map(Some);
282        }
283        if let Some(specs) = specs {
284            return self.dense_block_designs_fromspecs(specs).map(Some);
285        }
286        Ok(None)
287    }
288
289    /// Build the [`BlockEffectiveJacobian`] for block `block_idx`.
290    ///
291    /// The wiggle block (block 2) modulates the inverse link nonlinearly and
292    /// does not contribute a linear additive term to any output η; its
293    /// Jacobian is an `(2 * n, p_wiggle)` zero matrix.
294    ///
295    /// - block 0 (mu):        output 0 = design rows, output 1 = zeros
296    /// - block 1 (log_sigma): output 0 = zeros, output 1 = design rows
297    /// - block 2 (wiggle):    all zeros (nonlinear link modulation)
298    pub fn block_effective_jacobian(
299        specs: &[ParameterBlockSpec],
300        block_idx: usize,
301    ) -> Result<Box<dyn BlockEffectiveJacobian>, String> {
302        crate::block_layout::block_jacobian::AdditiveWiggleBlockLayout {
303            family: "GaussianLocationScaleWiggleFamily",
304            n_outputs: 2,
305            additive_blocks: &[Self::BLOCK_MU, Self::BLOCK_LOG_SIGMA],
306            wiggle_block: Some(Self::BLOCK_WIGGLE),
307        }
308        .block_effective_jacobian(specs, block_idx)
309    }
310}
311
312/// Row-coefficient bundle for the GLS Wiggle joint second directional
313/// derivative, shared by the matrix-free operator and the dense
314/// `_from_designs` assemblies. Holds exactly the quantities both consumers
315/// read downstream of the (identical) coefficient computation.
316pub(crate) struct GlsWiggleSecondDirCoeffs {
317    pub(crate) coeff_mm_uv: Array1<f64>,
318    pub(crate) coeff_ml_uv: Array1<f64>,
319    pub(crate) coeff_ll_uv: Array1<f64>,
320    pub(crate) a_u: Array1<f64>,
321    pub(crate) a_v: Array1<f64>,
322    pub(crate) a_uv: Array1<f64>,
323    pub(crate) c_u: Array1<f64>,
324    pub(crate) c_v: Array1<f64>,
325    pub(crate) c_uv: Array1<f64>,
326    pub(crate) l_u: Array1<f64>,
327    pub(crate) l_v: Array1<f64>,
328    pub(crate) l_uv: Array1<f64>,
329    pub(crate) dw_u: Array1<f64>,
330    pub(crate) dw_v: Array1<f64>,
331    pub(crate) dw_uv: Array1<f64>,
332}
333
334/// The two probe directions resolved to row space for the GLS Wiggle joint
335/// second directional derivative: `xi`/`zeta` are the X_mu/X_ls contractions,
336/// and `q`/`s1`/`g2` are the mixed first/second-derivative wiggle pieces.
337pub(crate) struct GlsWiggleDirPieces<'a> {
338    pub(crate) zeta_u: &'a Array1<f64>,
339    pub(crate) zeta_v: &'a Array1<f64>,
340    pub(crate) q_u: &'a Array1<f64>,
341    pub(crate) q_v: &'a Array1<f64>,
342    pub(crate) q_uv: &'a Array1<f64>,
343    pub(crate) s1_u: &'a Array1<f64>,
344    pub(crate) s1_v: &'a Array1<f64>,
345    pub(crate) s1_uv: &'a Array1<f64>,
346    pub(crate) g2_u: &'a Array1<f64>,
347    pub(crate) g2_v: &'a Array1<f64>,
348    pub(crate) g2_uv: &'a Array1<f64>,
349}
350
351/// Compute the shared GLS Wiggle second-directional row coefficients from the
352/// per-row scalars, wiggle geometry, and the resolved probe directions.
353pub(crate) fn gls_wiggle_second_directional_coeffs(
354    rows: &GaussianJointRowScalars,
355    geom: &GaussianLocationScaleWiggleGeometry,
356    dir: &GlsWiggleDirPieces<'_>,
357) -> GlsWiggleSecondDirCoeffs {
358    let GlsWiggleDirPieces {
359        zeta_u,
360        zeta_v,
361        q_u,
362        q_v,
363        q_uv,
364        s1_u,
365        s1_v,
366        s1_uv,
367        g2_u,
368        g2_v,
369        g2_uv,
370    } = *dir;
371    let szeta_u = &rows.kappa * zeta_u;
372    let szeta_v = &rows.kappa * zeta_v;
373    let zeta_u_zeta_v = zeta_u * zeta_v;
374    let dw_u = -2.0 * &rows.w * &szeta_u;
375    let dw_v = -2.0 * &rows.w * &szeta_v;
376    let dw_uv =
377        4.0 * &rows.w * &(&szeta_u * &szeta_v) - 2.0 * &rows.w * &rows.kappa_prime * &zeta_u_zeta_v;
378    let dm_u = -(&rows.w * q_u) - &(2.0 * &rows.m * &szeta_u);
379    let dm_v = -(&rows.w * q_v) - &(2.0 * &rows.m * &szeta_v);
380    let dm_uv = &(2.0 * &rows.w * &(q_u * &szeta_v + q_v * &szeta_u)) - &(&rows.w * q_uv)
381        + &(4.0 * &rows.m * &(&szeta_u * &szeta_v))
382        - 2.0 * &rows.m * &rows.kappa_prime * &zeta_u_zeta_v;
383    let coeff_mm_uv = &(&dw_uv * &geom.dq_dq0.mapv(|v| v * v))
384        + &(2.0 * &dw_u * &geom.dq_dq0 * s1_v)
385        + &(2.0 * &dw_v * &geom.dq_dq0 * s1_u)
386        + &(2.0 * &rows.w * s1_u * s1_v)
387        + &(2.0 * &rows.w * &geom.dq_dq0 * s1_uv)
388        - &(&dm_uv * &geom.d2q_dq02)
389        - &(&dm_u * g2_v)
390        - &(&dm_v * g2_u)
391        - &(&rows.m * g2_uv);
392    let n = rows.m.len();
393    // H_{μ,ls} ≡ Fisher 0 (mean⊥scale orthogonality; the wiggle and μ both
394    // enter the mean, log σ is the only scale block), so every β-directional
395    // derivative — including this second-order one — is identically 0.
396    let coeff_ml_uv = Array1::<f64>::zeros(n);
397    // Second directional derivative of the Fisher (log σ, log σ) block
398    // coeff_ll = 2κ²a (#566). η_ls is linear in β (no zeta_uv), so the only
399    // surviving term is ∂²(2κ²a)/∂η² · zeta_u·zeta_v = 4a(κ'²+κκ'')·zeta_u·zeta_v
400    // — matching the dense helper `d_uv` (gaussian_jointsecond_directionalweights).
401    let coeff_ll_uv = 4.0
402        * &rows.obs_weight
403        * &(&rows.kappa_prime * &rows.kappa_prime + &rows.kappa * &rows.kappa_dprime)
404        * &zeta_u_zeta_v;
405
406    let a_u = &dw_u * &geom.dq_dq0 + &rows.w * s1_u;
407    let a_v = &dw_v * &geom.dq_dq0 + &rows.w * s1_v;
408    let a_uv = &dw_uv * &geom.dq_dq0 + &dw_u * s1_v + &dw_v * s1_u + &rows.w * s1_uv;
409    let c_u = -&dm_u;
410    let c_v = -&dm_v;
411    let c_uv = -&dm_uv;
412    // H_{ls,w} ≡ Fisher 0 (wiggle is mean-side; mean⊥scale), so all of its
413    // β-directional derivatives are 0.
414    let l_u = Array1::<f64>::zeros(n);
415    let l_v = Array1::<f64>::zeros(n);
416    let l_uv = Array1::<f64>::zeros(n);
417
418    GlsWiggleSecondDirCoeffs {
419        coeff_mm_uv,
420        coeff_ml_uv,
421        coeff_ll_uv,
422        a_u,
423        a_v,
424        a_uv,
425        c_u,
426        c_v,
427        c_uv,
428        l_u,
429        l_v,
430        l_uv,
431        dw_u,
432        dw_v,
433        dw_uv,
434    }
435}
436
437impl GaussianLocationScaleWiggleFamily {
438    pub(crate) fn exact_newton_joint_hessian_for_specs(
439        &self,
440        block_states: &[ParameterBlockState],
441        specs: Option<&[ParameterBlockSpec]>,
442    ) -> Result<Option<Array2<f64>>, String> {
443        let Some((xmu, x_ls)) = self.exact_joint_dense_block_designs(specs)? else {
444            return Ok(None);
445        };
446        self.exact_newton_joint_hessian_from_designs(block_states, &xmu, &x_ls)
447    }
448
449    pub(crate) fn exact_newton_joint_hessian_directional_derivative_for_specs(
450        &self,
451        block_states: &[ParameterBlockState],
452        specs: Option<&[ParameterBlockSpec]>,
453        d_beta_flat: &Array1<f64>,
454    ) -> Result<Option<Array2<f64>>, String> {
455        let Some((xmu, x_ls)) = self.exact_joint_dense_block_designs(specs)? else {
456            return Ok(None);
457        };
458        self.exact_newton_joint_hessian_directional_derivative_from_designs(
459            block_states,
460            &xmu,
461            &x_ls,
462            d_beta_flat,
463        )
464    }
465
466    pub(crate) fn exact_newton_joint_hessian_second_directional_derivative_for_specs(
467        &self,
468        block_states: &[ParameterBlockState],
469        specs: Option<&[ParameterBlockSpec]>,
470        d_beta_u_flat: &Array1<f64>,
471        d_beta_v_flat: &Array1<f64>,
472    ) -> Result<Option<Array2<f64>>, String> {
473        let Some((xmu, x_ls)) = self.exact_joint_dense_block_designs(specs)? else {
474            return Ok(None);
475        };
476        self.exact_newton_joint_hessiansecond_directional_derivative_from_designs(
477            block_states,
478            &xmu,
479            &x_ls,
480            d_beta_u_flat,
481            d_beta_v_flat,
482        )
483    }
484
485    pub(crate) fn exact_newton_joint_psi_direction(
486        &self,
487        block_states: &[ParameterBlockState],
488        derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
489        psi_index: usize,
490        xmu: &Array2<f64>,
491        x_ls: &Array2<f64>,
492        policy: &gam_runtime::resource::ResourcePolicy,
493    ) -> Result<Option<LocationScaleJointPsiDirection>, String> {
494        let Some(parts) = locscale_joint_psi_direction_parts(
495            block_states,
496            derivative_blocks,
497            psi_index,
498            self.y.len(),
499            xmu.ncols(),
500            x_ls.ncols(),
501            Self::BLOCK_MU,
502            Self::BLOCK_LOG_SIGMA,
503            3,
504            "GaussianLocationScaleWiggleFamily",
505            "mu",
506            policy,
507        )?
508        else {
509            return Ok(None);
510        };
511        Ok(Some(LocationScaleJointPsiDirection {
512            block_idx: parts.block_idx,
513            local_idx: parts.local_idx,
514            z_primary_psi: parts.primary_z,
515            z_ls_psi: parts.log_sigma_z,
516            x_primary_psi: parts.primary_psi,
517            x_ls_psi: parts.log_sigma_psi,
518        }))
519    }
520
521    pub(crate) fn exact_newton_joint_psisecond_design_drifts(
522        &self,
523        block_states: &[ParameterBlockState],
524        derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
525        psi_a: &LocationScaleJointPsiDirection,
526        psi_b: &LocationScaleJointPsiDirection,
527        xmu: &Array2<f64>,
528        x_ls: &Array2<f64>,
529    ) -> Result<LocationScaleJointPsiSecondDrifts, String> {
530        locscale_joint_psisecond_design_drifts(
531            block_states,
532            derivative_blocks,
533            psi_a,
534            psi_b,
535            LocScalePsiDriftConfig {
536                n: self.y.len(),
537                p_primary: xmu.ncols(),
538                p_log_sigma: x_ls.ncols(),
539                primary_block_idx: Self::BLOCK_MU,
540                log_sigma_block_idx: Self::BLOCK_LOG_SIGMA,
541                family_name: "GaussianLocationScaleWiggleFamily",
542                primary_label: "mu",
543                policy: &self.policy,
544            },
545        )
546    }
547
548    /// Compute the rowwise Hessian pieces shared by the dense path and the
549    /// matrix-free workspace operator. The same coefficients reconstruct the
550    /// dense p×p matrix or apply `Hv` directly without ever forming it.
551    pub(crate) fn wiggle_hessian_row_pieces(
552        &self,
553        block_states: &[ParameterBlockState],
554    ) -> Result<GaussianLocationScaleWiggleHessianRowPieces, String> {
555        validate_block_count::<GamlssError>(
556            "GaussianLocationScaleWiggleFamily",
557            3,
558            block_states.len(),
559        )?;
560        let q0 = &block_states[Self::BLOCK_MU].eta;
561        let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
562        let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
563        let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
564        let n = self.y.len();
565        if q0.len() != n || eta_ls.len() != n || etaw.len() != n || self.weights.len() != n {
566            return Err(GamlssError::DimensionMismatch {
567                reason: "GaussianLocationScaleWiggleFamily input size mismatch".to_string(),
568            }
569            .into());
570        }
571        let q = q0 + etaw;
572        let geom = self.wiggle_geometry(q0.view(), betaw.view())?;
573        if geom.basis.ncols() != betaw.len() {
574            return Err(GamlssError::DimensionMismatch { reason: format!(
575                "GaussianLocationScaleWiggleFamily wiggle basis/beta mismatch: basis has {} columns but beta has {} entries",
576                geom.basis.ncols(),
577                betaw.len()
578            ) }.into());
579        }
580        let rows = self.get_or_compute_row_scalars(&q, eta_ls)?;
581        let coeff_mm = &rows.w * &geom.dq_dq0.mapv(|v| v * v) - &rows.m * &geom.d2q_dq02;
582        // Gaussian mean⊥scale Fisher orthogonality. μ (mu) AND the wiggle both
583        // enter the MEAN q = q0 + B(q0)·βw (see `let q = q0 + etaw`); log σ is
584        // the only scale-side block. The Fisher (expected) cross between any
585        // mean-side parameter and log σ is exactly 0: H_{μ,ls} = 2κm·dq_dq0 and
586        // H_{ls,w} = 2κm both carry m = r·w = (y−q)·weight/σ², and E[m] =
587        // E[r]·w = 0. The dense and matrix-free workspace paths SHARE these row
588        // pieces, so setting the cross coeffs to 0 fixes the curvature object
589        // (the observed 2κm value) for both. Diagonal/same-side blocks
590        // (coeff_mm within mean, coeff_ll within scale, coeff_mw_* within mean,
591        // coeff_ww within mean) are untouched.
592        let coeff_ml = Array1::<f64>::zeros(n);
593        // Fisher/expected (log σ, log σ) information E[H_{ls,ls}] = 2κ²a (#566):
594        // the observed 2κ²n + κ'(a−n) collapses at small residuals and
595        // over-smooths the scale; E[n]=a gives the residual-free 2κ²a.
596        let coeff_ll = 2.0 * &rows.kappa * &rows.kappa * &rows.obs_weight;
597        let coeff_mw_b = &rows.w * &geom.dq_dq0;
598        let coeff_mw_d = -&rows.m;
599        // ls↔wiggle is a mean⊥scale cross (wiggle is mean-side): Fisher 0.
600        let coeff_lw_b = Array1::<f64>::zeros(n);
601        let coeff_ww = rows.w.clone();
602        Ok(GaussianLocationScaleWiggleHessianRowPieces {
603            coeff_mm,
604            coeff_ml,
605            coeff_ll,
606            coeff_mw_b,
607            coeff_mw_d,
608            coeff_lw_b,
609            coeff_ww,
610            basis: geom.basis,
611            basis_d1: geom.basis_d1,
612        })
613    }
614
615    pub(crate) fn exact_newton_joint_hessian_from_designs(
616        &self,
617        block_states: &[ParameterBlockState],
618        xmu: &Array2<f64>,
619        x_ls: &Array2<f64>,
620    ) -> Result<Option<Array2<f64>>, String> {
621        let pieces = self.wiggle_hessian_row_pieces(block_states)?;
622        Ok(Some(pieces.assemble_dense(xmu, x_ls)?))
623    }
624
625    pub(crate) fn exact_newton_joint_hessian_directional_derivative_from_designs(
626        &self,
627        block_states: &[ParameterBlockState],
628        xmu: &Array2<f64>,
629        x_ls: &Array2<f64>,
630        d_beta_flat: &Array1<f64>,
631    ) -> Result<Option<Array2<f64>>, String> {
632        validate_block_count::<GamlssError>(
633            "GaussianLocationScaleWiggleFamily",
634            3,
635            block_states.len(),
636        )?;
637        let pmu = xmu.ncols();
638        let p_ls = x_ls.ncols();
639        let q0 = &block_states[Self::BLOCK_MU].eta;
640        let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
641        let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
642        let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
643        let n = self.y.len();
644        let layout = GamlssBetaLayout::withwiggle(pmu, p_ls, betaw.len());
645        let (umu, u_ls, uw) = layout.split_three(
646            d_beta_flat,
647            "GaussianLocationScaleWiggleFamily exact joint directional Hessian",
648        )?;
649        if q0.len() != n || eta_ls.len() != n || etaw.len() != n || self.weights.len() != n {
650            return Err(GamlssError::DimensionMismatch {
651                reason: "GaussianLocationScaleWiggleFamily input size mismatch".to_string(),
652            }
653            .into());
654        }
655        let q = q0 + etaw;
656        let geom = self.wiggle_geometry(q0.view(), betaw.view())?;
657        let rows = self.get_or_compute_row_scalars(&q, eta_ls)?;
658        let xi = fast_av(xmu, &umu);
659        let zeta = fast_av(x_ls, &u_ls);
660        // logb κ-scaled η_ls direction; κ' = dκ/dη_ls = κ(1−κ).
661        let szeta = &rows.kappa * &zeta;
662        let phi = fast_av(&geom.basis, &uw);
663        let mut q_u = &geom.dq_dq0 * &xi;
664        q_u += &phi;
665        let mut s1_u = &geom.d2q_dq02 * &xi;
666        s1_u += &fast_av(&geom.basis_d1, &uw);
667        let mut g2_u = &geom.d3q_dq03 * &xi;
668        g2_u += &fast_av(&geom.basis_d2, &uw);
669        let basis_u = scale_matrix_rows(&geom.basis_d1, &xi)?;
670        let basis1_u = scale_matrix_rows(&geom.basis_d2, &xi)?;
671        let dw_u = -2.0 * &rows.w * &szeta;
672        let dm_u = -(&rows.w * &q_u) - &(2.0 * &rows.m * &szeta);
673
674        let coeff_mm_u = &(&dw_u * &geom.dq_dq0.mapv(|v| v * v))
675            + &(2.0 * &rows.w * &geom.dq_dq0 * &s1_u)
676            - &(&dm_u * &geom.d2q_dq02)
677            - &(&rows.m * &g2_u);
678        // Static blocks: H_{μ,ls} = Fisher 0 (mean⊥scale); H_{ls,ls} = Fisher
679        // 2κ²a (#566). H_{μ,ls} ≡ 0 for all β, so its directional derivative is
680        // also identically 0. The Fisher (ls,ls) block 2κ²a depends only on
681        // η_ls (a is the constant prior weight), so its directional derivative
682        // is 4κκ'a·zeta.
683        let coeff_ml_u = Array1::<f64>::zeros(n);
684        let coeff_ll_u = 4.0 * &rows.kappa * &rows.kappa_prime * &(&zeta * &rows.obs_weight);
685        let a_u = &dw_u * &geom.dq_dq0 + &rows.w * &s1_u;
686        let c_u = -&dm_u;
687        // ls↔wiggle cross block: Fisher 0 (wiggle is mean-side), so its
688        // directional derivative is 0 as well.
689        let l_u = Array1::<f64>::zeros(n);
690        let zeros_ls_b1 = Array1::<f64>::zeros(n);
691
692        let h_mm = xt_diag_x_dense(xmu, &coeff_mm_u)?;
693        let h_ml = xt_diag_y_dense(xmu, &coeff_ml_u, x_ls)?;
694        let h_ll = xt_diag_x_dense(x_ls, &coeff_ll_u)?;
695        let h_mw = xt_diag_y_dense(xmu, &a_u, &geom.basis)?
696            + &xt_diag_y_dense(xmu, &(&rows.w * &geom.dq_dq0), &basis_u)?
697            + &xt_diag_y_dense(xmu, &c_u, &geom.basis_d1)?
698            + &xt_diag_y_dense(xmu, &(-&rows.m), &basis1_u)?;
699        let h_lw = xt_diag_y_dense(x_ls, &l_u, &geom.basis)?
700            + &xt_diag_y_dense(x_ls, &zeros_ls_b1, &basis_u)?;
701        let a_ww = xt_diag_y_dense(&basis_u, &rows.w, &geom.basis)?;
702        let h_ww = &a_ww + &a_ww.t() + &xt_diag_x_dense(&geom.basis, &dw_u)?;
703        Ok(Some(gaussian_pack_wiggle_joint_symmetrichessian(
704            &h_mm, &h_ml, &h_mw, &h_ll, &h_lw, &h_ww,
705        )))
706    }
707
708    /// Build a matrix-free `RowCoeffOperator` for the GLS Wiggle joint
709    /// directional derivative `D_β H_L[u]`. Output dimension is
710    /// `pmu + p_ls + pw`. Channels (in order): X_mu, X_ls, B, B', B''.
711    pub(crate) fn gls_wiggle_directional_operator(
712        &self,
713        block_states: &[ParameterBlockState],
714        xmu_arc: Arc<Array2<f64>>,
715        x_ls_arc: Arc<Array2<f64>>,
716        d_beta_flat: &Array1<f64>,
717    ) -> Result<Option<Arc<dyn gam_problem::HyperOperator>>, String> {
718        validate_block_count::<GamlssError>(
719            "GaussianLocationScaleWiggleFamily",
720            3,
721            block_states.len(),
722        )?;
723        let pmu = xmu_arc.ncols();
724        let p_ls = x_ls_arc.ncols();
725        let q0_eta = &block_states[Self::BLOCK_MU].eta;
726        let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
727        let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
728        let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
729        let n = self.y.len();
730        let layout = GamlssBetaLayout::withwiggle(pmu, p_ls, betaw.len());
731        let (umu, u_ls, uw) =
732            layout.split_three(d_beta_flat, "GLS Wiggle joint dH operator d_beta")?;
733        if q0_eta.len() != n || eta_ls.len() != n || etaw.len() != n || self.weights.len() != n {
734            return Err(GamlssError::DimensionMismatch {
735                reason: "GaussianLocationScaleWiggleFamily input size mismatch".to_string(),
736            }
737            .into());
738        }
739        let q = q0_eta + etaw;
740        let geom = self.wiggle_geometry(q0_eta.view(), betaw.view())?;
741        let rows = self.get_or_compute_row_scalars(&q, eta_ls)?;
742        let xi = fast_av(xmu_arc.as_ref(), &umu);
743        let zeta = fast_av(x_ls_arc.as_ref(), &u_ls);
744        let szeta = &rows.kappa * &zeta;
745        let phi = fast_av(&geom.basis, &uw);
746        let mut q_u = &geom.dq_dq0 * &xi;
747        q_u += &phi;
748        let mut s1_u = &geom.d2q_dq02 * &xi;
749        s1_u += &fast_av(&geom.basis_d1, &uw);
750        let mut g2_u = &geom.d3q_dq03 * &xi;
751        g2_u += &fast_av(&geom.basis_d2, &uw);
752        let dw_u = -2.0 * &rows.w * &szeta;
753        let dm_u = -(&rows.w * &q_u) - &(2.0 * &rows.m * &szeta);
754
755        let coeff_mm_u = &(&dw_u * &geom.dq_dq0.mapv(|v| v * v))
756            + &(2.0 * &rows.w * &geom.dq_dq0 * &s1_u)
757            - &(&dm_u * &geom.d2q_dq02)
758            - &(&rows.m * &g2_u);
759        // H_{μ,ls} ≡ Fisher 0 (mean⊥scale); its directional derivative is 0.
760        let coeff_ml_u = Array1::<f64>::zeros(n);
761        // Fisher (ls,ls) 2κ²a directional derivative: 4κκ'a·zeta (#566).
762        let coeff_ll_u = 4.0 * &rows.kappa * &rows.kappa_prime * &(&zeta * &rows.obs_weight);
763        let a_u = &dw_u * &geom.dq_dq0 + &rows.w * &s1_u;
764        let c_u = -&dm_u;
765        // H_{ls,w} ≡ Fisher 0 (wiggle is mean-side); its derivative is 0 in
766        // both the B channel (l_u) and the B' channel (coeff_ls_b1).
767        let l_u = Array1::<f64>::zeros(n);
768
769        // Pair-coefficient bundles. For (0=X_mu, 3=B'): combine
770        // `xt_diag_y_dense(xmu, &(w·dq_dq0), &basis_u=diag(xi)·B')`
771        // (giving coeff `w·dq_dq0·xi`) with `xt_diag_y_dense(xmu, &c_u, &B')`
772        // (coeff `c_u`).
773        let coeff_m_b1 = &(&rows.w * &geom.dq_dq0 * &xi) + &c_u;
774        // (0=X_mu, 4=B''): from `xt_diag_y_dense(xmu, &(-m), &basis1_u=diag(xi)·B'')`.
775        let coeff_m_b2 = -(&rows.m * &xi);
776        // (1=X_ls, 3=B'): ls↔wiggle Fisher-0 cross → zero.
777        let coeff_ls_b1 = Array1::<f64>::zeros(n);
778        // (2=B, 3=B'): a_ww + a_ww^T where a_ww = (diag(xi)·B')^T diag(w) B
779        // = B'^T diag(w·xi) B. The symmetric pair contribution in
780        // `RowCoeffOperator` reproduces a_ww + a_ww^T with c = w·xi.
781        let coeff_b_b1 = &rows.w * &xi;
782
783        let basis: Arc<Array2<f64>> = Arc::new(geom.basis.clone());
784        let basis_d1: Arc<Array2<f64>> = Arc::new(geom.basis_d1.clone());
785        let basis_d2: Arc<Array2<f64>> = Arc::new(geom.basis_d2.clone());
786        let pw = basis.ncols();
787
788        Ok(Some(Arc::new(RowCoeffOperator::from_directions(
789            vec![pmu, p_ls, pw],
790            vec![
791                (0, xmu_arc),
792                (1, x_ls_arc),
793                (2, basis),
794                (2, basis_d1),
795                (2, basis_d2),
796            ],
797            vec![
798                // (X_mu, X_mu) ← `xt_diag_x_dense(xmu, &coeff_mm_u)`
799                (0, 0, coeff_mm_u),
800                // (X_mu, X_ls) ← `xt_diag_y_dense(xmu, &coeff_ml_u, x_ls)`
801                (0, 1, coeff_ml_u),
802                // (X_ls, X_ls) ← `xt_diag_x_dense(x_ls, &coeff_ll_u)`
803                (1, 1, coeff_ll_u),
804                // (X_mu, B) ← `xt_diag_y_dense(xmu, &a_u, &geom.basis)`
805                (0, 2, a_u),
806                // (X_mu, B') ← `xt_diag_y_dense(xmu, w·dq_dq0, basis_u=diag(ξ)·B') + xt_diag_y_dense(xmu, c_u, B')`
807                (0, 3, coeff_m_b1),
808                // (X_mu, B'') ← `xt_diag_y_dense(xmu, -m, basis1_u=diag(ξ)·B'')`
809                (0, 4, coeff_m_b2),
810                // (X_ls, B) ← `xt_diag_y_dense(x_ls, &l_u, &geom.basis)`
811                (1, 2, l_u),
812                // (X_ls, B') ← ls↔wiggle is mean⊥scale Fisher 0, so coeff_ls_b1 = 0
813                (1, 3, coeff_ls_b1),
814                // (B, B) ← `xt_diag_x_dense(&geom.basis, &dw_u)`
815                (2, 2, dw_u),
816                // (B, B') ← a_ww + a_ww^T = B^T diag(w·ξ) B' + B'^T diag(w·ξ) B
817                (2, 3, coeff_b_b1),
818            ],
819            n,
820        ))))
821    }
822
823    /// Build a matrix-free `RowCoeffOperator` for the GLS Wiggle joint
824    /// second directional derivative `D²_β H_L[u, v]`. Channels: X_mu,
825    /// X_ls, B, B', B'', B'''. Pair list mirrors the 8-term `xt_diag_*`
826    /// assembly in `_from_designs`, with row-coefficient bundles that
827    /// absorb the `ξ_u, ξ_v, ξ_u·ξ_v` row factors arising from
828    /// `basis_u = diag(ξ_u)·B'`, `basis_uv = diag(ξ_u·ξ_v)·B''`, etc.
829    pub(crate) fn gls_wiggle_second_directional_operator(
830        &self,
831        block_states: &[ParameterBlockState],
832        xmu_arc: Arc<Array2<f64>>,
833        x_ls_arc: Arc<Array2<f64>>,
834        d_beta_u: &Array1<f64>,
835        d_beta_v: &Array1<f64>,
836    ) -> Result<Option<Arc<dyn gam_problem::HyperOperator>>, String> {
837        validate_block_count::<GamlssError>(
838            "GaussianLocationScaleWiggleFamily",
839            3,
840            block_states.len(),
841        )?;
842        let pmu = xmu_arc.ncols();
843        let p_ls = x_ls_arc.ncols();
844        let q0_eta = &block_states[Self::BLOCK_MU].eta;
845        let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
846        let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
847        let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
848        let n = self.y.len();
849        let layout = GamlssBetaLayout::withwiggle(pmu, p_ls, betaw.len());
850        let (umu, u_ls, uw) = layout.split_three(d_beta_u, "GLS Wiggle d2H operator (u)")?;
851        let (vmu, v_ls, vw) = layout.split_three(d_beta_v, "GLS Wiggle d2H operator (v)")?;
852        if q0_eta.len() != n || eta_ls.len() != n || etaw.len() != n || self.weights.len() != n {
853            return Err(GamlssError::DimensionMismatch {
854                reason: "GaussianLocationScaleWiggleFamily input size mismatch".to_string(),
855            }
856            .into());
857        }
858        let q = q0_eta + etaw;
859        let geom = self.wiggle_geometry(q0_eta.view(), betaw.view())?;
860        let rows = self.get_or_compute_row_scalars(&q, eta_ls)?;
861
862        let xi_u = fast_av(xmu_arc.as_ref(), &umu);
863        let xi_v = fast_av(xmu_arc.as_ref(), &vmu);
864        let zeta_u = fast_av(x_ls_arc.as_ref(), &u_ls);
865        let zeta_v = fast_av(x_ls_arc.as_ref(), &v_ls);
866        let phi_u = fast_av(&geom.basis, &uw);
867        let phi_v = fast_av(&geom.basis, &vw);
868        let b1u = fast_av(&geom.basis_d1, &uw);
869        let b1v = fast_av(&geom.basis_d1, &vw);
870        let b2u = fast_av(&geom.basis_d2, &uw);
871        let b2v = fast_av(&geom.basis_d2, &vw);
872        let b3u = fast_av(&geom.basis_d3, &uw);
873        let b3v = fast_av(&geom.basis_d3, &vw);
874
875        let mut q_u = &geom.dq_dq0 * &xi_u;
876        q_u += &phi_u;
877        let mut q_v = &geom.dq_dq0 * &xi_v;
878        q_v += &phi_v;
879        let mut s1_u = &geom.d2q_dq02 * &xi_u;
880        s1_u += &b1u;
881        let mut s1_v = &geom.d2q_dq02 * &xi_v;
882        s1_v += &b1v;
883        let mut g2_u = &geom.d3q_dq03 * &xi_u;
884        g2_u += &b2u;
885        let mut g2_v = &geom.d3q_dq03 * &xi_v;
886        g2_v += &b2v;
887        let q_uv = &(&geom.d2q_dq02 * &(&xi_u * &xi_v)) + &(&b1u * &xi_v) + &(&b1v * &xi_u);
888        let s1_uv = &(&geom.d3q_dq03 * &(&xi_u * &xi_v)) + &(&b2u * &xi_v) + &(&b2v * &xi_u);
889        let g2_uv = &(&geom.d4q_dq04 * &(&xi_u * &xi_v)) + &(&b3u * &xi_v) + &(&b3v * &xi_u);
890
891        let GlsWiggleSecondDirCoeffs {
892            coeff_mm_uv,
893            coeff_ml_uv,
894            coeff_ll_uv,
895            a_u,
896            a_v,
897            a_uv,
898            c_u,
899            c_v,
900            c_uv,
901            l_u,
902            l_v,
903            l_uv,
904            dw_u,
905            dw_v,
906            dw_uv,
907        } = gls_wiggle_second_directional_coeffs(
908            &rows,
909            &geom,
910            &GlsWiggleDirPieces {
911                zeta_u: &zeta_u,
912                zeta_v: &zeta_v,
913                q_u: &q_u,
914                q_v: &q_v,
915                q_uv: &q_uv,
916                s1_u: &s1_u,
917                s1_v: &s1_v,
918                s1_uv: &s1_uv,
919                g2_u: &g2_u,
920                g2_v: &g2_v,
921                g2_uv: &g2_uv,
922            },
923        );
924
925        // Pair-coefficient bundles. Cross-block (mu, B'/B'') absorb basis_u/v/uv row scaling.
926        let xi_u_xi_v = &xi_u * &xi_v;
927        let coeff_m_b1 = &(&a_u * &xi_v) + &(&a_v * &xi_u) + &c_uv;
928        let coeff_m_b2 = &(&rows.w * &geom.dq_dq0 * &xi_u_xi_v) + &(&c_u * &xi_v) + &(&c_v * &xi_u);
929        let coeff_m_b3 = -(&rows.m * &xi_u_xi_v);
930        // ls↔wiggle is Fisher-0 (mean⊥scale): the B' (coeff_ls_b1) and B''
931        // (coeff_ls_b2) channels of its second directional derivative vanish.
932        let coeff_ls_b1 = &(&l_u * &xi_v) + &(&l_v * &xi_u);
933        let coeff_ls_b2 = Array1::<f64>::zeros(n);
934        // Wiggle-wiggle from a_ab + a_ab^T + a_ij + a_ij^T + a_iwj + a_iwj^T + a_jwi + a_jwi^T:
935        //   a_ab = B''^T diag(w·ξ_uξ_v) B    → pair (B, B'', w·ξ_uξ_v)
936        //   a_ij = B'^T diag(w·ξ_uξ_v) B'   → pair (B', B', 2·w·ξ_uξ_v)  (a_ij + a_ij^T)
937        //   a_iwj+a_jwi = B'^T diag(dw_v·ξ_u + dw_u·ξ_v) B → pair (B, B', sum)
938        let coeff_b_b1 = &(&dw_u * &xi_v) + &(&dw_v * &xi_u);
939        let coeff_b_b2 = &rows.w * &xi_u_xi_v;
940        let coeff_b1_b1 = 2.0 * &(&rows.w * &xi_u_xi_v);
941
942        let basis: Arc<Array2<f64>> = Arc::new(geom.basis.clone());
943        let basis_d1: Arc<Array2<f64>> = Arc::new(geom.basis_d1.clone());
944        let basis_d2: Arc<Array2<f64>> = Arc::new(geom.basis_d2.clone());
945        let basis_d3: Arc<Array2<f64>> = Arc::new(geom.basis_d3.clone());
946        let pw = basis.ncols();
947
948        Ok(Some(Arc::new(RowCoeffOperator::from_directions(
949            vec![pmu, p_ls, pw],
950            vec![
951                (0, xmu_arc),
952                (1, x_ls_arc),
953                (2, basis),
954                (2, basis_d1),
955                (2, basis_d2),
956                (2, basis_d3),
957            ],
958            vec![
959                // (X_mu, X_mu) ← `xt_diag_x_dense(xmu, &coeff_mm_uv)`
960                (0, 0, coeff_mm_uv),
961                // (X_mu, X_ls) ← `xt_diag_y_dense(xmu, &coeff_ml_uv, x_ls)`
962                (0, 1, coeff_ml_uv),
963                // (X_ls, X_ls) ← `xt_diag_x_dense(x_ls, &coeff_ll_uv)`
964                (1, 1, coeff_ll_uv),
965                // (X_mu, B) ← `xt_diag_y_dense(xmu, &a_uv, &geom.basis)`
966                (0, 2, a_uv),
967                // (X_mu, B') ← combined `a_u·ξ_v + a_v·ξ_u + c_uv` from
968                // `xt_diag_y_dense(xmu, a_u, basis_v) + xt_diag_y_dense(xmu,
969                // a_v, basis_u) + xt_diag_y_dense(xmu, c_uv, B')`
970                (0, 3, coeff_m_b1),
971                // (X_mu, B'') ← `xt_diag_y_dense(xmu, w·dq_dq0, basis_uv) +
972                // xt_diag_y_dense(xmu, c_u, basis1_v) + xt_diag_y_dense(xmu,
973                // c_v, basis1_u)` (basis_uv = diag(ξ_uξ_v)·B'';
974                // basis1_{u,v} = diag(ξ_{u,v})·B'')
975                (0, 4, coeff_m_b2),
976                // (X_mu, B''') ← `xt_diag_y_dense(xmu, -m, basis1_uv)`
977                // with basis1_uv = diag(ξ_uξ_v)·B'''
978                (0, 5, coeff_m_b3),
979                // (X_ls, B) ← `xt_diag_y_dense(x_ls, &l_uv, &geom.basis)`
980                (1, 2, l_uv),
981                // (X_ls, B') ← combined from `xt_diag_y_dense(x_ls, l_u,
982                // basis_v) + xt_diag_y_dense(x_ls, l_v, basis_u)` =
983                // `l_u·ξ_v + l_v·ξ_u`
984                (1, 3, coeff_ls_b1),
985                // (X_ls, B'') ← ls↔wiggle is mean⊥scale Fisher 0, so coeff_ls_b2 = 0
986                (1, 4, coeff_ls_b2),
987                // (B, B) ← `xt_diag_x_dense(&geom.basis, &dw_uv)`
988                (2, 2, dw_uv),
989                // (B, B') ← combined `a_iwj + a_iwj^T + a_jwi + a_jwi^T` =
990                // B^T diag(dw_u·ξ_v + dw_v·ξ_u) B' + B'^T diag(...) B
991                (2, 3, coeff_b_b1),
992                // (B, B'') ← `a_ab + a_ab^T` with a_ab = B''^T diag(w·ξ_uξ_v) B
993                (2, 4, coeff_b_b2),
994                // (B', B') ← `a_ij + a_ij^T = 2·B'^T diag(w·ξ_uξ_v) B'`;
995                // diagonal pair coeff doubles to absorb the factor of 2
996                (3, 3, coeff_b1_b1),
997            ],
998            n,
999        ))))
1000    }
1001
1002    pub(crate) fn exact_newton_joint_hessiansecond_directional_derivative_from_designs(
1003        &self,
1004        block_states: &[ParameterBlockState],
1005        xmu: &Array2<f64>,
1006        x_ls: &Array2<f64>,
1007        d_beta_u_flat: &Array1<f64>,
1008        d_beta_v_flat: &Array1<f64>,
1009    ) -> Result<Option<Array2<f64>>, String> {
1010        validate_block_count::<GamlssError>(
1011            "GaussianLocationScaleWiggleFamily",
1012            3,
1013            block_states.len(),
1014        )?;
1015        let pmu = xmu.ncols();
1016        let p_ls = x_ls.ncols();
1017        let q0 = &block_states[Self::BLOCK_MU].eta;
1018        let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
1019        let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
1020        let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
1021        let n = self.y.len();
1022        let layout = GamlssBetaLayout::withwiggle(pmu, p_ls, betaw.len());
1023        let (umu, u_ls, uw) = layout.split_three(
1024            d_beta_u_flat,
1025            "GaussianLocationScaleWiggleFamily exact joint second directional Hessian (u)",
1026        )?;
1027        let (vmu, v_ls, vw) = layout.split_three(
1028            d_beta_v_flat,
1029            "GaussianLocationScaleWiggleFamily exact joint second directional Hessian (v)",
1030        )?;
1031        if q0.len() != n || eta_ls.len() != n || etaw.len() != n || self.weights.len() != n {
1032            return Err(GamlssError::DimensionMismatch {
1033                reason: "GaussianLocationScaleWiggleFamily input size mismatch".to_string(),
1034            }
1035            .into());
1036        }
1037        let q = q0 + etaw;
1038        let geom = self.wiggle_geometry(q0.view(), betaw.view())?;
1039        let rows = self.get_or_compute_row_scalars(&q, eta_ls)?;
1040
1041        let xi_u = fast_av(xmu, &umu);
1042        let xi_v = fast_av(xmu, &vmu);
1043        let zeta_u = fast_av(x_ls, &u_ls);
1044        let zeta_v = fast_av(x_ls, &v_ls);
1045        let phi_u = fast_av(&geom.basis, &uw);
1046        let phi_v = fast_av(&geom.basis, &vw);
1047        let b1u = fast_av(&geom.basis_d1, &uw);
1048        let b1v = fast_av(&geom.basis_d1, &vw);
1049        let b2u = fast_av(&geom.basis_d2, &uw);
1050        let b2v = fast_av(&geom.basis_d2, &vw);
1051        let b3u = fast_av(&geom.basis_d3, &uw);
1052        let b3v = fast_av(&geom.basis_d3, &vw);
1053
1054        let mut q_u = &geom.dq_dq0 * &xi_u;
1055        q_u += &phi_u;
1056        let mut q_v = &geom.dq_dq0 * &xi_v;
1057        q_v += &phi_v;
1058        let mut s1_u = &geom.d2q_dq02 * &xi_u;
1059        s1_u += &b1u;
1060        let mut s1_v = &geom.d2q_dq02 * &xi_v;
1061        s1_v += &b1v;
1062        let mut g2_u = &geom.d3q_dq03 * &xi_u;
1063        g2_u += &b2u;
1064        let mut g2_v = &geom.d3q_dq03 * &xi_v;
1065        g2_v += &b2v;
1066        let q_uv = &(&geom.d2q_dq02 * &(&xi_u * &xi_v)) + &(&b1u * &xi_v) + &(&b1v * &xi_u);
1067        let s1_uv = &(&geom.d3q_dq03 * &(&xi_u * &xi_v)) + &(&b2u * &xi_v) + &(&b2v * &xi_u);
1068        let g2_uv = &(&geom.d4q_dq04 * &(&xi_u * &xi_v)) + &(&b3u * &xi_v) + &(&b3v * &xi_u);
1069
1070        let basis_u = scale_matrix_rows(&geom.basis_d1, &xi_u)?;
1071        let basis_v = scale_matrix_rows(&geom.basis_d1, &xi_v)?;
1072        let basis_uv = scale_matrix_rows(&geom.basis_d2, &(&xi_u * &xi_v))?;
1073        let basis1_u = scale_matrix_rows(&geom.basis_d2, &xi_u)?;
1074        let basis1_v = scale_matrix_rows(&geom.basis_d2, &xi_v)?;
1075        let basis1_uv = scale_matrix_rows(&geom.basis_d3, &(&xi_u * &xi_v))?;
1076
1077        // Shared κ-aware second-directional row coefficients (κ' = κ(1−κ),
1078        // κ'' = κ(1−κ)(1−2κ), κ''' = κ''(1−2κ) − 2(κ')²): identical to the
1079        // matrix-free operator path, factored into one helper.
1080        let GlsWiggleSecondDirCoeffs {
1081            coeff_mm_uv,
1082            coeff_ml_uv,
1083            coeff_ll_uv,
1084            a_u,
1085            a_v,
1086            a_uv,
1087            c_u,
1088            c_v,
1089            c_uv,
1090            l_u,
1091            l_v,
1092            l_uv,
1093            dw_u,
1094            dw_v,
1095            dw_uv,
1096        } = gls_wiggle_second_directional_coeffs(
1097            &rows,
1098            &geom,
1099            &GlsWiggleDirPieces {
1100                zeta_u: &zeta_u,
1101                zeta_v: &zeta_v,
1102                q_u: &q_u,
1103                q_v: &q_v,
1104                q_uv: &q_uv,
1105                s1_u: &s1_u,
1106                s1_v: &s1_v,
1107                s1_uv: &s1_uv,
1108                g2_u: &g2_u,
1109                g2_v: &g2_v,
1110                g2_uv: &g2_uv,
1111            },
1112        );
1113
1114        let h_mm = xt_diag_x_dense(xmu, &coeff_mm_uv)?;
1115        let h_ml = xt_diag_y_dense(xmu, &coeff_ml_uv, x_ls)?;
1116        let h_ll = xt_diag_x_dense(x_ls, &coeff_ll_uv)?;
1117        let h_mw = xt_diag_y_dense(xmu, &a_uv, &geom.basis)?
1118            + &xt_diag_y_dense(xmu, &a_u, &basis_v)?
1119            + &xt_diag_y_dense(xmu, &a_v, &basis_u)?
1120            + &xt_diag_y_dense(xmu, &(&rows.w * &geom.dq_dq0), &basis_uv)?
1121            + &xt_diag_y_dense(xmu, &c_uv, &geom.basis_d1)?
1122            + &xt_diag_y_dense(xmu, &c_u, &basis1_v)?
1123            + &xt_diag_y_dense(xmu, &c_v, &basis1_u)?
1124            + &xt_diag_y_dense(xmu, &(-&rows.m), &basis1_uv)?;
1125        // H_{ls,w} ≡ Fisher 0 (mean⊥scale): l_uv/l_u/l_v are 0 (shared helper)
1126        // and the 2κm·B'' channel vanishes too.
1127        let zeros_ls_b2 = Array1::<f64>::zeros(n);
1128        let h_lw = xt_diag_y_dense(x_ls, &l_uv, &geom.basis)?
1129            + &xt_diag_y_dense(x_ls, &l_u, &basis_v)?
1130            + &xt_diag_y_dense(x_ls, &l_v, &basis_u)?
1131            + &xt_diag_y_dense(x_ls, &zeros_ls_b2, &basis_uv)?;
1132        let a_ab = xt_diag_y_dense(&basis_uv, &rows.w, &geom.basis)?;
1133        let a_ij = xt_diag_y_dense(&basis_u, &rows.w, &basis_v)?;
1134        let a_iwj = xt_diag_y_dense(&basis_u, &dw_v, &geom.basis)?;
1135        let a_jwi = xt_diag_y_dense(&basis_v, &dw_u, &geom.basis)?;
1136        let h_ww = &a_ab
1137            + &a_ab.t()
1138            + &a_ij
1139            + a_ij.t()
1140            + &a_iwj
1141            + a_iwj.t()
1142            + &a_jwi
1143            + a_jwi.t()
1144            + &xt_diag_x_dense(&geom.basis, &dw_uv)?;
1145        Ok(Some(gaussian_pack_wiggle_joint_symmetrichessian(
1146            &h_mm, &h_ml, &h_mw, &h_ll, &h_lw, &h_ww,
1147        )))
1148    }
1149
1150    pub(crate) fn exact_newton_joint_psi_terms_from_designs(
1151        &self,
1152        block_states: &[ParameterBlockState],
1153        derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
1154        psi_index: usize,
1155        xmu: &Array2<f64>,
1156        x_ls: &Array2<f64>,
1157    ) -> Result<Option<crate::custom_family::ExactNewtonJointPsiTerms>, String> {
1158        let Some(dir_a) = self.exact_newton_joint_psi_direction(
1159            block_states,
1160            derivative_blocks,
1161            psi_index,
1162            xmu,
1163            x_ls,
1164            &self.policy,
1165        )?
1166        else {
1167            return Ok(None);
1168        };
1169        let q0 = &block_states[Self::BLOCK_MU].eta;
1170        let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
1171        let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
1172        let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
1173        let q = q0 + etaw;
1174        let geom = self.wiggle_geometry(q0.view(), betaw.view())?;
1175        let rows = self.get_or_compute_row_scalars(&q, eta_ls)?;
1176        let xmu_map = dir_a.x_primary_psi.as_linear_map_ref();
1177        let x_ls_map = dir_a.x_ls_psi.as_linear_map_ref();
1178
1179        let q_a = &geom.dq_dq0 * &dir_a.z_primary_psi;
1180        let s1_a = &geom.d2q_dq02 * &dir_a.z_primary_psi;
1181        let g2_a = &geom.d3q_dq03 * &dir_a.z_primary_psi;
1182        let basis_a = scale_matrix_rows(&geom.basis_d1, &dir_a.z_primary_psi)?;
1183        let basis1_a = scale_matrix_rows(&geom.basis_d2, &dir_a.z_primary_psi)?;
1184        // logb κ-chain on η_ls; e_a = ∂η_ls/∂ψ_a row-direction.
1185        let e_a = &dir_a.z_ls_psi;
1186        let amn = &rows.obs_weight - &rows.n;
1187        let dw_a = -2.0 * &rows.w * &rows.kappa * e_a;
1188        let dm_a = -(&rows.w * &q_a) - &(2.0 * &rows.m * &rows.kappa * e_a);
1189        let dn_a = -(2.0 * &rows.m * &q_a) - &(2.0 * &rows.n * &rows.kappa * e_a);
1190        let s_mu = -&rows.m * &geom.dq_dq0;
1191        let s_mu_a = -(&dm_a * &geom.dq_dq0) - &(&rows.m * &s1_a);
1192        let s_ls = &rows.kappa * &amn;
1193        let s_ls_a = &rows.kappa_prime * &(e_a * &amn) - &rows.kappa * &dn_a;
1194        let s_w = -&rows.m;
1195        let s_w_a = -&dm_a;
1196
1197        let objective_psi = (-&rows.m * &q_a + &s_ls * e_a).sum();
1198        let score_psi = gaussian_pack_wiggle_joint_score(
1199            &(xmu_map.transpose_mul(s_mu.view()) + fast_atv(xmu, &s_mu_a)),
1200            &(x_ls_map.transpose_mul(s_ls.view()) + fast_atv(x_ls, &s_ls_a)),
1201            &(fast_atv(&basis_a, &s_w) + fast_atv(&geom.basis, &s_w_a)),
1202        );
1203
1204        // Static blocks under logb. Gaussian mean⊥scale Fisher orthogonality:
1205        // μ AND the wiggle both enter the MEAN q = q0 + B(q0)·βw, so log σ is
1206        // the only scale-side block. The Fisher (expected) cross between any
1207        // mean-side parameter and log σ is exactly 0 because it carries
1208        // m = r·weight/σ² and E[m] = E[r]·weight/σ² = 0:
1209        //   coeff_ml = E[H_{μ,ls}] = 0  (observed 2κmD)
1210        //   l        = E[H_{ls,w}] = 0  (observed 2κm)
1211        // A function identically 0 has 0 ψ-derivatives, so coeff_ml_a and l_a
1212        // vanish too. This mirrors the non-wiggle psi path
1213        // (gaussian_joint_psi_firstweights: hmu_ls = dhmu_ls = 0) and the
1214        // wiggle Newton/REML Hessian path (wiggle_hessian_row_pieces:
1215        // coeff_ml = coeff_lw_b = 0). The observed SCORE (s_mu/s_ls/s_w above)
1216        // stays exact so Fisher scoring still hits the joint MLE; only the
1217        // curvature feeding the REML determinant / IFT correction is the
1218        // (orthogonal) expectation. coeff_ll is the residual-free Fisher
1219        // 2κ²a (#566); its ψ-derivative coeff_ll_a = 4κκ'a·e_a depends only on
1220        // η_ls. Same-side blocks (coeff_mm within mean, a/c the μ↔wiggle
1221        // within-mean cross, coeff_ww within mean) are untouched.
1222        let n = rows.m.len();
1223        let coeff_mm = &rows.w * &geom.dq_dq0.mapv(|v| v * v) - &rows.m * &geom.d2q_dq02;
1224        let coeff_mm_a = &(&dw_a * &geom.dq_dq0.mapv(|v| v * v))
1225            + &(2.0 * &rows.w * &geom.dq_dq0 * &s1_a)
1226            - &(&dm_a * &geom.d2q_dq02)
1227            - &(&rows.m * &g2_a);
1228        let coeff_ml = Array1::<f64>::zeros(n);
1229        let coeff_ml_a = Array1::<f64>::zeros(n);
1230        let coeff_ll = 2.0 * &rows.kappa * &rows.kappa * &rows.obs_weight;
1231        let coeff_ll_a = 4.0 * &rows.kappa * &rows.kappa_prime * &rows.obs_weight * e_a;
1232        let a = &rows.w * &geom.dq_dq0;
1233        let a_a = &dw_a * &geom.dq_dq0 + &rows.w * &s1_a;
1234        let c = -&rows.m;
1235        let c_a = -&dm_a;
1236        let l = Array1::<f64>::zeros(n);
1237        let l_a = Array1::<f64>::zeros(n);
1238        let h_mm_a1 = weighted_crossprod_psi_maps(
1239            xmu_map,
1240            coeff_mm.view(),
1241            CustomFamilyPsiLinearMapRef::Dense(xmu),
1242        )?;
1243        let h_mm = &h_mm_a1 + &h_mm_a1.t() + &xt_diag_x_dense(xmu, &coeff_mm_a)?;
1244        let h_ml = weighted_crossprod_psi_maps(
1245            xmu_map,
1246            coeff_ml.view(),
1247            CustomFamilyPsiLinearMapRef::Dense(x_ls),
1248        )? + &weighted_crossprod_psi_maps(
1249            CustomFamilyPsiLinearMapRef::Dense(xmu),
1250            coeff_ml.view(),
1251            x_ls_map,
1252        )? + &xt_diag_y_dense(xmu, &coeff_ml_a, x_ls)?;
1253        let h_ll_a1 = weighted_crossprod_psi_maps(
1254            x_ls_map,
1255            coeff_ll.view(),
1256            CustomFamilyPsiLinearMapRef::Dense(x_ls),
1257        )?;
1258        let h_ll = &h_ll_a1 + &h_ll_a1.t() + &xt_diag_x_dense(x_ls, &coeff_ll_a)?;
1259        let h_mw = weighted_crossprod_psi_maps(
1260            xmu_map,
1261            a.view(),
1262            CustomFamilyPsiLinearMapRef::Dense(&geom.basis),
1263        )? + &xt_diag_y_dense(xmu, &a_a, &geom.basis)?
1264            + &xt_diag_y_dense(xmu, &a, &basis_a)?
1265            + &weighted_crossprod_psi_maps(
1266                xmu_map,
1267                c.view(),
1268                CustomFamilyPsiLinearMapRef::Dense(&geom.basis_d1),
1269            )?
1270            + &xt_diag_y_dense(xmu, &c_a, &geom.basis_d1)?
1271            + &xt_diag_y_dense(xmu, &c, &basis1_a)?;
1272        let h_lw = weighted_crossprod_psi_maps(
1273            x_ls_map,
1274            l.view(),
1275            CustomFamilyPsiLinearMapRef::Dense(&geom.basis),
1276        )? + &xt_diag_y_dense(x_ls, &l_a, &geom.basis)?
1277            + &xt_diag_y_dense(x_ls, &l, &basis_a)?;
1278        let h_ww_a1 = xt_diag_y_dense(&basis_a, &rows.w, &geom.basis)?;
1279        let h_ww = &h_ww_a1 + &h_ww_a1.t() + &xt_diag_x_dense(&geom.basis, &dw_a)?;
1280
1281        Ok(Some(crate::custom_family::ExactNewtonJointPsiTerms {
1282            objective_psi,
1283            score_psi,
1284            hessian_psi: gaussian_pack_wiggle_joint_symmetrichessian(
1285                &h_mm, &h_ml, &h_mw, &h_ll, &h_lw, &h_ww,
1286            ),
1287            hessian_psi_operator: None,
1288        }))
1289    }
1290
1291    pub(crate) fn exact_newton_joint_psisecond_order_terms_from_designs(
1292        &self,
1293        block_states: &[ParameterBlockState],
1294        derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
1295        psi_i: usize,
1296        psi_j: usize,
1297        xmu: &Array2<f64>,
1298        x_ls: &Array2<f64>,
1299    ) -> Result<Option<crate::custom_family::ExactNewtonJointPsiSecondOrderTerms>, String> {
1300        let Some(dir_a) = self.exact_newton_joint_psi_direction(
1301            block_states,
1302            derivative_blocks,
1303            psi_i,
1304            xmu,
1305            x_ls,
1306            &self.policy,
1307        )?
1308        else {
1309            return Ok(None);
1310        };
1311        let Some(dir_b) = self.exact_newton_joint_psi_direction(
1312            block_states,
1313            derivative_blocks,
1314            psi_j,
1315            xmu,
1316            x_ls,
1317            &self.policy,
1318        )?
1319        else {
1320            return Ok(None);
1321        };
1322        Ok(Some(
1323            self.exact_newton_joint_psisecond_order_terms_from_parts(
1324                block_states,
1325                derivative_blocks,
1326                &dir_a,
1327                &dir_b,
1328                xmu,
1329                x_ls,
1330            )?,
1331        ))
1332    }
1333
1334    pub(crate) fn exact_newton_joint_psisecond_order_terms_from_parts(
1335        &self,
1336        block_states: &[ParameterBlockState],
1337        derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
1338        dir_a: &LocationScaleJointPsiDirection,
1339        dir_b: &LocationScaleJointPsiDirection,
1340        xmu: &Array2<f64>,
1341        x_ls: &Array2<f64>,
1342    ) -> Result<crate::custom_family::ExactNewtonJointPsiSecondOrderTerms, String> {
1343        let second_drifts = self.exact_newton_joint_psisecond_design_drifts(
1344            block_states,
1345            derivative_blocks,
1346            dir_a,
1347            dir_b,
1348            xmu,
1349            x_ls,
1350        )?;
1351        let n = self.y.len();
1352        let xmu_a_map = dir_a.x_primary_psi.as_linear_map_ref();
1353        let x_ls_a_map = dir_a.x_ls_psi.as_linear_map_ref();
1354        let xmu_b_map = dir_b.x_primary_psi.as_linear_map_ref();
1355        let x_ls_b_map = dir_b.x_ls_psi.as_linear_map_ref();
1356        let xmu_ab_map = second_psi_linear_map(
1357            second_drifts.x_primary_ab_action.as_ref(),
1358            second_drifts.x_primary_ab.as_ref(),
1359            n,
1360            xmu.ncols(),
1361        );
1362        let x_ls_ab_map = second_psi_linear_map(
1363            second_drifts.x_ls_ab_action.as_ref(),
1364            second_drifts.x_ls_ab.as_ref(),
1365            n,
1366            x_ls.ncols(),
1367        );
1368        let q0 = &block_states[Self::BLOCK_MU].eta;
1369        let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
1370        let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
1371        let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
1372        let q = q0 + etaw;
1373        let geom = self.wiggle_geometry(q0.view(), betaw.view())?;
1374        let rows = self.get_or_compute_row_scalars(&q, eta_ls)?;
1375
1376        let q_a = &geom.dq_dq0 * &dir_a.z_primary_psi;
1377        let q_b = &geom.dq_dq0 * &dir_b.z_primary_psi;
1378        let q_ab = &(&geom.dq_dq0 * &second_drifts.z_primary_ab)
1379            + &(&geom.d2q_dq02 * &(&dir_a.z_primary_psi * &dir_b.z_primary_psi));
1380        let s1_a = &geom.d2q_dq02 * &dir_a.z_primary_psi;
1381        let s1_b = &geom.d2q_dq02 * &dir_b.z_primary_psi;
1382        let s1_ab = &(&geom.d3q_dq03 * &(&dir_a.z_primary_psi * &dir_b.z_primary_psi))
1383            + &(&geom.d2q_dq02 * &second_drifts.z_primary_ab);
1384        let g2_a = &geom.d3q_dq03 * &dir_a.z_primary_psi;
1385        let g2_b = &geom.d3q_dq03 * &dir_b.z_primary_psi;
1386        let g2_ab = &(&geom.d4q_dq04 * &(&dir_a.z_primary_psi * &dir_b.z_primary_psi))
1387            + &(&geom.d3q_dq03 * &second_drifts.z_primary_ab);
1388        let basis_a = scale_matrix_rows(&geom.basis_d1, &dir_a.z_primary_psi)?;
1389        let basis_b = scale_matrix_rows(&geom.basis_d1, &dir_b.z_primary_psi)?;
1390        let basis_ab = scale_matrix_rows(&geom.basis_d1, &second_drifts.z_primary_ab)?
1391            + &scale_matrix_rows(
1392                &geom.basis_d2,
1393                &(&dir_a.z_primary_psi * &dir_b.z_primary_psi),
1394            )?;
1395        let basis1_a = scale_matrix_rows(&geom.basis_d2, &dir_a.z_primary_psi)?;
1396        let basis1_b = scale_matrix_rows(&geom.basis_d2, &dir_b.z_primary_psi)?;
1397        let basis1_ab = scale_matrix_rows(&geom.basis_d2, &second_drifts.z_primary_ab)?
1398            + &scale_matrix_rows(
1399                &geom.basis_d3,
1400                &(&dir_a.z_primary_psi * &dir_b.z_primary_psi),
1401            )?;
1402
1403        // logb κ-chain on η_ls; κ' = κ(1−κ), κ'' = κ(1−κ)(1−2κ),
1404        // κ''' = κ''(1−2κ) − 2(κ')².
1405        let e_a = &dir_a.z_ls_psi;
1406        let e_b = &dir_b.z_ls_psi;
1407        let e_ab = &second_drifts.z_ls_ab;
1408        let amn = &rows.obs_weight - &rows.n;
1409        // 4κ² − 2κ' (∂²w/∂η² style coefficient when both directions hit η_ls).
1410        let four_k2_minus_2kpi = 4.0 * &rows.kappa * &rows.kappa - 2.0 * &rows.kappa_prime;
1411
1412        // Row drifts under logb. The η_ls direction picks up a κ on each step,
1413        // and η_ls·η_ls picks up (4κ²−2κ') from differentiating κ on the
1414        // second leg. The η_ab (z_ls_ab) leg uses just one κ from the chain.
1415        let dw_a = -2.0 * &rows.w * &rows.kappa * e_a;
1416        let dw_b = -2.0 * &rows.w * &rows.kappa * e_b;
1417        let dw_ab =
1418            &four_k2_minus_2kpi * &rows.w * &(e_a * e_b) - &(2.0 * &rows.w * &rows.kappa * e_ab);
1419        let dm_a = -(&rows.w * &q_a) - &(2.0 * &rows.m * &rows.kappa * e_a);
1420        let dm_b = -(&rows.w * &q_b) - &(2.0 * &rows.m * &rows.kappa * e_b);
1421        let dm_ab = &(2.0 * &rows.w * &rows.kappa * &(&q_a * e_b + &q_b * e_a))
1422            - &(&rows.w * &q_ab)
1423            + &(&four_k2_minus_2kpi * &rows.m * &(e_a * e_b))
1424            - &(2.0 * &rows.m * &rows.kappa * e_ab);
1425        let dn_a = -(2.0 * &rows.m * &q_a) - &(2.0 * &rows.n * &rows.kappa * e_a);
1426        let dn_b = -(2.0 * &rows.m * &q_b) - &(2.0 * &rows.n * &rows.kappa * e_b);
1427        let dn_ab = &(2.0 * &rows.w * &(&q_a * &q_b))
1428            + &(4.0 * &rows.m * &rows.kappa * &(&q_a * e_b + &q_b * e_a))
1429            - &(2.0 * &rows.m * &q_ab)
1430            + &(&four_k2_minus_2kpi * &rows.n * &(e_a * e_b))
1431            - &(2.0 * &rows.n * &rows.kappa * e_ab);
1432
1433        let s_mu = -&rows.m * &geom.dq_dq0;
1434        let s_mu_a = -(&dm_a * &geom.dq_dq0) - &(&rows.m * &s1_a);
1435        let s_mu_b = -(&dm_b * &geom.dq_dq0) - &(&rows.m * &s1_b);
1436        let s_mu_ab =
1437            -(&dm_ab * &geom.dq_dq0) - &(&dm_a * &s1_b) - &(&dm_b * &s1_a) - &(&rows.m * &s1_ab);
1438        // score_ls = κ(a−n); ψ derivatives carry κ' / κ'' from chain on κ.
1439        let s_ls = &rows.kappa * &amn;
1440        let s_ls_a = &rows.kappa_prime * &(e_a * &amn) - &rows.kappa * &dn_a;
1441        let s_ls_b = &rows.kappa_prime * &(e_b * &amn) - &rows.kappa * &dn_b;
1442        // s_ls_ab = κ''·e_a·e_b·(a−n) + κ'·e_ab·(a−n)
1443        //         − κ'·(e_a·n_b + e_b·n_a) − κ·n_ab
1444        let s_ls_ab = &rows.kappa_dprime * &(e_a * e_b) * &amn + &rows.kappa_prime * e_ab * &amn
1445            - &rows.kappa_prime * &(e_a * &dn_b + e_b * &dn_a)
1446            - &rows.kappa * &dn_ab;
1447        let s_w = -&rows.m;
1448        let s_w_a = -&dm_a;
1449        let s_w_b = -&dm_b;
1450        let s_w_ab = -&dm_ab;
1451
1452        let objective_psi_psi = (&rows.w * &(&q_a * &q_b)
1453            + &(2.0 * &rows.m * &rows.kappa * &(&q_a * e_b + &q_b * e_a))
1454            + &((2.0 * &rows.kappa * &rows.kappa * &rows.n + &rows.kappa_prime * &amn)
1455                * &(e_a * e_b))
1456            - &(&rows.m * &q_ab)
1457            + &(&rows.kappa * &amn * e_ab))
1458            .sum();
1459
1460        let score_psi_psi = gaussian_pack_wiggle_joint_score(
1461            &(xmu_ab_map.transpose_mul(s_mu.view())
1462                + xmu_a_map.transpose_mul(s_mu_b.view())
1463                + xmu_b_map.transpose_mul(s_mu_a.view())
1464                + fast_atv(xmu, &s_mu_ab)),
1465            &(x_ls_ab_map.transpose_mul(s_ls.view())
1466                + x_ls_a_map.transpose_mul(s_ls_b.view())
1467                + x_ls_b_map.transpose_mul(s_ls_a.view())
1468                + fast_atv(x_ls, &s_ls_ab)),
1469            &(fast_atv(&basis_ab, &s_w)
1470                + fast_atv(&basis_a, &s_w_b)
1471                + fast_atv(&basis_b, &s_w_a)
1472                + fast_atv(&geom.basis, &s_w_ab)),
1473        );
1474
1475        // Static blocks under logb. coeff_mm has no κ; coeff_ll = Fisher 2κ²a
1476        // (#566). Gaussian mean⊥scale Fisher orthogonality: the wiggle and μ
1477        // both enter the mean (q = q0 + B·βw), log σ is the only scale block,
1478        // so coeff_ml = E[H_{μ,ls}] = 0 and l = E[H_{ls,w}] = 0 (observed 2κm,
1479        // E[m]=0). All of their ψ-directional derivatives (a/b/ab) are 0 since
1480        // a function identically 0 has 0 derivatives. The Fisher (ls,ls) block
1481        // depends only on η_ls so its derivatives carry only κ.
1482        let n = rows.m.len();
1483        let coeff_mm = &rows.w * &geom.dq_dq0.mapv(|v| v * v) - &rows.m * &geom.d2q_dq02;
1484        let coeff_ml = Array1::<f64>::zeros(n);
1485        let coeff_ll = 2.0 * &rows.kappa * &rows.kappa * &rows.obs_weight;
1486        // coeff_mm_a/b/ab: structurally κ-free; correctness now follows from
1487        // dw_a/_b/_ab and dm_a/_b/_ab carrying the κ chain on η_ls (above).
1488        let coeff_mm_a = &(&dw_a * &geom.dq_dq0.mapv(|v| v * v))
1489            + &(2.0 * &rows.w * &geom.dq_dq0 * &s1_a)
1490            - &(&dm_a * &geom.d2q_dq02)
1491            - &(&rows.m * &g2_a);
1492        let coeff_mm_b = &(&dw_b * &geom.dq_dq0.mapv(|v| v * v))
1493            + &(2.0 * &rows.w * &geom.dq_dq0 * &s1_b)
1494            - &(&dm_b * &geom.d2q_dq02)
1495            - &(&rows.m * &g2_b);
1496        let coeff_mm_ab = &(&dw_ab * &geom.dq_dq0.mapv(|v| v * v))
1497            + &(2.0 * &dw_a * &geom.dq_dq0 * &s1_b)
1498            + &(2.0 * &dw_b * &geom.dq_dq0 * &s1_a)
1499            + &(2.0 * &rows.w * &s1_a * &s1_b)
1500            + &(2.0 * &rows.w * &geom.dq_dq0 * &s1_ab)
1501            - &(&dm_ab * &geom.d2q_dq02)
1502            - &(&dm_a * &g2_b)
1503            - &(&dm_b * &g2_a)
1504            - &(&rows.m * &g2_ab);
1505        // coeff_ml (μ↔logσ) is Fisher 0; its 1st/2nd ψ-directional derivatives
1506        // are 0 as well.
1507        let coeff_ml_a = Array1::<f64>::zeros(n);
1508        let coeff_ml_b = Array1::<f64>::zeros(n);
1509        let coeff_ml_ab = Array1::<f64>::zeros(n);
1510        // Fisher (ls,ls) coeff_ll = 2κ²a (a constant prior weight) depends only
1511        // on η_ls (#566): ∂(2κ²a)/∂η = 4κκ'a, so the ψ-first derivatives are
1512        // 4κκ'a·e_a / e_b. The η_ab leg carries one κ on top.
1513        let coeff_ll_a = 4.0 * &rows.kappa * &rows.kappa_prime * &rows.obs_weight * e_a;
1514        let coeff_ll_b = 4.0 * &rows.kappa * &rows.kappa_prime * &rows.obs_weight * e_b;
1515        // coeff_ll_ab = ∂²(2κ²a)/∂a∂b = 4a(κ'²+κκ'')·e_a·e_b + 4κκ'a·e_ab
1516        // (mirrors the dense helper `d2h_ls_ls`).
1517        let coeff_ll_ab = 4.0
1518            * &rows.obs_weight
1519            * &(&rows.kappa_prime * &rows.kappa_prime + &rows.kappa * &rows.kappa_dprime)
1520            * &(e_a * e_b)
1521            + 4.0 * &rows.kappa * &rows.kappa_prime * &rows.obs_weight * e_ab;
1522        let a = &rows.w * &geom.dq_dq0;
1523        let a_a = &dw_a * &geom.dq_dq0 + &rows.w * &s1_a;
1524        let a_b = &dw_b * &geom.dq_dq0 + &rows.w * &s1_b;
1525        let a_ab = &dw_ab * &geom.dq_dq0 + &dw_a * &s1_b + &dw_b * &s1_a + &rows.w * &s1_ab;
1526        let c = -&rows.m;
1527        let c_a = -&dm_a;
1528        let c_b = -&dm_b;
1529        let c_ab = -&dm_ab;
1530        // l (logσ↔wiggle) is Fisher 0 (wiggle is mean-side; mean⊥scale), so all
1531        // of its 1st/2nd ψ-directional derivatives vanish.
1532        let l = Array1::<f64>::zeros(n);
1533        let l_a = Array1::<f64>::zeros(n);
1534        let l_b = Array1::<f64>::zeros(n);
1535        let l_ab = Array1::<f64>::zeros(n);
1536
1537        let hmm_ab = weighted_crossprod_psi_maps(
1538            xmu_ab_map,
1539            coeff_mm.view(),
1540            CustomFamilyPsiLinearMapRef::Dense(xmu),
1541        )?;
1542        let hmm_ij = weighted_crossprod_psi_maps(xmu_a_map, coeff_mm.view(), xmu_b_map)?;
1543        let hmm_iwj = weighted_crossprod_psi_maps(
1544            xmu_a_map,
1545            coeff_mm_b.view(),
1546            CustomFamilyPsiLinearMapRef::Dense(xmu),
1547        )?;
1548        let hmm_jwi = weighted_crossprod_psi_maps(
1549            xmu_b_map,
1550            coeff_mm_a.view(),
1551            CustomFamilyPsiLinearMapRef::Dense(xmu),
1552        )?;
1553        let h_mm = &hmm_ab
1554            + &hmm_ab.t()
1555            + &hmm_ij
1556            + hmm_ij.t()
1557            + &hmm_iwj
1558            + hmm_iwj.t()
1559            + &hmm_jwi
1560            + hmm_jwi.t()
1561            + &xt_diag_x_dense(xmu, &coeff_mm_ab)?;
1562        let h_ml = weighted_crossprod_psi_maps(
1563            xmu_ab_map,
1564            coeff_ml.view(),
1565            CustomFamilyPsiLinearMapRef::Dense(x_ls),
1566        )? + &weighted_crossprod_psi_maps(xmu_a_map, coeff_ml.view(), x_ls_b_map)?
1567            + &weighted_crossprod_psi_maps(xmu_b_map, coeff_ml.view(), x_ls_a_map)?
1568            + &weighted_crossprod_psi_maps(
1569                xmu_a_map,
1570                coeff_ml_b.view(),
1571                CustomFamilyPsiLinearMapRef::Dense(x_ls),
1572            )?
1573            + &weighted_crossprod_psi_maps(
1574                xmu_b_map,
1575                coeff_ml_a.view(),
1576                CustomFamilyPsiLinearMapRef::Dense(x_ls),
1577            )?
1578            + &weighted_crossprod_psi_maps(
1579                CustomFamilyPsiLinearMapRef::Dense(xmu),
1580                coeff_ml_a.view(),
1581                x_ls_b_map,
1582            )?
1583            + &weighted_crossprod_psi_maps(
1584                CustomFamilyPsiLinearMapRef::Dense(xmu),
1585                coeff_ml_b.view(),
1586                x_ls_a_map,
1587            )?
1588            + &xt_diag_y_dense(xmu, &coeff_ml_ab, x_ls)?
1589            + &weighted_crossprod_psi_maps(
1590                CustomFamilyPsiLinearMapRef::Dense(xmu),
1591                coeff_ml.view(),
1592                x_ls_ab_map,
1593            )?;
1594        let hll_ab = weighted_crossprod_psi_maps(
1595            x_ls_ab_map,
1596            coeff_ll.view(),
1597            CustomFamilyPsiLinearMapRef::Dense(x_ls),
1598        )?;
1599        let hll_ij = weighted_crossprod_psi_maps(x_ls_a_map, coeff_ll.view(), x_ls_b_map)?;
1600        let hll_iwj = weighted_crossprod_psi_maps(
1601            x_ls_a_map,
1602            coeff_ll_b.view(),
1603            CustomFamilyPsiLinearMapRef::Dense(x_ls),
1604        )?;
1605        let hll_jwi = weighted_crossprod_psi_maps(
1606            x_ls_b_map,
1607            coeff_ll_a.view(),
1608            CustomFamilyPsiLinearMapRef::Dense(x_ls),
1609        )?;
1610        let h_ll = &hll_ab
1611            + &hll_ab.t()
1612            + &hll_ij
1613            + hll_ij.t()
1614            + &hll_iwj
1615            + hll_iwj.t()
1616            + &hll_jwi
1617            + hll_jwi.t()
1618            + &xt_diag_x_dense(x_ls, &coeff_ll_ab)?;
1619        let h_mw = weighted_crossprod_psi_maps(
1620            xmu_ab_map,
1621            a.view(),
1622            CustomFamilyPsiLinearMapRef::Dense(&geom.basis),
1623        )? + &weighted_crossprod_psi_maps(
1624            xmu_a_map,
1625            a_b.view(),
1626            CustomFamilyPsiLinearMapRef::Dense(&geom.basis),
1627        )? + &weighted_crossprod_psi_maps(
1628            xmu_a_map,
1629            a.view(),
1630            CustomFamilyPsiLinearMapRef::Dense(&basis_b),
1631        )? + &weighted_crossprod_psi_maps(
1632            xmu_b_map,
1633            a_a.view(),
1634            CustomFamilyPsiLinearMapRef::Dense(&geom.basis),
1635        )? + &xt_diag_y_dense(xmu, &a_ab, &geom.basis)?
1636            + &xt_diag_y_dense(xmu, &a_a, &basis_b)?
1637            + &weighted_crossprod_psi_maps(
1638                xmu_b_map,
1639                a.view(),
1640                CustomFamilyPsiLinearMapRef::Dense(&basis_a),
1641            )?
1642            + &xt_diag_y_dense(xmu, &a_b, &basis_a)?
1643            + &xt_diag_y_dense(xmu, &a, &basis_ab)?
1644            + &weighted_crossprod_psi_maps(
1645                xmu_ab_map,
1646                c.view(),
1647                CustomFamilyPsiLinearMapRef::Dense(&geom.basis_d1),
1648            )?
1649            + &weighted_crossprod_psi_maps(
1650                xmu_a_map,
1651                c_b.view(),
1652                CustomFamilyPsiLinearMapRef::Dense(&geom.basis_d1),
1653            )?
1654            + &weighted_crossprod_psi_maps(
1655                xmu_a_map,
1656                c.view(),
1657                CustomFamilyPsiLinearMapRef::Dense(&basis1_b),
1658            )?
1659            + &weighted_crossprod_psi_maps(
1660                xmu_b_map,
1661                c_a.view(),
1662                CustomFamilyPsiLinearMapRef::Dense(&geom.basis_d1),
1663            )?
1664            + &xt_diag_y_dense(xmu, &c_ab, &geom.basis_d1)?
1665            + &xt_diag_y_dense(xmu, &c_a, &basis1_b)?
1666            + &weighted_crossprod_psi_maps(
1667                xmu_b_map,
1668                c.view(),
1669                CustomFamilyPsiLinearMapRef::Dense(&basis1_a),
1670            )?
1671            + &xt_diag_y_dense(xmu, &c_b, &basis1_a)?
1672            + &xt_diag_y_dense(xmu, &c, &basis1_ab)?;
1673        let h_lw = weighted_crossprod_psi_maps(
1674            x_ls_ab_map,
1675            l.view(),
1676            CustomFamilyPsiLinearMapRef::Dense(&geom.basis),
1677        )? + &weighted_crossprod_psi_maps(
1678            x_ls_a_map,
1679            l_b.view(),
1680            CustomFamilyPsiLinearMapRef::Dense(&geom.basis),
1681        )? + &weighted_crossprod_psi_maps(
1682            x_ls_a_map,
1683            l.view(),
1684            CustomFamilyPsiLinearMapRef::Dense(&basis_b),
1685        )? + &weighted_crossprod_psi_maps(
1686            x_ls_b_map,
1687            l_a.view(),
1688            CustomFamilyPsiLinearMapRef::Dense(&geom.basis),
1689        )? + &xt_diag_y_dense(x_ls, &l_ab, &geom.basis)?
1690            + &xt_diag_y_dense(x_ls, &l_a, &basis_b)?
1691            + &weighted_crossprod_psi_maps(
1692                x_ls_b_map,
1693                l.view(),
1694                CustomFamilyPsiLinearMapRef::Dense(&basis_a),
1695            )?
1696            + &xt_diag_y_dense(x_ls, &l_b, &basis_a)?
1697            + &xt_diag_y_dense(x_ls, &l, &basis_ab)?;
1698        let hww_ab = xt_diag_y_dense(&basis_ab, &rows.w, &geom.basis)?;
1699        let hww_ij = xt_diag_y_dense(&basis_a, &rows.w, &basis_b)?;
1700        let hww_iwj = xt_diag_y_dense(&basis_a, &dw_b, &geom.basis)?;
1701        let hww_jwi = xt_diag_y_dense(&basis_b, &dw_a, &geom.basis)?;
1702        let h_ww = &hww_ab
1703            + &hww_ab.t()
1704            + &hww_ij
1705            + hww_ij.t()
1706            + &hww_iwj
1707            + hww_iwj.t()
1708            + &hww_jwi
1709            + hww_jwi.t()
1710            + &xt_diag_x_dense(&geom.basis, &dw_ab)?;
1711
1712        Ok(crate::custom_family::ExactNewtonJointPsiSecondOrderTerms {
1713            objective_psi_psi,
1714            score_psi_psi,
1715            hessian_psi_psi: gaussian_pack_wiggle_joint_symmetrichessian(
1716                &h_mm, &h_ml, &h_mw, &h_ll, &h_lw, &h_ww,
1717            ),
1718            hessian_psi_psi_operator: None,
1719        })
1720    }
1721
1722    pub(crate) fn exact_newton_joint_psihessian_directional_derivative_from_designs(
1723        &self,
1724        block_states: &[ParameterBlockState],
1725        derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
1726        psi_index: usize,
1727        d_beta_flat: &Array1<f64>,
1728        xmu: &Array2<f64>,
1729        x_ls: &Array2<f64>,
1730    ) -> Result<Option<Array2<f64>>, String> {
1731        let Some(dir_a) = self.exact_newton_joint_psi_direction(
1732            block_states,
1733            derivative_blocks,
1734            psi_index,
1735            xmu,
1736            x_ls,
1737            &self.policy,
1738        )?
1739        else {
1740            return Ok(None);
1741        };
1742        Ok(Some(
1743            self.exact_newton_joint_psihessian_directional_derivative_from_parts(
1744                block_states,
1745                &dir_a,
1746                d_beta_flat,
1747                xmu,
1748                x_ls,
1749            )?,
1750        ))
1751    }
1752
1753    pub(crate) fn exact_newton_joint_psihessian_directional_derivative_from_parts(
1754        &self,
1755        block_states: &[ParameterBlockState],
1756        dir_a: &LocationScaleJointPsiDirection,
1757        d_beta_flat: &Array1<f64>,
1758        xmu: &Array2<f64>,
1759        x_ls: &Array2<f64>,
1760    ) -> Result<Array2<f64>, String> {
1761        let pmu = xmu.ncols();
1762        let p_ls = x_ls.ncols();
1763        let xmu_map = dir_a.x_primary_psi.as_linear_map_ref();
1764        let x_ls_map = dir_a.x_ls_psi.as_linear_map_ref();
1765        let q0 = &block_states[Self::BLOCK_MU].eta;
1766        let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
1767        let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
1768        let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
1769        let layout = GamlssBetaLayout::withwiggle(pmu, p_ls, betaw.len());
1770        let (umu, u_ls, uw) = layout.split_three(
1771            d_beta_flat,
1772            "GaussianLocationScaleWiggleFamily joint psi hessian directional derivative",
1773        )?;
1774        let q = q0 + etaw;
1775        let geom = self.wiggle_geometry(q0.view(), betaw.view())?;
1776        let rows = self.get_or_compute_row_scalars(&q, eta_ls)?;
1777
1778        let xi = fast_av(xmu, &umu);
1779        let zeta = fast_av(x_ls, &u_ls);
1780        let zmu_a_u = xmu_map.forward_mul(umu.view());
1781        let zls_a_u = x_ls_map.forward_mul(u_ls.view());
1782        let b1u = fast_av(&geom.basis_d1, &uw);
1783        let b2u = fast_av(&geom.basis_d2, &uw);
1784        let b3u = fast_av(&geom.basis_d3, &uw);
1785
1786        let q_u = &(&geom.dq_dq0 * &xi) + &fast_av(&geom.basis, &uw);
1787        let s1_u = &(&geom.d2q_dq02 * &xi) + &b1u;
1788        let g2_u = &(&geom.d3q_dq03 * &xi) + &b2u;
1789        let g3_u = &(&geom.d4q_dq04 * &xi) + &b3u;
1790
1791        let q_a = &geom.dq_dq0 * &dir_a.z_primary_psi;
1792        let s1_a = &geom.d2q_dq02 * &dir_a.z_primary_psi;
1793        let g2_a = &geom.d3q_dq03 * &dir_a.z_primary_psi;
1794        let q_a_u = &(&s1_u * &dir_a.z_primary_psi) + &(&geom.dq_dq0 * &zmu_a_u);
1795        let s1_a_u = &(&g2_u * &dir_a.z_primary_psi) + &(&geom.d2q_dq02 * &zmu_a_u);
1796        let g2_a_u = &(&g3_u * &dir_a.z_primary_psi) + &(&geom.d3q_dq03 * &zmu_a_u);
1797
1798        let basis_u = scale_matrix_rows(&geom.basis_d1, &xi)?;
1799        let basis1_u = scale_matrix_rows(&geom.basis_d2, &xi)?;
1800        let basis_a = scale_matrix_rows(&geom.basis_d1, &dir_a.z_primary_psi)?;
1801        let basis1_a = scale_matrix_rows(&geom.basis_d2, &dir_a.z_primary_psi)?;
1802        let basis_a_u = scale_matrix_rows(&geom.basis_d2, &(&xi * &dir_a.z_primary_psi))?
1803            + &scale_matrix_rows(&geom.basis_d1, &zmu_a_u)?;
1804        let basis1_a_u = scale_matrix_rows(&geom.basis_d3, &(&xi * &dir_a.z_primary_psi))?
1805            + &scale_matrix_rows(&geom.basis_d2, &zmu_a_u)?;
1806
1807        // logb κ-chain on η_ls; e_a = ψ_a's η_ls direction, ζ = β-direction.
1808        // η_au = zls_a_u is the second mixed derivative (β·ψ).
1809        let e_a = &dir_a.z_ls_psi;
1810        let four_k2_minus_2kpi = 4.0 * &rows.kappa * &rows.kappa - 2.0 * &rows.kappa_prime;
1811        let dw_u = -2.0 * &rows.w * &rows.kappa * &zeta;
1812        let dm_u = -(&rows.w * &q_u) - &(2.0 * &rows.m * &rows.kappa * &zeta);
1813        let dw_a = -2.0 * &rows.w * &rows.kappa * e_a;
1814        let dm_a = -(&rows.w * &q_a) - &(2.0 * &rows.m * &rows.kappa * e_a);
1815        let dw_a_u = &four_k2_minus_2kpi * &rows.w * &(e_a * &zeta)
1816            - &(2.0 * &rows.w * &rows.kappa * &zls_a_u);
1817        let dm_a_u = &(2.0 * &rows.w * &rows.kappa * &(&q_a * &zeta + &q_u * e_a))
1818            - &(&rows.w * &q_a_u)
1819            + &(&four_k2_minus_2kpi * &rows.m * &(e_a * &zeta))
1820            - &(2.0 * &rows.m * &rows.kappa * &zls_a_u);
1821
1822        let coeff_mm_u = &(&dw_u * &geom.dq_dq0.mapv(|v| v * v))
1823            + &(2.0 * &rows.w * &geom.dq_dq0 * &s1_u)
1824            - &(&dm_u * &geom.d2q_dq02)
1825            - &(&rows.m * &g2_u);
1826        // coeff_ml (μ↔logσ) is mean⊥scale Fisher 0 (E[m]=0), so both its
1827        // β-drift derivative coeff_ml_u and the mixed coeff_ml_a_u are 0.
1828        let n = rows.m.len();
1829        let coeff_ml_u = Array1::<f64>::zeros(n);
1830        // Fisher (ls,ls) coeff_ll = 2κ²a (#566); ∂(2κ²a)/∂η = 4κκ'a, so the
1831        // β-drift derivative along ζ is 4κκ'a·ζ.
1832        let coeff_ll_u = 4.0 * &rows.kappa * &rows.kappa_prime * &rows.obs_weight * &zeta;
1833        let coeff_mm_a_u = &(&dw_a_u * &geom.dq_dq0.mapv(|v| v * v))
1834            + &(2.0 * &dw_a * &geom.dq_dq0 * &s1_u)
1835            + &(2.0 * &dw_u * &geom.dq_dq0 * &s1_a)
1836            + &(2.0 * &rows.w * &s1_u * &s1_a)
1837            + &(2.0 * &rows.w * &geom.dq_dq0 * &s1_a_u)
1838            - &(&dm_a_u * &geom.d2q_dq02)
1839            - &(&dm_a * &g2_u)
1840            - &(&dm_u * &g2_a)
1841            - &(&rows.m * &g2_a_u);
1842        // coeff_ml_a_u = ∂²(coeff_ml)/∂a∂u = 0 (coeff_ml ≡ Fisher 0).
1843        let coeff_ml_a_u = Array1::<f64>::zeros(n);
1844        // coeff_ll_a_u = ∂²(2κ²a)/∂a∂u for the Fisher (ls,ls) block (#566):
1845        // 4a(κ'²+κκ'')·e_a·ζ + 4κκ'a·η_au (the η_au=zls_a_u mixed leg), mirroring
1846        // the dense mixed-drift helper.
1847        let coeff_ll_a_u = 4.0
1848            * &rows.obs_weight
1849            * &(&rows.kappa_prime * &rows.kappa_prime + &rows.kappa * &rows.kappa_dprime)
1850            * &(e_a * &zeta)
1851            + 4.0 * &rows.kappa * &rows.kappa_prime * &rows.obs_weight * &zls_a_u;
1852
1853        let a = &rows.w * &geom.dq_dq0;
1854        let a_u = &dw_u * &geom.dq_dq0 + &rows.w * &s1_u;
1855        let a_a = &dw_a * &geom.dq_dq0 + &rows.w * &s1_a;
1856        let a_a_u = &dw_a_u * &geom.dq_dq0 + &dw_a * &s1_u + &dw_u * &s1_a + &rows.w * &s1_a_u;
1857        let c = -&rows.m;
1858        let c_u = -&dm_u;
1859        let c_a = -&dm_a;
1860        let c_a_u = -&dm_a_u;
1861        // l (logσ↔wiggle) is mean⊥scale Fisher 0 (wiggle is mean-side), so its
1862        // β-drift (l_u), ψ (l_a), and mixed (l_a_u) derivatives all vanish.
1863        let l = Array1::<f64>::zeros(n);
1864        let l_u = Array1::<f64>::zeros(n);
1865        let l_a = Array1::<f64>::zeros(n);
1866        let l_a_u = Array1::<f64>::zeros(n);
1867
1868        let hmm_a1 = weighted_crossprod_psi_maps(
1869            xmu_map,
1870            coeff_mm_u.view(),
1871            CustomFamilyPsiLinearMapRef::Dense(xmu),
1872        )?;
1873        let h_mm = &hmm_a1 + &hmm_a1.t() + &xt_diag_x_dense(xmu, &coeff_mm_a_u)?;
1874        let h_ml = weighted_crossprod_psi_maps(
1875            xmu_map,
1876            coeff_ml_u.view(),
1877            CustomFamilyPsiLinearMapRef::Dense(x_ls),
1878        )? + &weighted_crossprod_psi_maps(
1879            CustomFamilyPsiLinearMapRef::Dense(xmu),
1880            coeff_ml_u.view(),
1881            x_ls_map,
1882        )? + &xt_diag_y_dense(xmu, &coeff_ml_a_u, x_ls)?;
1883        let hll_a1 = weighted_crossprod_psi_maps(
1884            x_ls_map,
1885            coeff_ll_u.view(),
1886            CustomFamilyPsiLinearMapRef::Dense(x_ls),
1887        )?;
1888        let h_ll = &hll_a1 + &hll_a1.t() + &xt_diag_x_dense(x_ls, &coeff_ll_a_u)?;
1889        let h_mw = weighted_crossprod_psi_maps(
1890            xmu_map,
1891            a_u.view(),
1892            CustomFamilyPsiLinearMapRef::Dense(&geom.basis),
1893        )? + &weighted_crossprod_psi_maps(
1894            xmu_map,
1895            a.view(),
1896            CustomFamilyPsiLinearMapRef::Dense(&basis_u),
1897        )? + &xt_diag_y_dense(xmu, &a_a_u, &geom.basis)?
1898            + &xt_diag_y_dense(xmu, &a_a, &basis_u)?
1899            + &xt_diag_y_dense(xmu, &a_u, &basis_a)?
1900            + &xt_diag_y_dense(xmu, &a, &basis_a_u)?
1901            + &weighted_crossprod_psi_maps(
1902                xmu_map,
1903                c_u.view(),
1904                CustomFamilyPsiLinearMapRef::Dense(&geom.basis_d1),
1905            )?
1906            + &weighted_crossprod_psi_maps(
1907                xmu_map,
1908                c.view(),
1909                CustomFamilyPsiLinearMapRef::Dense(&basis1_u),
1910            )?
1911            + &xt_diag_y_dense(xmu, &c_a_u, &geom.basis_d1)?
1912            + &xt_diag_y_dense(xmu, &c_a, &basis1_u)?
1913            + &xt_diag_y_dense(xmu, &c_u, &basis1_a)?
1914            + &xt_diag_y_dense(xmu, &c, &basis1_a_u)?;
1915        let h_lw = weighted_crossprod_psi_maps(
1916            x_ls_map,
1917            l_u.view(),
1918            CustomFamilyPsiLinearMapRef::Dense(&geom.basis),
1919        )? + &weighted_crossprod_psi_maps(
1920            x_ls_map,
1921            l.view(),
1922            CustomFamilyPsiLinearMapRef::Dense(&basis_u),
1923        )? + &xt_diag_y_dense(x_ls, &l_a_u, &geom.basis)?
1924            + &xt_diag_y_dense(x_ls, &l_a, &basis_u)?
1925            + &xt_diag_y_dense(x_ls, &l_u, &basis_a)?
1926            + &xt_diag_y_dense(x_ls, &l, &basis_a_u)?;
1927        let hww_a_u = xt_diag_y_dense(&basis_a_u, &rows.w, &geom.basis)?;
1928        let hww_aw = xt_diag_y_dense(&basis_a, &dw_u, &geom.basis)?;
1929        let hww_au = xt_diag_y_dense(&basis_a, &rows.w, &basis_u)?;
1930        let h_ww = &hww_a_u
1931            + &hww_a_u.t()
1932            + &hww_aw
1933            + hww_aw.t()
1934            + &hww_au
1935            + hww_au.t()
1936            + &xt_diag_x_dense(&geom.basis, &dw_a_u)?;
1937
1938        Ok(gaussian_pack_wiggle_joint_symmetrichessian(
1939            &h_mm, &h_ml, &h_mw, &h_ll, &h_lw, &h_ww,
1940        ))
1941    }
1942
1943    pub(crate) fn exact_newton_joint_psi_terms_for_specs(
1944        &self,
1945        block_states: &[ParameterBlockState],
1946        specs: &[ParameterBlockSpec],
1947        derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
1948        psi_index: usize,
1949    ) -> Result<Option<crate::custom_family::ExactNewtonJointPsiTerms>, String> {
1950        let Some((xmu, x_ls)) = self.exact_joint_dense_block_designs(Some(specs))? else {
1951            return Ok(None);
1952        };
1953        self.exact_newton_joint_psi_terms_from_designs(
1954            block_states,
1955            derivative_blocks,
1956            psi_index,
1957            &xmu,
1958            &x_ls,
1959        )
1960    }
1961
1962    pub(crate) fn exact_newton_joint_psisecond_order_terms_for_specs(
1963        &self,
1964        block_states: &[ParameterBlockState],
1965        specs: &[ParameterBlockSpec],
1966        derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
1967        psi_i: usize,
1968        psi_j: usize,
1969    ) -> Result<Option<crate::custom_family::ExactNewtonJointPsiSecondOrderTerms>, String> {
1970        let Some((xmu, x_ls)) = self.exact_joint_dense_block_designs(Some(specs))? else {
1971            return Ok(None);
1972        };
1973        self.exact_newton_joint_psisecond_order_terms_from_designs(
1974            block_states,
1975            derivative_blocks,
1976            psi_i,
1977            psi_j,
1978            &xmu,
1979            &x_ls,
1980        )
1981    }
1982
1983    pub(crate) fn exact_newton_joint_psihessian_directional_derivative_for_specs(
1984        &self,
1985        block_states: &[ParameterBlockState],
1986        specs: &[ParameterBlockSpec],
1987        derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
1988        psi_index: usize,
1989        d_beta_flat: &Array1<f64>,
1990    ) -> Result<Option<Array2<f64>>, String> {
1991        let Some((xmu, x_ls)) = self.exact_joint_dense_block_designs(Some(specs))? else {
1992            return Ok(None);
1993        };
1994        self.exact_newton_joint_psihessian_directional_derivative_from_designs(
1995            block_states,
1996            derivative_blocks,
1997            psi_index,
1998            d_beta_flat,
1999            &xmu,
2000            &x_ls,
2001        )
2002    }
2003}
2004
2005impl CustomFamily for GaussianLocationScaleWiggleFamily {
2006    // Preserve the pre-gam#1395 behavior: the trait default flipped to OFF (the
2007    // flat-prior exact-Newton objective carries no Jeffreys term), so families
2008    // that historically armed the term by default opt back in explicitly.
2009    fn joint_jeffreys_term_required(&self) -> bool {
2010        true
2011    }
2012
2013    fn exact_newton_joint_hessian_beta_dependent(&self) -> bool {
2014        true
2015    }
2016
2017    /// Non-spatial location-scale seeding classification — see
2018    /// `GaussianLocationScaleFamily::outer_seed_config` for the full rationale.
2019    /// The wiggle variant has the identical non-profiled log-σ predictor, so it
2020    /// needs the same `GaussianLocationScale` classification (flexible Gaussian
2021    /// seed grid + lowest-cost keep-best + interior-extreme promotion) on the
2022    /// rho-only path; without it the log-σ block over-smooths exactly as the
2023    /// non-wiggle family does.
2024    fn outer_seed_config(&self, n_params: usize) -> crate::seeding::SeedConfig {
2025        if n_params == 0 {
2026            return crate::seeding::SeedConfig::default();
2027        }
2028        let mut config = crate::seeding::SeedConfig::default();
2029        config.risk_profile = crate::seeding::SeedRiskProfile::GaussianLocationScale;
2030        // Same anti-over-smoothing widening as the non-wiggle family: fully
2031        // solve more of the grid and deepen the screening proxy so the flexible
2032        // (low-λ) log-σ basin is not screened out by the over-smoothed seed's
2033        // near-flat Fisher surface looking cheapest at a shallow cap. Provably
2034        // non-worsening under lowest-cost keep-best. See
2035        // `GaussianLocationScaleFamily::outer_seed_config` for the full
2036        // rationale.
2037        config.max_seeds = 8;
2038        config.seed_budget = 4;
2039        config.screen_max_inner_iterations = 16;
2040        config
2041    }
2042
2043    fn coefficient_hessian_cost(&self, specs: &[ParameterBlockSpec]) -> u64 {
2044        // Operator-aware (see GaussianLocationScaleFamily for derivation): when
2045        // `use_joint_matrix_free_path` selects the workspace operator, joint
2046        // Hv apply is O(n · (p_t + p_ℓ + p_w)) — the row-streaming RowCoeffOperator
2047        // never materializes the dense (p_t + p_ℓ + p_w)² matrix.
2048        crate::location_scale_engine::location_scale_coefficient_hessian_cost(
2049            self.y.len() as u64,
2050            specs,
2051        )
2052    }
2053
2054    fn block_linear_constraints(
2055        &self,
2056        _: &[ParameterBlockState],
2057        block_idx: usize,
2058        spec: &ParameterBlockSpec,
2059    ) -> Result<Option<LinearInequalityConstraints>, String> {
2060        if block_idx != Self::BLOCK_WIGGLE {
2061            return Ok(None);
2062        }
2063        Ok(monotone_wiggle_nonnegative_constraints(spec.design.ncols()))
2064    }
2065
2066    fn post_update_block_beta(
2067        &self,
2068        _: &[ParameterBlockState],
2069        block_idx: usize,
2070        block_spec: &ParameterBlockSpec,
2071        beta: Array1<f64>,
2072    ) -> Result<Array1<f64>, String> {
2073        assert!(!block_spec.name.is_empty());
2074        if block_idx != Self::BLOCK_WIGGLE {
2075            return Ok(beta);
2076        }
2077        let beta = project_monotone_wiggle_beta_nonnegative(beta);
2078        validate_monotone_wiggle_beta_nonnegative(
2079            &beta,
2080            "GaussianLocationScaleWiggleFamily post-update",
2081        )?;
2082        Ok(beta)
2083    }
2084
2085    fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
2086        validate_block_count::<GamlssError>(
2087            "GaussianLocationScaleWiggleFamily",
2088            3,
2089            block_states.len(),
2090        )?;
2091        let n = self.y.len();
2092        let eta_mu = &block_states[Self::BLOCK_MU].eta;
2093        let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
2094        let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
2095        if eta_mu.len() != n || eta_ls.len() != n || etaw.len() != n || self.weights.len() != n {
2096            return Err(GamlssError::DimensionMismatch {
2097                reason: "GaussianLocationScaleWiggleFamily input size mismatch".to_string(),
2098            }
2099            .into());
2100        }
2101        let ln2pi = (2.0 * std::f64::consts::PI).ln();
2102        // Per-row kernel emits 6 working values into pre-allocated outputs;
2103        // ll is reduced via Rayon's sum. Independent across rows. Note
2104        // wmu == ww (both equal location_working_weight) and the mean+wiggle
2105        // working responses share row.location_working_shift, applied to
2106        // eta_mu[i] and etaw[i] respectively. The previous `q = eta_mu + etaw`
2107        // intermediate is inlined to avoid an extra n-vector allocation.
2108        let mut zmu = Array1::<f64>::zeros(n);
2109        let mut wmu = Array1::<f64>::zeros(n);
2110        let mut zls = Array1::<f64>::zeros(n);
2111        let mut wls = Array1::<f64>::zeros(n);
2112        let mut zw = Array1::<f64>::zeros(n);
2113        let mut ww = Array1::<f64>::zeros(n);
2114        const CHUNK: usize = 1024;
2115        let zmu_s = zmu
2116            .as_slice_memory_order_mut()
2117            .expect("zeros is contiguous");
2118        let wmu_s = wmu
2119            .as_slice_memory_order_mut()
2120            .expect("zeros is contiguous");
2121        let zls_s = zls
2122            .as_slice_memory_order_mut()
2123            .expect("zeros is contiguous");
2124        let wls_s = wls
2125            .as_slice_memory_order_mut()
2126            .expect("zeros is contiguous");
2127        let zw_s = zw.as_slice_memory_order_mut().expect("zeros is contiguous");
2128        let ww_s = ww.as_slice_memory_order_mut().expect("zeros is contiguous");
2129        let y_view = self.y.view();
2130        let w_view = self.weights.view();
2131        let eta_mu_view = eta_mu.view();
2132        let eta_ls_view = eta_ls.view();
2133        let etaw_view = etaw.view();
2134        let ll: f64 = zmu_s
2135            .par_chunks_mut(CHUNK)
2136            .zip(wmu_s.par_chunks_mut(CHUNK))
2137            .zip(zls_s.par_chunks_mut(CHUNK))
2138            .zip(wls_s.par_chunks_mut(CHUNK))
2139            .zip(zw_s.par_chunks_mut(CHUNK))
2140            .zip(ww_s.par_chunks_mut(CHUNK))
2141            .enumerate()
2142            .map(
2143                |(chunk_idx, (((((zmu_c, wmu_c), zls_c), wls_c), zw_c), ww_c))| {
2144                    let start = chunk_idx * CHUNK;
2145                    let mut local_ll = 0.0;
2146                    for local in 0..zmu_c.len() {
2147                        let i = start + local;
2148                        let q_i = eta_mu_view[i] + etaw_view[i];
2149                        let row = gaussian_diagonal_row_kernel(
2150                            y_view[i],
2151                            q_i,
2152                            eta_ls_view[i],
2153                            w_view[i],
2154                            ln2pi,
2155                        );
2156                        let w_i = row.location_working_weight;
2157                        let shift = row.location_working_shift;
2158                        zmu_c[local] = eta_mu_view[i] + shift;
2159                        wmu_c[local] = w_i;
2160                        zw_c[local] = etaw_view[i] + shift;
2161                        ww_c[local] = w_i;
2162                        zls_c[local] = row.log_sigma_working_response;
2163                        wls_c[local] = row.log_sigma_working_weight;
2164                        local_ll += row.log_likelihood;
2165                    }
2166                    local_ll
2167                },
2168            )
2169            .sum();
2170
2171        Ok(FamilyEvaluation {
2172            log_likelihood: ll,
2173            blockworking_sets: vec![
2174                BlockWorkingSet::diagonal_checked(zmu, wmu)?,
2175                BlockWorkingSet::diagonal_checked(zls, wls)?,
2176                BlockWorkingSet::diagonal_checked(zw, ww)?,
2177            ],
2178        })
2179    }
2180
2181    fn log_likelihood_only(&self, block_states: &[ParameterBlockState]) -> Result<f64, String> {
2182        validate_block_count::<GamlssError>(
2183            "GaussianLocationScaleWiggleFamily",
2184            3,
2185            block_states.len(),
2186        )?;
2187        let eta_mu = &block_states[Self::BLOCK_MU].eta;
2188        let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
2189        let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
2190        if eta_mu.len() != self.y.len()
2191            || eta_ls.len() != self.y.len()
2192            || etaw.len() != self.y.len()
2193            || self.weights.len() != self.y.len()
2194        {
2195            return Err(GamlssError::DimensionMismatch {
2196                reason: "GaussianLocationScaleWiggleFamily input size mismatch".to_string(),
2197            }
2198            .into());
2199        }
2200        let q = eta_mu + etaw;
2201        let ln2pi = (2.0 * std::f64::consts::PI).ln();
2202        let mut ll = 0.0;
2203        for i in 0..self.y.len() {
2204            let sigma_i = logb_sigma_from_eta_scalar(eta_ls[i]);
2205            let inv_s2 = (sigma_i * sigma_i).recip();
2206            let r = self.y[i] - q[i];
2207            ll += self.weights[i] * (-0.5 * (r * r * inv_s2 + ln2pi + 2.0 * sigma_i.ln()));
2208        }
2209        Ok(ll)
2210    }
2211
2212    /// Outer-only log-likelihood with optional row subsample.
2213    ///
2214    /// When `options.outer_score_subsample` is `Some`, only the sampled rows
2215    /// contribute; each row's per-row log-likelihood term is multiplied by
2216    /// `WeightedOuterRow.weight`, the Horvitz–Thompson inverse-inclusion
2217    /// factor 1/π_i (uniform or stratified sampling both supported), so the
2218    /// partial sum is an unbiased estimator of the full-data log-likelihood.
2219    /// When `None`, this returns the full-data `log_likelihood_only`. Inner
2220    /// PIRLS line searches never install the subsample option, so they
2221    /// continue to score the exact full-data log-likelihood.
2222    fn log_likelihood_only_with_options(
2223        &self,
2224        block_states: &[ParameterBlockState],
2225        options: &BlockwiseFitOptions,
2226    ) -> Result<f64, String> {
2227        let Some(subsample) = options.outer_score_subsample.as_ref() else {
2228            return self.log_likelihood_only(block_states);
2229        };
2230        validate_block_count::<GamlssError>(
2231            "GaussianLocationScaleWiggleFamily",
2232            3,
2233            block_states.len(),
2234        )?;
2235        let n = self.y.len();
2236        let eta_mu = &block_states[Self::BLOCK_MU].eta;
2237        let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
2238        let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
2239        if eta_mu.len() != n || eta_ls.len() != n || etaw.len() != n || self.weights.len() != n {
2240            return Err(GamlssError::DimensionMismatch {
2241                reason: "GaussianLocationScaleWiggleFamily input size mismatch".to_string(),
2242            }
2243            .into());
2244        }
2245        let ln2pi = (2.0 * std::f64::consts::PI).ln();
2246        use rayon::iter::ParallelIterator;
2247        let ll: f64 = subsample
2248            .rows
2249            .par_iter()
2250            .map(|row| {
2251                let i = row.index;
2252                let wi = self.weights[i];
2253                if wi == 0.0 {
2254                    return 0.0;
2255                }
2256                let sigma_i = logb_sigma_from_eta_scalar(eta_ls[i]);
2257                let inv_s2 = (sigma_i * sigma_i).recip();
2258                let r = self.y[i] - eta_mu[i] - etaw[i];
2259                row.weight * wi * (-0.5 * (r * r * inv_s2 + ln2pi + 2.0 * sigma_i.ln()))
2260            })
2261            .sum();
2262        Ok(ll)
2263    }
2264
2265    fn requires_joint_outer_hyper_path(&self) -> bool {
2266        true
2267    }
2268
2269    fn exact_newton_hessian_directional_derivative(
2270        &self,
2271        block_states: &[ParameterBlockState],
2272        block_idx: usize,
2273        d_beta: &Array1<f64>,
2274    ) -> Result<Option<Array2<f64>>, String> {
2275        validate_block_count::<GamlssError>(
2276            "GaussianLocationScaleWiggleFamily",
2277            3,
2278            block_states.len(),
2279        )?;
2280        let pmu = self
2281            .mu_design
2282            .as_ref()
2283            .ok_or_else(|| {
2284                "GaussianLocationScaleWiggleFamily exact path is missing mu design".to_string()
2285            })?
2286            .ncols();
2287        let p_ls = self
2288            .log_sigma_design
2289            .as_ref()
2290            .ok_or_else(|| {
2291                "GaussianLocationScaleWiggleFamily exact path is missing log-sigma design"
2292                    .to_string()
2293            })?
2294            .ncols();
2295        let pw = block_states[Self::BLOCK_WIGGLE].beta.len();
2296        let total = pmu + p_ls + pw;
2297        let (start, end) = match block_idx {
2298            Self::BLOCK_MU => (0usize, pmu),
2299            Self::BLOCK_LOG_SIGMA => (pmu, pmu + p_ls),
2300            Self::BLOCK_WIGGLE => (pmu + p_ls, total),
2301            _ => return Ok(None),
2302        };
2303        if d_beta.len() != end - start {
2304            return Err(GamlssError::DimensionMismatch { reason: format!(
2305                "GaussianLocationScaleWiggleFamily block {block_idx} d_beta length mismatch: got {}, expected {}",
2306                d_beta.len(),
2307                end - start
2308            ) }.into());
2309        }
2310        let mut d_beta_flat = Array1::<f64>::zeros(total);
2311        d_beta_flat.slice_mut(s![start..end]).assign(d_beta);
2312        let (xmu, x_ls) = self.dense_block_designs()?;
2313        let d_joint = self
2314            .exact_newton_joint_hessian_directional_derivative_from_designs(
2315                block_states,
2316                &xmu,
2317                &x_ls,
2318                &d_beta_flat,
2319            )?
2320            .ok_or_else(|| "missing Gaussian wiggle exact joint directional Hessian".to_string())?;
2321        Ok(Some(d_joint.slice(s![start..end, start..end]).to_owned()))
2322    }
2323
2324    fn exact_newton_joint_hessian(
2325        &self,
2326        block_states: &[ParameterBlockState],
2327    ) -> Result<Option<Array2<f64>>, String> {
2328        self.exact_newton_joint_hessian_for_specs(block_states, None)
2329    }
2330
2331    fn has_explicit_joint_hessian(&self) -> bool {
2332        true
2333    }
2334
2335    fn exact_newton_joint_hessian_directional_derivative(
2336        &self,
2337        block_states: &[ParameterBlockState],
2338        d_beta_flat: &Array1<f64>,
2339    ) -> Result<Option<Array2<f64>>, String> {
2340        self.exact_newton_joint_hessian_directional_derivative_for_specs(
2341            block_states,
2342            None,
2343            d_beta_flat,
2344        )
2345    }
2346
2347    fn exact_newton_joint_hessiansecond_directional_derivative(
2348        &self,
2349        block_states: &[ParameterBlockState],
2350        d_beta_u_flat: &Array1<f64>,
2351        d_beta_v_flat: &Array1<f64>,
2352    ) -> Result<Option<Array2<f64>>, String> {
2353        self.exact_newton_joint_hessian_second_directional_derivative_for_specs(
2354            block_states,
2355            None,
2356            d_beta_u_flat,
2357            d_beta_v_flat,
2358        )
2359    }
2360
2361    fn exact_newton_joint_hessian_with_specs(
2362        &self,
2363        block_states: &[ParameterBlockState],
2364        specs: &[ParameterBlockSpec],
2365    ) -> Result<Option<Array2<f64>>, String> {
2366        self.exact_newton_joint_hessian_for_specs(block_states, Some(specs))
2367    }
2368
2369    fn exact_newton_joint_hessian_directional_derivative_with_specs(
2370        &self,
2371        block_states: &[ParameterBlockState],
2372        specs: &[ParameterBlockSpec],
2373        d_beta_flat: &Array1<f64>,
2374    ) -> Result<Option<Array2<f64>>, String> {
2375        self.exact_newton_joint_hessian_directional_derivative_for_specs(
2376            block_states,
2377            Some(specs),
2378            d_beta_flat,
2379        )
2380    }
2381
2382    fn exact_newton_joint_hessian_second_directional_derivative_with_specs(
2383        &self,
2384        block_states: &[ParameterBlockState],
2385        specs: &[ParameterBlockSpec],
2386        d_beta_u_flat: &Array1<f64>,
2387        d_beta_v_flat: &Array1<f64>,
2388    ) -> Result<Option<Array2<f64>>, String> {
2389        self.exact_newton_joint_hessian_second_directional_derivative_for_specs(
2390            block_states,
2391            Some(specs),
2392            d_beta_u_flat,
2393            d_beta_v_flat,
2394        )
2395    }
2396
2397    fn exact_newton_joint_psi_terms(
2398        &self,
2399        block_states: &[ParameterBlockState],
2400        specs: &[ParameterBlockSpec],
2401        derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
2402        psi_index: usize,
2403    ) -> Result<Option<crate::custom_family::ExactNewtonJointPsiTerms>, String> {
2404        self.exact_newton_joint_psi_terms_for_specs(
2405            block_states,
2406            specs,
2407            derivative_blocks,
2408            psi_index,
2409        )
2410    }
2411
2412    fn exact_newton_joint_psisecond_order_terms(
2413        &self,
2414        block_states: &[ParameterBlockState],
2415        specs: &[ParameterBlockSpec],
2416        derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
2417        psi_i: usize,
2418        psi_j: usize,
2419    ) -> Result<Option<crate::custom_family::ExactNewtonJointPsiSecondOrderTerms>, String> {
2420        self.exact_newton_joint_psisecond_order_terms_for_specs(
2421            block_states,
2422            specs,
2423            derivative_blocks,
2424            psi_i,
2425            psi_j,
2426        )
2427    }
2428
2429    fn exact_newton_joint_psihessian_directional_derivative(
2430        &self,
2431        block_states: &[ParameterBlockState],
2432        specs: &[ParameterBlockSpec],
2433        derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
2434        psi_index: usize,
2435        d_beta_flat: &Array1<f64>,
2436    ) -> Result<Option<Array2<f64>>, String> {
2437        self.exact_newton_joint_psihessian_directional_derivative_for_specs(
2438            block_states,
2439            specs,
2440            derivative_blocks,
2441            psi_index,
2442            d_beta_flat,
2443        )
2444    }
2445
2446    fn exact_newton_joint_psi_workspace(
2447        &self,
2448        block_states: &[ParameterBlockState],
2449        specs: &[ParameterBlockSpec],
2450        derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
2451    ) -> Result<Option<Arc<dyn ExactNewtonJointPsiWorkspace>>, String> {
2452        if !self.exact_joint_supported() {
2453            return Ok(None);
2454        }
2455        Ok(Some(Arc::new(
2456            GaussianLocationScaleWiggleExactNewtonJointPsiWorkspace::new(
2457                self.clone(),
2458                block_states.to_vec(),
2459                specs,
2460                derivative_blocks.to_vec(),
2461            )?,
2462        )))
2463    }
2464
2465    /// Outer-aware joint ψ workspace with optional row subsample.
2466    ///
2467    /// The wiggle ψ workspace shares the generic `LocationScaleJointPsiWorkspace`
2468    /// with the non-wiggle GLS family, and the subsample is plumbed through
2469    /// the trait. The wiggle's `ws_psi_*_from_parts` impls currently drop the
2470    /// subsample and fall back to the full-data exact wiggle ψ path; see
2471    /// their inline rationale and the `apply_ht_mask_*` helpers used by the
2472    /// non-wiggle GLS family. Storing the subsample here keeps the workspace
2473    /// signature uniform across both families and leaves a hook for the
2474    /// follow-up that refactors the wiggle inline arrays into a weights
2475    /// struct so HT masking can be applied in one place. Even without that
2476    /// refactor, the total outer score under subsampling remains an unbiased
2477    /// estimator of the full-data outer score: HT-unbiased LL
2478    /// (`log_likelihood_only_with_options`) + HT-unbiased ρ-Hessian
2479    /// (`exact_newton_joint_hessian_workspace_with_options`) + exact-unbiased
2480    /// ψ (the wiggle workspace path) = unbiased.
2481    fn exact_newton_joint_psi_workspace_with_options(
2482        &self,
2483        block_states: &[ParameterBlockState],
2484        specs: &[ParameterBlockSpec],
2485        derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
2486        options: &BlockwiseFitOptions,
2487    ) -> Result<Option<Arc<dyn ExactNewtonJointPsiWorkspace>>, String> {
2488        if !self.exact_joint_supported() {
2489            return Ok(None);
2490        }
2491        Ok(Some(Arc::new(
2492            GaussianLocationScaleWiggleExactNewtonJointPsiWorkspace::new_with_subsample(
2493                self.clone(),
2494                block_states.to_vec(),
2495                specs,
2496                derivative_blocks.to_vec(),
2497                options.outer_score_subsample.clone(),
2498            )?,
2499        )))
2500    }
2501
2502    fn block_geometry(
2503        &self,
2504        block_states: &[ParameterBlockState],
2505        spec: &ParameterBlockSpec,
2506    ) -> Result<(DesignMatrix, Array1<f64>), String> {
2507        if spec.name != "wiggle" {
2508            return Ok((spec.design.clone(), spec.offset.clone()));
2509        }
2510        if block_states.is_empty() {
2511            return Err(GamlssError::UnsupportedConfiguration {
2512                reason: "Gaussian wiggle geometry requires mean block".to_string(),
2513            }
2514            .into());
2515        }
2516        let eta_mu = &block_states[Self::BLOCK_MU].eta;
2517        if eta_mu.len() != self.y.len() {
2518            return Err(GamlssError::DimensionMismatch {
2519                reason: "Gaussian wiggle geometry input size mismatch".to_string(),
2520            }
2521            .into());
2522        }
2523        let x = self.wiggle_design(eta_mu.view())?;
2524        if x.ncols() != spec.design.ncols() {
2525            return Err(GamlssError::DimensionMismatch {
2526                reason: format!(
2527                    "Gaussian dynamic wiggle design col mismatch: got {}, expected {}",
2528                    x.ncols(),
2529                    spec.design.ncols()
2530                ),
2531            }
2532            .into());
2533        }
2534        let nrows = x.nrows();
2535        Ok((
2536            DesignMatrix::Dense(gam_linalg::matrix::DenseDesignMatrix::from(x)),
2537            Array1::zeros(nrows),
2538        ))
2539    }
2540
2541    fn block_geometry_is_dynamic(&self) -> bool {
2542        true
2543    }
2544
2545    fn exact_newton_joint_hessian_workspace(
2546        &self,
2547        block_states: &[ParameterBlockState],
2548        specs: &[ParameterBlockSpec],
2549    ) -> Result<Option<Arc<dyn ExactNewtonJointHessianWorkspace>>, String> {
2550        let Some((xmu, x_ls)) = self.exact_joint_dense_block_designs(Some(specs))? else {
2551            return Ok(None);
2552        };
2553        let workspace = GaussianLocationScaleWiggleHessianWorkspace::new(
2554            self.clone(),
2555            block_states.to_vec(),
2556            xmu.into_owned(),
2557            x_ls.into_owned(),
2558        )?;
2559        Ok(Some(Arc::new(workspace)))
2560    }
2561
2562    /// Outer-aware joint-Hessian workspace with optional row subsample.
2563    ///
2564    /// When `options.outer_score_subsample` is `None`, this is byte-identical
2565    /// to `exact_newton_joint_hessian_workspace`. When `Some`, the precomputed
2566    /// per-row coefficient arrays in `pieces` (`coeff_mm`, `coeff_ml`,
2567    /// `coeff_ll`, `coeff_mw_b`, `coeff_mw_d`, `coeff_lw_b`, `coeff_ww`) —
2568    /// which every downstream assembly (`hessian_dense`, `hessian_matvec`,
2569    /// `hessian_diagonal`) consumes row-linearly via `Xᵀ diag(W) Y` — are
2570    /// replaced by a Horvitz–Thompson mask: each sampled row's coefficient
2571    /// is multiplied by `WeightedOuterRow.weight` (the inverse-inclusion
2572    /// factor 1/π_i; uniform or stratified sampling both supported), and
2573    /// non-sampled rows are zeroed. The `basis`/`basis_d1` matrices are
2574    /// row-weight-independent and remain unchanged. Note that the Gaussian
2575    /// wiggle has one fewer cross-coefficient than the binomial wiggle
2576    /// (no `coeff_lw_d`) because the wiggle enters the Gaussian likelihood
2577    /// only through `q = η_μ + η_w` (no σ-chain). The resulting joint Hessian
2578    /// is an unbiased estimator of the full-data joint Hessian. Inner PIRLS
2579    /// never installs the option, so the inner solve continues to consume
2580    /// the exact full-data Hessian.
2581    fn exact_newton_joint_hessian_workspace_with_options(
2582        &self,
2583        block_states: &[ParameterBlockState],
2584        specs: &[ParameterBlockSpec],
2585        options: &BlockwiseFitOptions,
2586    ) -> Result<Option<Arc<dyn ExactNewtonJointHessianWorkspace>>, String> {
2587        let Some((xmu, x_ls)) = self.exact_joint_dense_block_designs(Some(specs))? else {
2588            return Ok(None);
2589        };
2590        let mut workspace = GaussianLocationScaleWiggleHessianWorkspace::new(
2591            self.clone(),
2592            block_states.to_vec(),
2593            xmu.into_owned(),
2594            x_ls.into_owned(),
2595        )?;
2596        if let Some(subsample) = options.outer_score_subsample.as_ref() {
2597            workspace.apply_outer_subsample(subsample.rows.as_ref());
2598        }
2599        Ok(Some(Arc::new(workspace)))
2600    }
2601
2602    /// Outer-derivative policy: declare HT-subsample capability.
2603    ///
2604    /// GaussianLocationScaleWiggleFamily overrides
2605    /// `log_likelihood_only_with_options` and
2606    /// `exact_newton_joint_hessian_workspace_with_options` to consume
2607    /// `options.outer_score_subsample` with per-row Horvitz–Thompson weights
2608    /// (each sampled row's contribution is multiplied by
2609    /// `WeightedOuterRow.weight = 1/π_i`; non-sampled rows are zeroed),
2610    /// yielding unbiased estimators of the full-data log-likelihood and
2611    /// joint Hessian. The ψ-workspace path is also subsample-aware via
2612    /// `exact_newton_joint_psi_workspace_with_options`, which threads the
2613    /// subsample down to per-row weight masking inside the joint-ψ second-
2614    /// order and directional-derivative reductions. Inner-PIRLS and final-
2615    /// covariance paths never install the option, so they continue to
2616    /// consume the exact full-data quantities.
2617    fn outer_derivative_subsample_capable(&self) -> bool {
2618        true
2619    }
2620
2621    fn inner_coefficient_hessian_hvp_available(&self, specs: &[ParameterBlockSpec]) -> bool {
2622        // Same gating as the workspace impl above: matrix-free fires when
2623        // `exact_joint_dense_block_designs` is satisfiable, which requires
2624        // both location and scale block designs to be present.  The wiggle
2625        // block is folded into the operator via the per-row pieces — its
2626        // presence is implied by reaching the wiggle family in the first
2627        // place — so the predicate matches the non-wiggle case.
2628        self.exact_joint_supported()
2629            && matches!(
2630                self.exact_joint_dense_block_designs(Some(specs)),
2631                Ok(Some(_))
2632            )
2633    }
2634}
2635
2636/// Matrix-free joint-Hessian operator for the 3-block Gaussian
2637/// location-scale wiggle family. See `GaussianLocationScaleWiggleHessianRowPieces`
2638/// for the per-row weight structure. The matvec applies
2639///
2640///   r_μ  = D_mm u_μ + D_ml u_ls + D_mw_b (B v_w) + D_mw_d (B' v_w),
2641///   r_ls = D_ml u_μ + D_ll u_ls + D_lw_b (B v_w),
2642///   r_b  = D_mw_b u_μ + D_lw_b u_ls + D_ww (B v_w),
2643///   r_d  = D_mw_d u_μ,
2644///
2645/// then forms `out_w = B^T r_b + (B')^T r_d`. The ls-wiggle cross block has
2646/// no B' contribution because the wiggle enters the Gaussian likelihood only
2647/// through `q = η_μ + η_w` (no σ-chain), so the Gaussian wiggle has one
2648/// fewer cross-coefficient than the binomial wiggle.
2649pub(crate) struct GaussianLocationScaleWiggleHessianWorkspace {
2650    pub(crate) family: GaussianLocationScaleWiggleFamily,
2651    pub(crate) block_states: Vec<ParameterBlockState>,
2652    pub(crate) xmu: Arc<Array2<f64>>,
2653    pub(crate) x_ls: Arc<Array2<f64>>,
2654    pub(crate) pieces: GaussianLocationScaleWiggleHessianRowPieces,
2655}
2656
2657impl GaussianLocationScaleWiggleHessianWorkspace {
2658    pub(crate) fn new(
2659        family: GaussianLocationScaleWiggleFamily,
2660        block_states: Vec<ParameterBlockState>,
2661        xmu: Array2<f64>,
2662        x_ls: Array2<f64>,
2663    ) -> Result<Self, String> {
2664        let pieces = family.wiggle_hessian_row_pieces(&block_states)?;
2665        Ok(Self {
2666            family,
2667            block_states,
2668            xmu: Arc::new(xmu),
2669            x_ls: Arc::new(x_ls),
2670            pieces,
2671        })
2672    }
2673
2674    /// Apply a Horvitz–Thompson outer-row subsample mask to the precomputed
2675    /// per-row coefficient arrays in place.
2676    ///
2677    /// Each sampled row's `coeff_*[i]` is multiplied by its
2678    /// `WeightedOuterRow.weight` (the HT inverse-inclusion factor 1/π_i —
2679    /// uniform or stratified sampling both supported). All non-sampled rows
2680    /// are zeroed. Because every downstream assembly (`hessian_dense`,
2681    /// `hessian_matvec`, `hessian_diagonal`) is row-linear in these arrays
2682    /// via `Xᵀ diag(W) Y`, the resulting joint-Hessian is an unbiased
2683    /// estimator of the full-data joint Hessian. The `basis`/`basis_d1`
2684    /// matrices are independent of the per-row weights and remain unchanged.
2685    /// The Gaussian wiggle has 7 coefficient arrays (no `coeff_lw_d`, unlike
2686    /// the binomial wiggle's 8) because the wiggle enters the Gaussian
2687    /// likelihood only through `q = η_μ + η_w` (no σ-chain).
2688    pub(crate) fn apply_outer_subsample(
2689        &mut self,
2690        rows: &[crate::outer_subsample::WeightedOuterRow],
2691    ) {
2692        let n = self.pieces.coeff_mm.len();
2693        let mut mask_mm = Array1::<f64>::zeros(n);
2694        let mut mask_ml = Array1::<f64>::zeros(n);
2695        let mut mask_ll = Array1::<f64>::zeros(n);
2696        let mut mask_mw_b = Array1::<f64>::zeros(n);
2697        let mut mask_mw_d = Array1::<f64>::zeros(n);
2698        let mut mask_lw_b = Array1::<f64>::zeros(n);
2699        let mut maskww = Array1::<f64>::zeros(n);
2700        for r in rows {
2701            let i = r.index;
2702            let w = r.weight;
2703            mask_mm[i] = self.pieces.coeff_mm[i] * w;
2704            mask_ml[i] = self.pieces.coeff_ml[i] * w;
2705            mask_ll[i] = self.pieces.coeff_ll[i] * w;
2706            mask_mw_b[i] = self.pieces.coeff_mw_b[i] * w;
2707            mask_mw_d[i] = self.pieces.coeff_mw_d[i] * w;
2708            mask_lw_b[i] = self.pieces.coeff_lw_b[i] * w;
2709            maskww[i] = self.pieces.coeff_ww[i] * w;
2710        }
2711        self.pieces.coeff_mm = mask_mm;
2712        self.pieces.coeff_ml = mask_ml;
2713        self.pieces.coeff_ll = mask_ll;
2714        self.pieces.coeff_mw_b = mask_mw_b;
2715        self.pieces.coeff_mw_d = mask_mw_d;
2716        self.pieces.coeff_lw_b = mask_lw_b;
2717        self.pieces.coeff_ww = maskww;
2718    }
2719}
2720
2721impl ExactNewtonJointHessianWorkspace for GaussianLocationScaleWiggleHessianWorkspace {
2722    fn hessian_dense(&self) -> Result<Option<Array2<f64>>, String> {
2723        // Same Hv structure as `hessian_matvec`, but routed through the
2724        // already-existing `assemble_dense` row-pieces helper (six GEMMs:
2725        // h_mm, h_ml, h_mw_b, h_mw_d, h_lw, h_ww). Avoids `total` canonical-
2726        // basis HVPs in `MatrixFreeSpdOperator::materialize_dense_operator`,
2727        // which at large scale (n≈320k, p_total≈82) costs ~568s per κ-iter
2728        // versus ~1s for the dense build.
2729        let dense = self
2730            .pieces
2731            .assemble_dense(self.xmu.as_ref(), self.x_ls.as_ref())?;
2732        Ok(Some(dense))
2733    }
2734
2735    fn hessian_matvec_available(&self) -> bool {
2736        true
2737    }
2738
2739    fn hessian_matvec(&self, v: &Array1<f64>) -> Result<Option<Array1<f64>>, String> {
2740        let pmu = self.xmu.ncols();
2741        let p_ls = self.x_ls.ncols();
2742        let pw = self.pieces.basis.ncols();
2743        let total = pmu + p_ls + pw;
2744        if v.len() != total {
2745            return Err(GamlssError::DimensionMismatch {
2746                reason: format!(
2747                    "GaussianLocationScaleWiggle matvec dimension mismatch: got {}, expected {}",
2748                    v.len(),
2749                    total
2750                ),
2751            }
2752            .into());
2753        }
2754        let v_mu = v.slice(s![0..pmu]);
2755        let v_ls = v.slice(s![pmu..pmu + p_ls]);
2756        let v_w = v.slice(s![pmu + p_ls..total]);
2757
2758        let u_mu = fast_av(self.xmu.as_ref(), &v_mu);
2759        let u_ls = fast_av(self.x_ls.as_ref(), &v_ls);
2760        let u_b = fast_av(&self.pieces.basis, &v_w);
2761        let u_d = fast_av(&self.pieces.basis_d1, &v_w);
2762
2763        let r_mu = &self.pieces.coeff_mm * &u_mu
2764            + &self.pieces.coeff_ml * &u_ls
2765            + &self.pieces.coeff_mw_b * &u_b
2766            + &self.pieces.coeff_mw_d * &u_d;
2767        let r_ls = &self.pieces.coeff_ml * &u_mu
2768            + &self.pieces.coeff_ll * &u_ls
2769            + &self.pieces.coeff_lw_b * &u_b;
2770        let r_b = &self.pieces.coeff_mw_b * &u_mu
2771            + &self.pieces.coeff_lw_b * &u_ls
2772            + &self.pieces.coeff_ww * &u_b;
2773        let r_d = &self.pieces.coeff_mw_d * &u_mu;
2774
2775        let out_mu = fast_atv(self.xmu.as_ref(), &r_mu);
2776        let out_ls = fast_atv(self.x_ls.as_ref(), &r_ls);
2777        let out_w = fast_atv(&self.pieces.basis, &r_b) + &fast_atv(&self.pieces.basis_d1, &r_d);
2778
2779        let mut out = Array1::<f64>::zeros(total);
2780        out.slice_mut(s![0..pmu]).assign(&out_mu);
2781        out.slice_mut(s![pmu..pmu + p_ls]).assign(&out_ls);
2782        out.slice_mut(s![pmu + p_ls..total]).assign(&out_w);
2783        Ok(Some(out))
2784    }
2785
2786    fn hessian_diagonal(&self) -> Result<Option<Array1<f64>>, String> {
2787        let pmu = self.xmu.ncols();
2788        let p_ls = self.x_ls.ncols();
2789        let pw = self.pieces.basis.ncols();
2790        let total = pmu + p_ls + pw;
2791        // Diagonals are independent column-wise reductions: parallelize.
2792        use rayon::iter::{IntoParallelIterator, ParallelIterator};
2793        let diag_mu: Vec<f64> = (0..pmu)
2794            .into_par_iter()
2795            .map(|j| {
2796                let col = self.xmu.column(j);
2797                col.iter()
2798                    .zip(self.pieces.coeff_mm.iter())
2799                    .map(|(&v, &c)| c * v * v)
2800                    .sum()
2801            })
2802            .collect();
2803        let diag_ls: Vec<f64> = (0..p_ls)
2804            .into_par_iter()
2805            .map(|j| {
2806                let col = self.x_ls.column(j);
2807                col.iter()
2808                    .zip(self.pieces.coeff_ll.iter())
2809                    .map(|(&v, &c)| c * v * v)
2810                    .sum()
2811            })
2812            .collect();
2813        let diag_w: Vec<f64> = (0..pw)
2814            .into_par_iter()
2815            .map(|j| {
2816                let col = self.pieces.basis.column(j);
2817                col.iter()
2818                    .zip(self.pieces.coeff_ww.iter())
2819                    .map(|(&v, &c)| c * v * v)
2820                    .sum()
2821            })
2822            .collect();
2823        let mut diag = Array1::<f64>::zeros(total);
2824        for (j, v) in diag_mu.into_iter().enumerate() {
2825            diag[j] = v;
2826        }
2827        for (j, v) in diag_ls.into_iter().enumerate() {
2828            diag[pmu + j] = v;
2829        }
2830        for (j, v) in diag_w.into_iter().enumerate() {
2831            diag[pmu + p_ls + j] = v;
2832        }
2833        Ok(Some(diag))
2834    }
2835
2836    fn directional_derivative(
2837        &self,
2838        d_beta_flat: &Array1<f64>,
2839    ) -> Result<Option<Array2<f64>>, String> {
2840        self.family
2841            .exact_newton_joint_hessian_directional_derivative_from_designs(
2842                &self.block_states,
2843                self.xmu.as_ref(),
2844                self.x_ls.as_ref(),
2845                d_beta_flat,
2846            )
2847    }
2848
2849    fn directional_derivative_operator(
2850        &self,
2851        d_beta_flat: &Array1<f64>,
2852    ) -> Result<Option<Arc<dyn gam_problem::HyperOperator>>, String> {
2853        self.family.gls_wiggle_directional_operator(
2854            &self.block_states,
2855            self.xmu.clone(),
2856            self.x_ls.clone(),
2857            d_beta_flat,
2858        )
2859    }
2860
2861    fn second_directional_derivative(
2862        &self,
2863        d_beta_u_flat: &Array1<f64>,
2864        d_beta_v_flat: &Array1<f64>,
2865    ) -> Result<Option<Array2<f64>>, String> {
2866        self.family
2867            .exact_newton_joint_hessiansecond_directional_derivative_from_designs(
2868                &self.block_states,
2869                self.xmu.as_ref(),
2870                self.x_ls.as_ref(),
2871                d_beta_u_flat,
2872                d_beta_v_flat,
2873            )
2874    }
2875
2876    fn second_directional_derivative_operator(
2877        &self,
2878        d_beta_u: &Array1<f64>,
2879        d_beta_v: &Array1<f64>,
2880    ) -> Result<Option<Arc<dyn gam_problem::HyperOperator>>, String> {
2881        self.family.gls_wiggle_second_directional_operator(
2882            &self.block_states,
2883            self.xmu.clone(),
2884            self.x_ls.clone(),
2885            d_beta_u,
2886            d_beta_v,
2887        )
2888    }
2889}
2890
2891impl CustomFamilyGenerative for GaussianLocationScaleWiggleFamily {
2892    fn generativespec(
2893        &self,
2894        block_states: &[ParameterBlockState],
2895    ) -> Result<GenerativeSpec, String> {
2896        validate_block_count::<GamlssError>(
2897            "GaussianLocationScaleWiggleFamily",
2898            3,
2899            block_states.len(),
2900        )?;
2901        let eta_mu = &block_states[Self::BLOCK_MU].eta;
2902        let eta_wiggle = &block_states[Self::BLOCK_WIGGLE].eta;
2903        let eta_log_sigma = &block_states[Self::BLOCK_LOG_SIGMA].eta;
2904        let n = eta_mu.len();
2905        let mean = gamlss_rowwise_map(n, |i| eta_mu[i] + eta_wiggle[i]);
2906        let sigma = gamlss_rowwise_map(n, |i| logb_sigma_from_eta_scalar(eta_log_sigma[i]));
2907        Ok(GenerativeSpec {
2908            mean,
2909            noise: NoiseModel::Gaussian { sigma },
2910        })
2911    }
2912}
2913
2914pub(crate) fn expect_single_block<'a>(
2915    block_states: &'a [ParameterBlockState],
2916    family_name: &str,
2917) -> Result<&'a ParameterBlockState, String> {
2918    validate_block_count::<GamlssError>(family_name, 1, block_states.len())?;
2919    Ok(&block_states[0])
2920}