Skip to main content

gam_models/gamlss/gaussian/
wiggle.rs

1// Real concern-organized submodule of the gamlss family stack.
2// Cross-module items are re-exported flat through the parent (`gamlss.rs`),
3// so `use super::*;` makes the sibling-concern symbols this module references
4// resolve through the parent namespace.
5use super::*;
6
7pub(crate) struct GaussianLocationScaleWiggleGeometry {
8    pub(crate) basis: Array2<f64>,
9    pub(crate) basis_d1: Array2<f64>,
10    pub(crate) basis_d2: Array2<f64>,
11    pub(crate) basis_d3: Array2<f64>,
12    pub(crate) dq_dq0: Array1<f64>,
13    pub(crate) d2q_dq02: Array1<f64>,
14    pub(crate) d3q_dq03: Array1<f64>,
15    pub(crate) d4q_dq04: Array1<f64>,
16}
17
18/// Per-row pieces of the 3-block Gaussian location-scale-wiggle joint
19/// Hessian. Both the dense path and the matrix-free workspace share these
20/// row coefficients; only the assembly differs.
21pub(crate) struct GaussianLocationScaleWiggleHessianRowPieces {
22    pub(crate) coeff_mm: Array1<f64>,
23    pub(crate) coeff_ml: Array1<f64>,
24    pub(crate) coeff_ll: Array1<f64>,
25    pub(crate) coeff_mw_b: Array1<f64>,
26    pub(crate) coeff_mw_d: Array1<f64>,
27    pub(crate) coeff_lw_b: Array1<f64>,
28    pub(crate) coeff_ww: Array1<f64>,
29    pub(crate) basis: Array2<f64>,
30    pub(crate) basis_d1: Array2<f64>,
31}
32
33impl GaussianLocationScaleWiggleHessianRowPieces {
34    pub(crate) fn assemble_dense(
35        &self,
36        xmu: &Array2<f64>,
37        x_ls: &Array2<f64>,
38    ) -> Result<Array2<f64>, String> {
39        let h_mm = xt_diag_x_dense(xmu, &self.coeff_mm)?;
40        let h_ml = xt_diag_y_dense(xmu, &self.coeff_ml, x_ls)?;
41        let h_ll = xt_diag_x_dense(x_ls, &self.coeff_ll)?;
42        let h_mw = xt_diag_y_dense(xmu, &self.coeff_mw_b, &self.basis)?
43            + &xt_diag_y_dense(xmu, &self.coeff_mw_d, &self.basis_d1)?;
44        let h_lw = xt_diag_y_dense(x_ls, &self.coeff_lw_b, &self.basis)?;
45        let h_ww = xt_diag_x_dense(&self.basis, &self.coeff_ww)?;
46        Ok(gaussian_pack_wiggle_joint_symmetrichessian(
47            &h_mm, &h_ml, &h_mw, &h_ll, &h_lw, &h_ww,
48        ))
49    }
50}
51
52pub struct GaussianLocationScaleWiggleFamily {
53    pub y: Array1<f64>,
54    pub weights: Array1<f64>,
55    pub mu_design: Option<DesignMatrix>,
56    pub log_sigma_design: Option<DesignMatrix>,
57    pub wiggle_knots: Array1<f64>,
58    pub wiggle_degree: usize,
59    /// Resource policy threaded into PsiDesignMap construction (and any other
60    /// per-call materialization decision) made during exact-Newton joint psi
61    /// derivative evaluation. Defaults to `ResourcePolicy::default_library()`
62    /// when the family is built without an explicit policy.
63    pub policy: gam_runtime::resource::ResourcePolicy,
64    pub(crate) cached_row_scalars:
65        std::sync::RwLock<Option<(f64, f64, f64, f64, f64, f64, Arc<GaussianJointRowScalars>)>>,
66}
67
68impl Clone for GaussianLocationScaleWiggleFamily {
69    fn clone(&self) -> Self {
70        Self {
71            y: self.y.clone(),
72            weights: self.weights.clone(),
73            mu_design: self.mu_design.clone(),
74            log_sigma_design: self.log_sigma_design.clone(),
75            wiggle_knots: self.wiggle_knots.clone(),
76            wiggle_degree: self.wiggle_degree,
77            policy: self.policy.clone(),
78            cached_row_scalars: std::sync::RwLock::new(
79                self.cached_row_scalars
80                    .read()
81                    .expect("lock poisoned")
82                    .clone(),
83            ),
84        }
85    }
86}
87
88impl GaussianLocationScaleWiggleFamily {
89    pub const BLOCK_MU: usize = 0;
90    pub const BLOCK_LOG_SIGMA: usize = 1;
91    pub const BLOCK_WIGGLE: usize = 2;
92
93    pub fn parameternames() -> &'static [&'static str] {
94        &["mu", "log_sigma", "wiggle"]
95    }
96
97    pub fn parameter_links() -> &'static [ParameterLink] {
98        &[
99            ParameterLink::Identity,
100            ParameterLink::Log,
101            ParameterLink::Wiggle,
102        ]
103    }
104
105    pub fn metadata() -> FamilyMetadata {
106        FamilyMetadata {
107            name: "gaussian_location_scalewiggle",
108            parameternames: Self::parameternames(),
109            parameter_links: Self::parameter_links(),
110        }
111    }
112
113    pub(crate) fn exact_joint_supported(&self) -> bool {
114        self.mu_design.is_some() && self.log_sigma_design.is_some()
115    }
116
117    pub(crate) fn wiggle_basiswith_options(
118        &self,
119        q0: ArrayView1<'_, f64>,
120        options: BasisOptions,
121    ) -> Result<Array2<f64>, String> {
122        monotone_wiggle_basis_with_derivative_order(
123            q0,
124            &self.wiggle_knots,
125            self.wiggle_degree,
126            options.derivative_order,
127        )
128    }
129
130    pub(crate) fn wiggle_design(&self, q0: ArrayView1<'_, f64>) -> Result<Array2<f64>, String> {
131        self.wiggle_basiswith_options(q0, BasisOptions::value())
132    }
133
134    pub(crate) fn wiggle_dq_dq0(
135        &self,
136        q0: ArrayView1<'_, f64>,
137        beta_link_wiggle: ArrayView1<'_, f64>,
138    ) -> Result<Array1<f64>, String> {
139        let d1 = self.wiggle_basiswith_options(q0, BasisOptions::first_derivative())?;
140        if d1.ncols() != beta_link_wiggle.len() {
141            return Err(GamlssError::DimensionMismatch { reason: format!(
142                "wiggle derivative/beta mismatch: basis has {} columns but beta_link_wiggle has {} coefficients",
143                d1.ncols(),
144                beta_link_wiggle.len()
145            ) }.into());
146        }
147        Ok(d1.dot(&beta_link_wiggle) + 1.0)
148    }
149
150    pub(crate) fn wiggle_d2q_dq02(
151        &self,
152        q0: ArrayView1<'_, f64>,
153        beta_link_wiggle: ArrayView1<'_, f64>,
154    ) -> Result<Array1<f64>, String> {
155        let d2 = self.wiggle_basiswith_options(q0, BasisOptions::second_derivative())?;
156        if d2.ncols() != beta_link_wiggle.len() {
157            return Err(GamlssError::DimensionMismatch { reason: format!(
158                "wiggle second-derivative/beta mismatch: basis has {} columns but beta_link_wiggle has {} coefficients",
159                d2.ncols(),
160                beta_link_wiggle.len()
161            ) }.into());
162        }
163        Ok(d2.dot(&beta_link_wiggle))
164    }
165
166    pub(crate) fn wiggle_d3basis_constrained(
167        &self,
168        q0: ArrayView1<'_, f64>,
169    ) -> Result<Array2<f64>, String> {
170        monotone_wiggle_basis_with_derivative_order(q0, &self.wiggle_knots, self.wiggle_degree, 3)
171    }
172
173    pub(crate) fn wiggle_d3q_dq03(
174        &self,
175        q0: ArrayView1<'_, f64>,
176        beta_link_wiggle: ArrayView1<'_, f64>,
177    ) -> Result<Array1<f64>, String> {
178        let d3 = self.wiggle_d3basis_constrained(q0)?;
179        if d3.ncols() != beta_link_wiggle.len() {
180            return Err(GamlssError::DimensionMismatch { reason: format!(
181                "wiggle third-derivative/beta mismatch: basis has {} columns but beta_link_wiggle has {} coefficients",
182                d3.ncols(),
183                beta_link_wiggle.len()
184            ) }.into());
185        }
186        Ok(d3.dot(&beta_link_wiggle))
187    }
188
189    pub(crate) fn wiggle_d4q_dq04(
190        &self,
191        q0: ArrayView1<'_, f64>,
192        beta_link_wiggle: ArrayView1<'_, f64>,
193    ) -> Result<Array1<f64>, String> {
194        let d4 = monotone_wiggle_basis_with_derivative_order(
195            q0,
196            &self.wiggle_knots,
197            self.wiggle_degree,
198            4,
199        )?;
200        if d4.ncols() != beta_link_wiggle.len() {
201            return Err(GamlssError::DimensionMismatch { reason: format!(
202                "wiggle fourth-derivative/beta mismatch: basis has {} columns but beta_link_wiggle has {} coefficients",
203                d4.ncols(),
204                beta_link_wiggle.len()
205            ) }.into());
206        }
207        Ok(d4.dot(&beta_link_wiggle))
208    }
209
210    pub(crate) fn wiggle_geometry(
211        &self,
212        q0: ArrayView1<'_, f64>,
213        beta_link_wiggle: ArrayView1<'_, f64>,
214    ) -> Result<GaussianLocationScaleWiggleGeometry, String> {
215        let basis = self.wiggle_design(q0)?;
216        let basis_d1 = self.wiggle_basiswith_options(q0, BasisOptions::first_derivative())?;
217        let basis_d2 = self.wiggle_basiswith_options(q0, BasisOptions::second_derivative())?;
218        let basis_d3 = self.wiggle_d3basis_constrained(q0)?;
219        let dq_dq0 = self.wiggle_dq_dq0(q0, beta_link_wiggle)?;
220        let d2q_dq02 = self.wiggle_d2q_dq02(q0, beta_link_wiggle)?;
221        let d3q_dq03 = self.wiggle_d3q_dq03(q0, beta_link_wiggle)?;
222        let d4q_dq04 = self.wiggle_d4q_dq04(q0, beta_link_wiggle)?;
223        Ok(GaussianLocationScaleWiggleGeometry {
224            basis,
225            basis_d1,
226            basis_d2,
227            basis_d3,
228            dq_dq0,
229            d2q_dq02,
230            d3q_dq03,
231            d4q_dq04,
232        })
233    }
234
235    pub(crate) fn get_or_compute_row_scalars(
236        &self,
237        q: &Array1<f64>,
238        eta_ls: &Array1<f64>,
239    ) -> Result<Arc<GaussianJointRowScalars>, String> {
240        Ok(Arc::new(gaussian_jointrow_scalars(
241            &self.y,
242            q,
243            eta_ls,
244            &self.weights,
245        )?))
246    }
247
248    pub(crate) fn dense_block_designs(
249        &self,
250    ) -> Result<(Cow<'_, Array2<f64>>, Cow<'_, Array2<f64>>), String> {
251        dense_locscale_block_designs_cached(
252            self.mu_design.as_ref(),
253            self.log_sigma_design.as_ref(),
254            "GaussianLocationScaleWiggleFamily",
255            "GaussianLocationScaleWiggle",
256            "mu",
257            &self.policy.material_policy(),
258        )
259    }
260    pub(crate) fn dense_block_designs_fromspecs<'a>(
261        &self,
262        specs: &'a [ParameterBlockSpec],
263    ) -> Result<(Cow<'a, Array2<f64>>, Cow<'a, Array2<f64>>), String> {
264        dense_locscale_block_designs_fromspecs(
265            specs,
266            3,
267            "GaussianLocationScaleWiggleFamily",
268            "GaussianLocationScaleWiggle",
269            Self::BLOCK_MU,
270            Self::BLOCK_LOG_SIGMA,
271            "mu",
272            &self.policy.material_policy(),
273        )
274    }
275
276    pub(crate) fn exact_joint_dense_block_designs<'a>(
277        &'a self,
278        specs: Option<&'a [ParameterBlockSpec]>,
279    ) -> Result<Option<(Cow<'a, Array2<f64>>, Cow<'a, Array2<f64>>)>, String> {
280        if self.exact_joint_supported() {
281            return self.dense_block_designs().map(Some);
282        }
283        if let Some(specs) = specs {
284            return self.dense_block_designs_fromspecs(specs).map(Some);
285        }
286        Ok(None)
287    }
288
289    /// Build the [`BlockEffectiveJacobian`] for block `block_idx`.
290    ///
291    /// The wiggle block (block 2) modulates the inverse link nonlinearly and
292    /// does not contribute a linear additive term to any output η; its
293    /// Jacobian is an `(2 * n, p_wiggle)` zero matrix.
294    ///
295    /// - block 0 (mu):        output 0 = design rows, output 1 = zeros
296    /// - block 1 (log_sigma): output 0 = zeros, output 1 = design rows
297    /// - block 2 (wiggle):    all zeros (nonlinear link modulation)
298    pub fn block_effective_jacobian(
299        specs: &[ParameterBlockSpec],
300        block_idx: usize,
301    ) -> Result<Box<dyn BlockEffectiveJacobian>, String> {
302        crate::block_layout::block_jacobian::AdditiveWiggleBlockLayout {
303            family: "GaussianLocationScaleWiggleFamily",
304            n_outputs: 2,
305            additive_blocks: &[Self::BLOCK_MU, Self::BLOCK_LOG_SIGMA],
306            wiggle_block: Some(Self::BLOCK_WIGGLE),
307        }
308        .block_effective_jacobian(specs, block_idx)
309    }
310}
311
312/// Row-coefficient bundle for the GLS Wiggle joint second directional
313/// derivative, shared by the matrix-free operator and the dense
314/// `_from_designs` assemblies. Holds exactly the quantities both consumers
315/// read downstream of the (identical) coefficient computation.
316pub(crate) struct GlsWiggleSecondDirCoeffs {
317    pub(crate) coeff_mm_uv: Array1<f64>,
318    pub(crate) coeff_ml_uv: Array1<f64>,
319    pub(crate) coeff_ll_uv: Array1<f64>,
320    pub(crate) a_u: Array1<f64>,
321    pub(crate) a_v: Array1<f64>,
322    pub(crate) a_uv: Array1<f64>,
323    pub(crate) c_u: Array1<f64>,
324    pub(crate) c_v: Array1<f64>,
325    pub(crate) c_uv: Array1<f64>,
326    pub(crate) l_u: Array1<f64>,
327    pub(crate) l_v: Array1<f64>,
328    pub(crate) l_uv: Array1<f64>,
329    pub(crate) dw_u: Array1<f64>,
330    pub(crate) dw_v: Array1<f64>,
331    pub(crate) dw_uv: Array1<f64>,
332}
333
334/// The two probe directions resolved to row space for the GLS Wiggle joint
335/// second directional derivative: `xi`/`zeta` are the X_mu/X_ls contractions,
336/// and `q`/`s1`/`g2` are the mixed first/second-derivative wiggle pieces.
337pub(crate) struct GlsWiggleDirPieces<'a> {
338    pub(crate) zeta_u: &'a Array1<f64>,
339    pub(crate) zeta_v: &'a Array1<f64>,
340    pub(crate) q_u: &'a Array1<f64>,
341    pub(crate) q_v: &'a Array1<f64>,
342    pub(crate) q_uv: &'a Array1<f64>,
343    pub(crate) s1_u: &'a Array1<f64>,
344    pub(crate) s1_v: &'a Array1<f64>,
345    pub(crate) s1_uv: &'a Array1<f64>,
346    pub(crate) g2_u: &'a Array1<f64>,
347    pub(crate) g2_v: &'a Array1<f64>,
348    pub(crate) g2_uv: &'a Array1<f64>,
349}
350
351/// Compute the shared GLS Wiggle second-directional row coefficients from the
352/// per-row scalars, wiggle geometry, and the resolved probe directions.
353pub(crate) fn gls_wiggle_second_directional_coeffs(
354    rows: &GaussianJointRowScalars,
355    geom: &GaussianLocationScaleWiggleGeometry,
356    dir: &GlsWiggleDirPieces<'_>,
357) -> GlsWiggleSecondDirCoeffs {
358    let GlsWiggleDirPieces {
359        zeta_u,
360        zeta_v,
361        q_u,
362        q_v,
363        q_uv,
364        s1_u,
365        s1_v,
366        s1_uv,
367        g2_u,
368        g2_v,
369        g2_uv,
370    } = *dir;
371    let szeta_u = &rows.kappa * zeta_u;
372    let szeta_v = &rows.kappa * zeta_v;
373    let zeta_u_zeta_v = zeta_u * zeta_v;
374    let dw_u = -2.0 * &rows.w * &szeta_u;
375    let dw_v = -2.0 * &rows.w * &szeta_v;
376    let dw_uv =
377        4.0 * &rows.w * &(&szeta_u * &szeta_v) - 2.0 * &rows.w * &rows.kappa_prime * &zeta_u_zeta_v;
378    let dm_u = -(&rows.w * q_u) - &(2.0 * &rows.m * &szeta_u);
379    let dm_v = -(&rows.w * q_v) - &(2.0 * &rows.m * &szeta_v);
380    let dm_uv = &(2.0 * &rows.w * &(q_u * &szeta_v + q_v * &szeta_u)) - &(&rows.w * q_uv)
381        + &(4.0 * &rows.m * &(&szeta_u * &szeta_v))
382        - 2.0 * &rows.m * &rows.kappa_prime * &zeta_u_zeta_v;
383    let coeff_mm_uv = &(&dw_uv * &geom.dq_dq0.mapv(|v| v * v))
384        + &(2.0 * &dw_u * &geom.dq_dq0 * s1_v)
385        + &(2.0 * &dw_v * &geom.dq_dq0 * s1_u)
386        + &(2.0 * &rows.w * s1_u * s1_v)
387        + &(2.0 * &rows.w * &geom.dq_dq0 * s1_uv)
388        - &(&dm_uv * &geom.d2q_dq02)
389        - &(&dm_u * g2_v)
390        - &(&dm_v * g2_u)
391        - &(&rows.m * g2_uv);
392    let n = rows.m.len();
393    // H_{μ,ls} ≡ Fisher 0 (mean⊥scale orthogonality; the wiggle and μ both
394    // enter the mean, log σ is the only scale block), so every β-directional
395    // derivative — including this second-order one — is identically 0.
396    let coeff_ml_uv = Array1::<f64>::zeros(n);
397    // Second directional derivative of the Fisher (log σ, log σ) block
398    // coeff_ll = 2κ²a (#566). η_ls is linear in β (no zeta_uv), so the only
399    // surviving term is ∂²(2κ²a)/∂η² · zeta_u·zeta_v = 4a(κ'²+κκ'')·zeta_u·zeta_v
400    // — matching the dense helper `d_uv` (gaussian_jointsecond_directionalweights).
401    let coeff_ll_uv = 4.0
402        * &rows.obs_weight
403        * &(&rows.kappa_prime * &rows.kappa_prime + &rows.kappa * &rows.kappa_dprime)
404        * &zeta_u_zeta_v;
405
406    let a_u = &dw_u * &geom.dq_dq0 + &rows.w * s1_u;
407    let a_v = &dw_v * &geom.dq_dq0 + &rows.w * s1_v;
408    let a_uv = &dw_uv * &geom.dq_dq0 + &dw_u * s1_v + &dw_v * s1_u + &rows.w * s1_uv;
409    let c_u = -&dm_u;
410    let c_v = -&dm_v;
411    let c_uv = -&dm_uv;
412    // H_{ls,w} ≡ Fisher 0 (wiggle is mean-side; mean⊥scale), so all of its
413    // β-directional derivatives are 0.
414    let l_u = Array1::<f64>::zeros(n);
415    let l_v = Array1::<f64>::zeros(n);
416    let l_uv = Array1::<f64>::zeros(n);
417
418    GlsWiggleSecondDirCoeffs {
419        coeff_mm_uv,
420        coeff_ml_uv,
421        coeff_ll_uv,
422        a_u,
423        a_v,
424        a_uv,
425        c_u,
426        c_v,
427        c_uv,
428        l_u,
429        l_v,
430        l_uv,
431        dw_u,
432        dw_v,
433        dw_uv,
434    }
435}
436
437impl GaussianLocationScaleWiggleFamily {
438    pub(crate) fn exact_newton_joint_hessian_for_specs(
439        &self,
440        block_states: &[ParameterBlockState],
441        specs: Option<&[ParameterBlockSpec]>,
442    ) -> Result<Option<Array2<f64>>, String> {
443        let Some((xmu, x_ls)) = self.exact_joint_dense_block_designs(specs)? else {
444            return Ok(None);
445        };
446        self.exact_newton_joint_hessian_from_designs(block_states, &xmu, &x_ls)
447    }
448
449    pub(crate) fn exact_newton_joint_hessian_directional_derivative_for_specs(
450        &self,
451        block_states: &[ParameterBlockState],
452        specs: Option<&[ParameterBlockSpec]>,
453        d_beta_flat: &Array1<f64>,
454    ) -> Result<Option<Array2<f64>>, String> {
455        let Some((xmu, x_ls)) = self.exact_joint_dense_block_designs(specs)? else {
456            return Ok(None);
457        };
458        self.exact_newton_joint_hessian_directional_derivative_from_designs(
459            block_states,
460            &xmu,
461            &x_ls,
462            d_beta_flat,
463        )
464    }
465
466    pub(crate) fn exact_newton_joint_hessian_second_directional_derivative_for_specs(
467        &self,
468        block_states: &[ParameterBlockState],
469        specs: Option<&[ParameterBlockSpec]>,
470        d_beta_u_flat: &Array1<f64>,
471        d_beta_v_flat: &Array1<f64>,
472    ) -> Result<Option<Array2<f64>>, String> {
473        let Some((xmu, x_ls)) = self.exact_joint_dense_block_designs(specs)? else {
474            return Ok(None);
475        };
476        self.exact_newton_joint_hessiansecond_directional_derivative_from_designs(
477            block_states,
478            &xmu,
479            &x_ls,
480            d_beta_u_flat,
481            d_beta_v_flat,
482        )
483    }
484
485    pub(crate) fn exact_newton_joint_psi_direction(
486        &self,
487        block_states: &[ParameterBlockState],
488        derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
489        psi_index: usize,
490        xmu: &Array2<f64>,
491        x_ls: &Array2<f64>,
492        policy: &gam_runtime::resource::ResourcePolicy,
493    ) -> Result<Option<LocationScaleJointPsiDirection>, String> {
494        let Some(parts) = locscale_joint_psi_direction_parts(
495            block_states,
496            derivative_blocks,
497            psi_index,
498            self.y.len(),
499            xmu.ncols(),
500            x_ls.ncols(),
501            Self::BLOCK_MU,
502            Self::BLOCK_LOG_SIGMA,
503            3,
504            "GaussianLocationScaleWiggleFamily",
505            "mu",
506            policy,
507        )?
508        else {
509            return Ok(None);
510        };
511        Ok(Some(LocationScaleJointPsiDirection {
512            block_idx: parts.block_idx,
513            local_idx: parts.local_idx,
514            z_primary_psi: parts.primary_z,
515            z_ls_psi: parts.log_sigma_z,
516            x_primary_psi: parts.primary_psi,
517            x_ls_psi: parts.log_sigma_psi,
518        }))
519    }
520
521    pub(crate) fn exact_newton_joint_psisecond_design_drifts(
522        &self,
523        block_states: &[ParameterBlockState],
524        derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
525        psi_a: &LocationScaleJointPsiDirection,
526        psi_b: &LocationScaleJointPsiDirection,
527        xmu: &Array2<f64>,
528        x_ls: &Array2<f64>,
529    ) -> Result<LocationScaleJointPsiSecondDrifts, String> {
530        locscale_joint_psisecond_design_drifts(
531            block_states,
532            derivative_blocks,
533            psi_a,
534            psi_b,
535            LocScalePsiDriftConfig {
536                n: self.y.len(),
537                p_primary: xmu.ncols(),
538                p_log_sigma: x_ls.ncols(),
539                primary_block_idx: Self::BLOCK_MU,
540                log_sigma_block_idx: Self::BLOCK_LOG_SIGMA,
541                family_name: "GaussianLocationScaleWiggleFamily",
542                primary_label: "mu",
543                policy: &self.policy,
544            },
545        )
546    }
547
548    /// Compute the rowwise Hessian pieces shared by the dense path and the
549    /// matrix-free workspace operator. The same coefficients reconstruct the
550    /// dense p×p matrix or apply `Hv` directly without ever forming it.
551    pub(crate) fn wiggle_hessian_row_pieces(
552        &self,
553        block_states: &[ParameterBlockState],
554    ) -> Result<GaussianLocationScaleWiggleHessianRowPieces, String> {
555        validate_block_count::<GamlssError>(
556            "GaussianLocationScaleWiggleFamily",
557            3,
558            block_states.len(),
559        )?;
560        let q0 = &block_states[Self::BLOCK_MU].eta;
561        let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
562        let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
563        let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
564        let n = self.y.len();
565        if q0.len() != n || eta_ls.len() != n || etaw.len() != n || self.weights.len() != n {
566            return Err(GamlssError::DimensionMismatch {
567                reason: "GaussianLocationScaleWiggleFamily input size mismatch".to_string(),
568            }
569            .into());
570        }
571        let q = q0 + etaw;
572        let geom = self.wiggle_geometry(q0.view(), betaw.view())?;
573        if geom.basis.ncols() != betaw.len() {
574            return Err(GamlssError::DimensionMismatch { reason: format!(
575                "GaussianLocationScaleWiggleFamily wiggle basis/beta mismatch: basis has {} columns but beta has {} entries",
576                geom.basis.ncols(),
577                betaw.len()
578            ) }.into());
579        }
580        let rows = self.get_or_compute_row_scalars(&q, eta_ls)?;
581        let coeff_mm = &rows.w * &geom.dq_dq0.mapv(|v| v * v) - &rows.m * &geom.d2q_dq02;
582        // Gaussian mean⊥scale Fisher orthogonality. μ (mu) AND the wiggle both
583        // enter the MEAN q = q0 + B(q0)·βw (see `let q = q0 + etaw`); log σ is
584        // the only scale-side block. The Fisher (expected) cross between any
585        // mean-side parameter and log σ is exactly 0: H_{μ,ls} = 2κm·dq_dq0 and
586        // H_{ls,w} = 2κm both carry m = r·w = (y−q)·weight/σ², and E[m] =
587        // E[r]·w = 0. The dense and matrix-free workspace paths SHARE these row
588        // pieces, so setting the cross coeffs to 0 fixes the curvature object
589        // (the observed 2κm value) for both. Diagonal/same-side blocks
590        // (coeff_mm within mean, coeff_ll within scale, coeff_mw_* within mean,
591        // coeff_ww within mean) are untouched.
592        let coeff_ml = Array1::<f64>::zeros(n);
593        // Fisher/expected (log σ, log σ) information E[H_{ls,ls}] = 2κ²a (#566):
594        // the observed 2κ²n + κ'(a−n) collapses at small residuals and
595        // over-smooths the scale; E[n]=a gives the residual-free 2κ²a.
596        let coeff_ll = 2.0 * &rows.kappa * &rows.kappa * &rows.obs_weight;
597        let coeff_mw_b = &rows.w * &geom.dq_dq0;
598        let coeff_mw_d = -&rows.m;
599        // ls↔wiggle is a mean⊥scale cross (wiggle is mean-side): Fisher 0.
600        let coeff_lw_b = Array1::<f64>::zeros(n);
601        let coeff_ww = rows.w.clone();
602        Ok(GaussianLocationScaleWiggleHessianRowPieces {
603            coeff_mm,
604            coeff_ml,
605            coeff_ll,
606            coeff_mw_b,
607            coeff_mw_d,
608            coeff_lw_b,
609            coeff_ww,
610            basis: geom.basis,
611            basis_d1: geom.basis_d1,
612        })
613    }
614
615    pub(crate) fn exact_newton_joint_hessian_from_designs(
616        &self,
617        block_states: &[ParameterBlockState],
618        xmu: &Array2<f64>,
619        x_ls: &Array2<f64>,
620    ) -> Result<Option<Array2<f64>>, String> {
621        let pieces = self.wiggle_hessian_row_pieces(block_states)?;
622        Ok(Some(pieces.assemble_dense(xmu, x_ls)?))
623    }
624
625    pub(crate) fn exact_newton_joint_hessian_directional_derivative_from_designs(
626        &self,
627        block_states: &[ParameterBlockState],
628        xmu: &Array2<f64>,
629        x_ls: &Array2<f64>,
630        d_beta_flat: &Array1<f64>,
631    ) -> Result<Option<Array2<f64>>, String> {
632        validate_block_count::<GamlssError>(
633            "GaussianLocationScaleWiggleFamily",
634            3,
635            block_states.len(),
636        )?;
637        let pmu = xmu.ncols();
638        let p_ls = x_ls.ncols();
639        let q0 = &block_states[Self::BLOCK_MU].eta;
640        let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
641        let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
642        let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
643        let n = self.y.len();
644        let layout = GamlssBetaLayout::withwiggle(pmu, p_ls, betaw.len());
645        let (umu, u_ls, uw) = layout.split_three(
646            d_beta_flat,
647            "GaussianLocationScaleWiggleFamily exact joint directional Hessian",
648        )?;
649        if q0.len() != n || eta_ls.len() != n || etaw.len() != n || self.weights.len() != n {
650            return Err(GamlssError::DimensionMismatch {
651                reason: "GaussianLocationScaleWiggleFamily input size mismatch".to_string(),
652            }
653            .into());
654        }
655        let q = q0 + etaw;
656        let geom = self.wiggle_geometry(q0.view(), betaw.view())?;
657        let rows = self.get_or_compute_row_scalars(&q, eta_ls)?;
658        let xi = fast_av(xmu, &umu);
659        let zeta = fast_av(x_ls, &u_ls);
660        // logb κ-scaled η_ls direction; κ' = dκ/dη_ls = κ(1−κ).
661        let szeta = &rows.kappa * &zeta;
662        let phi = fast_av(&geom.basis, &uw);
663        let mut q_u = &geom.dq_dq0 * &xi;
664        q_u += &phi;
665        let mut s1_u = &geom.d2q_dq02 * &xi;
666        s1_u += &fast_av(&geom.basis_d1, &uw);
667        let mut g2_u = &geom.d3q_dq03 * &xi;
668        g2_u += &fast_av(&geom.basis_d2, &uw);
669        let basis_u = scale_matrix_rows(&geom.basis_d1, &xi)?;
670        let basis1_u = scale_matrix_rows(&geom.basis_d2, &xi)?;
671        let dw_u = -2.0 * &rows.w * &szeta;
672        let dm_u = -(&rows.w * &q_u) - &(2.0 * &rows.m * &szeta);
673
674        let coeff_mm_u = &(&dw_u * &geom.dq_dq0.mapv(|v| v * v))
675            + &(2.0 * &rows.w * &geom.dq_dq0 * &s1_u)
676            - &(&dm_u * &geom.d2q_dq02)
677            - &(&rows.m * &g2_u);
678        // Static blocks: H_{μ,ls} = Fisher 0 (mean⊥scale); H_{ls,ls} = Fisher
679        // 2κ²a (#566). H_{μ,ls} ≡ 0 for all β, so its directional derivative is
680        // also identically 0. The Fisher (ls,ls) block 2κ²a depends only on
681        // η_ls (a is the constant prior weight), so its directional derivative
682        // is 4κκ'a·zeta.
683        let coeff_ml_u = Array1::<f64>::zeros(n);
684        let coeff_ll_u = 4.0 * &rows.kappa * &rows.kappa_prime * &(&zeta * &rows.obs_weight);
685        let a_u = &dw_u * &geom.dq_dq0 + &rows.w * &s1_u;
686        let c_u = -&dm_u;
687        // ls↔wiggle cross block: Fisher 0 (wiggle is mean-side), so its
688        // directional derivative is 0 as well.
689        let l_u = Array1::<f64>::zeros(n);
690        let zeros_ls_b1 = Array1::<f64>::zeros(n);
691
692        let h_mm = xt_diag_x_dense(xmu, &coeff_mm_u)?;
693        let h_ml = xt_diag_y_dense(xmu, &coeff_ml_u, x_ls)?;
694        let h_ll = xt_diag_x_dense(x_ls, &coeff_ll_u)?;
695        let h_mw = xt_diag_y_dense(xmu, &a_u, &geom.basis)?
696            + &xt_diag_y_dense(xmu, &(&rows.w * &geom.dq_dq0), &basis_u)?
697            + &xt_diag_y_dense(xmu, &c_u, &geom.basis_d1)?
698            + &xt_diag_y_dense(xmu, &(-&rows.m), &basis1_u)?;
699        let h_lw = xt_diag_y_dense(x_ls, &l_u, &geom.basis)?
700            + &xt_diag_y_dense(x_ls, &zeros_ls_b1, &basis_u)?;
701        let a_ww = xt_diag_y_dense(&basis_u, &rows.w, &geom.basis)?;
702        let h_ww = &a_ww + &a_ww.t() + &xt_diag_x_dense(&geom.basis, &dw_u)?;
703        Ok(Some(gaussian_pack_wiggle_joint_symmetrichessian(
704            &h_mm, &h_ml, &h_mw, &h_ll, &h_lw, &h_ww,
705        )))
706    }
707
708    /// Build a matrix-free `RowCoeffOperator` for the GLS Wiggle joint
709    /// directional derivative `D_β H_L[u]`. Output dimension is
710    /// `pmu + p_ls + pw`. Channels (in order): X_mu, X_ls, B, B', B''.
711    pub(crate) fn gls_wiggle_directional_operator(
712        &self,
713        block_states: &[ParameterBlockState],
714        xmu_arc: Arc<Array2<f64>>,
715        x_ls_arc: Arc<Array2<f64>>,
716        d_beta_flat: &Array1<f64>,
717    ) -> Result<Option<Arc<dyn gam_problem::HyperOperator>>, String> {
718        validate_block_count::<GamlssError>(
719            "GaussianLocationScaleWiggleFamily",
720            3,
721            block_states.len(),
722        )?;
723        let pmu = xmu_arc.ncols();
724        let p_ls = x_ls_arc.ncols();
725        let q0_eta = &block_states[Self::BLOCK_MU].eta;
726        let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
727        let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
728        let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
729        let n = self.y.len();
730        let layout = GamlssBetaLayout::withwiggle(pmu, p_ls, betaw.len());
731        let (umu, u_ls, uw) =
732            layout.split_three(d_beta_flat, "GLS Wiggle joint dH operator d_beta")?;
733        if q0_eta.len() != n || eta_ls.len() != n || etaw.len() != n || self.weights.len() != n {
734            return Err(GamlssError::DimensionMismatch {
735                reason: "GaussianLocationScaleWiggleFamily input size mismatch".to_string(),
736            }
737            .into());
738        }
739        let q = q0_eta + etaw;
740        let geom = self.wiggle_geometry(q0_eta.view(), betaw.view())?;
741        let rows = self.get_or_compute_row_scalars(&q, eta_ls)?;
742        let xi = fast_av(xmu_arc.as_ref(), &umu);
743        let zeta = fast_av(x_ls_arc.as_ref(), &u_ls);
744        let szeta = &rows.kappa * &zeta;
745        let phi = fast_av(&geom.basis, &uw);
746        let mut q_u = &geom.dq_dq0 * &xi;
747        q_u += &phi;
748        let mut s1_u = &geom.d2q_dq02 * &xi;
749        s1_u += &fast_av(&geom.basis_d1, &uw);
750        let mut g2_u = &geom.d3q_dq03 * &xi;
751        g2_u += &fast_av(&geom.basis_d2, &uw);
752        let dw_u = -2.0 * &rows.w * &szeta;
753        let dm_u = -(&rows.w * &q_u) - &(2.0 * &rows.m * &szeta);
754
755        let coeff_mm_u = &(&dw_u * &geom.dq_dq0.mapv(|v| v * v))
756            + &(2.0 * &rows.w * &geom.dq_dq0 * &s1_u)
757            - &(&dm_u * &geom.d2q_dq02)
758            - &(&rows.m * &g2_u);
759        // H_{μ,ls} ≡ Fisher 0 (mean⊥scale); its directional derivative is 0.
760        let coeff_ml_u = Array1::<f64>::zeros(n);
761        // Fisher (ls,ls) 2κ²a directional derivative: 4κκ'a·zeta (#566).
762        let coeff_ll_u = 4.0 * &rows.kappa * &rows.kappa_prime * &(&zeta * &rows.obs_weight);
763        let a_u = &dw_u * &geom.dq_dq0 + &rows.w * &s1_u;
764        let c_u = -&dm_u;
765        // H_{ls,w} ≡ Fisher 0 (wiggle is mean-side); its derivative is 0 in
766        // both the B channel (l_u) and the B' channel (coeff_ls_b1).
767        let l_u = Array1::<f64>::zeros(n);
768
769        // Pair-coefficient bundles. For (0=X_mu, 3=B'): combine
770        // `xt_diag_y_dense(xmu, &(w·dq_dq0), &basis_u=diag(xi)·B')`
771        // (giving coeff `w·dq_dq0·xi`) with `xt_diag_y_dense(xmu, &c_u, &B')`
772        // (coeff `c_u`).
773        let coeff_m_b1 = &(&rows.w * &geom.dq_dq0 * &xi) + &c_u;
774        // (0=X_mu, 4=B''): from `xt_diag_y_dense(xmu, &(-m), &basis1_u=diag(xi)·B'')`.
775        let coeff_m_b2 = -(&rows.m * &xi);
776        // (1=X_ls, 3=B'): ls↔wiggle Fisher-0 cross → zero.
777        let coeff_ls_b1 = Array1::<f64>::zeros(n);
778        // (2=B, 3=B'): a_ww + a_ww^T where a_ww = (diag(xi)·B')^T diag(w) B
779        // = B'^T diag(w·xi) B. The symmetric pair contribution in
780        // `RowCoeffOperator` reproduces a_ww + a_ww^T with c = w·xi.
781        let coeff_b_b1 = &rows.w * &xi;
782
783        let basis: Arc<Array2<f64>> = Arc::new(geom.basis.clone());
784        let basis_d1: Arc<Array2<f64>> = Arc::new(geom.basis_d1.clone());
785        let basis_d2: Arc<Array2<f64>> = Arc::new(geom.basis_d2.clone());
786        let pw = basis.ncols();
787
788        Ok(Some(Arc::new(RowCoeffOperator::from_directions(
789            vec![pmu, p_ls, pw],
790            vec![
791                (0, xmu_arc),
792                (1, x_ls_arc),
793                (2, basis),
794                (2, basis_d1),
795                (2, basis_d2),
796            ],
797            vec![
798                // (X_mu, X_mu) ← `xt_diag_x_dense(xmu, &coeff_mm_u)`
799                (0, 0, coeff_mm_u),
800                // (X_mu, X_ls) ← `xt_diag_y_dense(xmu, &coeff_ml_u, x_ls)`
801                (0, 1, coeff_ml_u),
802                // (X_ls, X_ls) ← `xt_diag_x_dense(x_ls, &coeff_ll_u)`
803                (1, 1, coeff_ll_u),
804                // (X_mu, B) ← `xt_diag_y_dense(xmu, &a_u, &geom.basis)`
805                (0, 2, a_u),
806                // (X_mu, B') ← `xt_diag_y_dense(xmu, w·dq_dq0, basis_u=diag(ξ)·B') + xt_diag_y_dense(xmu, c_u, B')`
807                (0, 3, coeff_m_b1),
808                // (X_mu, B'') ← `xt_diag_y_dense(xmu, -m, basis1_u=diag(ξ)·B'')`
809                (0, 4, coeff_m_b2),
810                // (X_ls, B) ← `xt_diag_y_dense(x_ls, &l_u, &geom.basis)`
811                (1, 2, l_u),
812                // (X_ls, B') ← ls↔wiggle is mean⊥scale Fisher 0, so coeff_ls_b1 = 0
813                (1, 3, coeff_ls_b1),
814                // (B, B) ← `xt_diag_x_dense(&geom.basis, &dw_u)`
815                (2, 2, dw_u),
816                // (B, B') ← a_ww + a_ww^T = B^T diag(w·ξ) B' + B'^T diag(w·ξ) B
817                (2, 3, coeff_b_b1),
818            ],
819            n,
820        ))))
821    }
822
823    /// Build a matrix-free `RowCoeffOperator` for the GLS Wiggle joint
824    /// second directional derivative `D²_β H_L[u, v]`. Channels: X_mu,
825    /// X_ls, B, B', B'', B'''. Pair list mirrors the 8-term `xt_diag_*`
826    /// assembly in `_from_designs`, with row-coefficient bundles that
827    /// absorb the `ξ_u, ξ_v, ξ_u·ξ_v` row factors arising from
828    /// `basis_u = diag(ξ_u)·B'`, `basis_uv = diag(ξ_u·ξ_v)·B''`, etc.
829    pub(crate) fn gls_wiggle_second_directional_operator(
830        &self,
831        block_states: &[ParameterBlockState],
832        xmu_arc: Arc<Array2<f64>>,
833        x_ls_arc: Arc<Array2<f64>>,
834        d_beta_u: &Array1<f64>,
835        d_beta_v: &Array1<f64>,
836    ) -> Result<Option<Arc<dyn gam_problem::HyperOperator>>, String> {
837        validate_block_count::<GamlssError>(
838            "GaussianLocationScaleWiggleFamily",
839            3,
840            block_states.len(),
841        )?;
842        let pmu = xmu_arc.ncols();
843        let p_ls = x_ls_arc.ncols();
844        let q0_eta = &block_states[Self::BLOCK_MU].eta;
845        let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
846        let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
847        let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
848        let n = self.y.len();
849        let layout = GamlssBetaLayout::withwiggle(pmu, p_ls, betaw.len());
850        let (umu, u_ls, uw) = layout.split_three(d_beta_u, "GLS Wiggle d2H operator (u)")?;
851        let (vmu, v_ls, vw) = layout.split_three(d_beta_v, "GLS Wiggle d2H operator (v)")?;
852        if q0_eta.len() != n || eta_ls.len() != n || etaw.len() != n || self.weights.len() != n {
853            return Err(GamlssError::DimensionMismatch {
854                reason: "GaussianLocationScaleWiggleFamily input size mismatch".to_string(),
855            }
856            .into());
857        }
858        let q = q0_eta + etaw;
859        let geom = self.wiggle_geometry(q0_eta.view(), betaw.view())?;
860        let rows = self.get_or_compute_row_scalars(&q, eta_ls)?;
861
862        let xi_u = fast_av(xmu_arc.as_ref(), &umu);
863        let xi_v = fast_av(xmu_arc.as_ref(), &vmu);
864        let zeta_u = fast_av(x_ls_arc.as_ref(), &u_ls);
865        let zeta_v = fast_av(x_ls_arc.as_ref(), &v_ls);
866        let phi_u = fast_av(&geom.basis, &uw);
867        let phi_v = fast_av(&geom.basis, &vw);
868        let b1u = fast_av(&geom.basis_d1, &uw);
869        let b1v = fast_av(&geom.basis_d1, &vw);
870        let b2u = fast_av(&geom.basis_d2, &uw);
871        let b2v = fast_av(&geom.basis_d2, &vw);
872        let b3u = fast_av(&geom.basis_d3, &uw);
873        let b3v = fast_av(&geom.basis_d3, &vw);
874
875        let mut q_u = &geom.dq_dq0 * &xi_u;
876        q_u += &phi_u;
877        let mut q_v = &geom.dq_dq0 * &xi_v;
878        q_v += &phi_v;
879        let mut s1_u = &geom.d2q_dq02 * &xi_u;
880        s1_u += &b1u;
881        let mut s1_v = &geom.d2q_dq02 * &xi_v;
882        s1_v += &b1v;
883        let mut g2_u = &geom.d3q_dq03 * &xi_u;
884        g2_u += &b2u;
885        let mut g2_v = &geom.d3q_dq03 * &xi_v;
886        g2_v += &b2v;
887        let q_uv = &(&geom.d2q_dq02 * &(&xi_u * &xi_v)) + &(&b1u * &xi_v) + &(&b1v * &xi_u);
888        let s1_uv = &(&geom.d3q_dq03 * &(&xi_u * &xi_v)) + &(&b2u * &xi_v) + &(&b2v * &xi_u);
889        let g2_uv = &(&geom.d4q_dq04 * &(&xi_u * &xi_v)) + &(&b3u * &xi_v) + &(&b3v * &xi_u);
890
891        let GlsWiggleSecondDirCoeffs {
892            coeff_mm_uv,
893            coeff_ml_uv,
894            coeff_ll_uv,
895            a_u,
896            a_v,
897            a_uv,
898            c_u,
899            c_v,
900            c_uv,
901            l_u,
902            l_v,
903            l_uv,
904            dw_u,
905            dw_v,
906            dw_uv,
907        } = gls_wiggle_second_directional_coeffs(
908            &rows,
909            &geom,
910            &GlsWiggleDirPieces {
911                zeta_u: &zeta_u,
912                zeta_v: &zeta_v,
913                q_u: &q_u,
914                q_v: &q_v,
915                q_uv: &q_uv,
916                s1_u: &s1_u,
917                s1_v: &s1_v,
918                s1_uv: &s1_uv,
919                g2_u: &g2_u,
920                g2_v: &g2_v,
921                g2_uv: &g2_uv,
922            },
923        );
924
925        // Pair-coefficient bundles. Cross-block (mu, B'/B'') absorb basis_u/v/uv row scaling.
926        let xi_u_xi_v = &xi_u * &xi_v;
927        let coeff_m_b1 = &(&a_u * &xi_v) + &(&a_v * &xi_u) + &c_uv;
928        let coeff_m_b2 = &(&rows.w * &geom.dq_dq0 * &xi_u_xi_v) + &(&c_u * &xi_v) + &(&c_v * &xi_u);
929        let coeff_m_b3 = -(&rows.m * &xi_u_xi_v);
930        // ls↔wiggle is Fisher-0 (mean⊥scale): the B' (coeff_ls_b1) and B''
931        // (coeff_ls_b2) channels of its second directional derivative vanish.
932        let coeff_ls_b1 = &(&l_u * &xi_v) + &(&l_v * &xi_u);
933        let coeff_ls_b2 = Array1::<f64>::zeros(n);
934        // Wiggle-wiggle from a_ab + a_ab^T + a_ij + a_ij^T + a_iwj + a_iwj^T + a_jwi + a_jwi^T:
935        //   a_ab = B''^T diag(w·ξ_uξ_v) B    → pair (B, B'', w·ξ_uξ_v)
936        //   a_ij = B'^T diag(w·ξ_uξ_v) B'   → pair (B', B', 2·w·ξ_uξ_v)  (a_ij + a_ij^T)
937        //   a_iwj+a_jwi = B'^T diag(dw_v·ξ_u + dw_u·ξ_v) B → pair (B, B', sum)
938        let coeff_b_b1 = &(&dw_u * &xi_v) + &(&dw_v * &xi_u);
939        let coeff_b_b2 = &rows.w * &xi_u_xi_v;
940        let coeff_b1_b1 = 2.0 * &(&rows.w * &xi_u_xi_v);
941
942        let basis: Arc<Array2<f64>> = Arc::new(geom.basis.clone());
943        let basis_d1: Arc<Array2<f64>> = Arc::new(geom.basis_d1.clone());
944        let basis_d2: Arc<Array2<f64>> = Arc::new(geom.basis_d2.clone());
945        let basis_d3: Arc<Array2<f64>> = Arc::new(geom.basis_d3.clone());
946        let pw = basis.ncols();
947
948        Ok(Some(Arc::new(RowCoeffOperator::from_directions(
949            vec![pmu, p_ls, pw],
950            vec![
951                (0, xmu_arc),
952                (1, x_ls_arc),
953                (2, basis),
954                (2, basis_d1),
955                (2, basis_d2),
956                (2, basis_d3),
957            ],
958            vec![
959                // (X_mu, X_mu) ← `xt_diag_x_dense(xmu, &coeff_mm_uv)`
960                (0, 0, coeff_mm_uv),
961                // (X_mu, X_ls) ← `xt_diag_y_dense(xmu, &coeff_ml_uv, x_ls)`
962                (0, 1, coeff_ml_uv),
963                // (X_ls, X_ls) ← `xt_diag_x_dense(x_ls, &coeff_ll_uv)`
964                (1, 1, coeff_ll_uv),
965                // (X_mu, B) ← `xt_diag_y_dense(xmu, &a_uv, &geom.basis)`
966                (0, 2, a_uv),
967                // (X_mu, B') ← combined `a_u·ξ_v + a_v·ξ_u + c_uv` from
968                // `xt_diag_y_dense(xmu, a_u, basis_v) + xt_diag_y_dense(xmu,
969                // a_v, basis_u) + xt_diag_y_dense(xmu, c_uv, B')`
970                (0, 3, coeff_m_b1),
971                // (X_mu, B'') ← `xt_diag_y_dense(xmu, w·dq_dq0, basis_uv) +
972                // xt_diag_y_dense(xmu, c_u, basis1_v) + xt_diag_y_dense(xmu,
973                // c_v, basis1_u)` (basis_uv = diag(ξ_uξ_v)·B'';
974                // basis1_{u,v} = diag(ξ_{u,v})·B'')
975                (0, 4, coeff_m_b2),
976                // (X_mu, B''') ← `xt_diag_y_dense(xmu, -m, basis1_uv)`
977                // with basis1_uv = diag(ξ_uξ_v)·B'''
978                (0, 5, coeff_m_b3),
979                // (X_ls, B) ← `xt_diag_y_dense(x_ls, &l_uv, &geom.basis)`
980                (1, 2, l_uv),
981                // (X_ls, B') ← combined from `xt_diag_y_dense(x_ls, l_u,
982                // basis_v) + xt_diag_y_dense(x_ls, l_v, basis_u)` =
983                // `l_u·ξ_v + l_v·ξ_u`
984                (1, 3, coeff_ls_b1),
985                // (X_ls, B'') ← ls↔wiggle is mean⊥scale Fisher 0, so coeff_ls_b2 = 0
986                (1, 4, coeff_ls_b2),
987                // (B, B) ← `xt_diag_x_dense(&geom.basis, &dw_uv)`
988                (2, 2, dw_uv),
989                // (B, B') ← combined `a_iwj + a_iwj^T + a_jwi + a_jwi^T` =
990                // B^T diag(dw_u·ξ_v + dw_v·ξ_u) B' + B'^T diag(...) B
991                (2, 3, coeff_b_b1),
992                // (B, B'') ← `a_ab + a_ab^T` with a_ab = B''^T diag(w·ξ_uξ_v) B
993                (2, 4, coeff_b_b2),
994                // (B', B') ← `a_ij + a_ij^T = 2·B'^T diag(w·ξ_uξ_v) B'`;
995                // diagonal pair coeff doubles to absorb the factor of 2
996                (3, 3, coeff_b1_b1),
997            ],
998            n,
999        ))))
1000    }
1001
1002    pub(crate) fn exact_newton_joint_hessiansecond_directional_derivative_from_designs(
1003        &self,
1004        block_states: &[ParameterBlockState],
1005        xmu: &Array2<f64>,
1006        x_ls: &Array2<f64>,
1007        d_beta_u_flat: &Array1<f64>,
1008        d_beta_v_flat: &Array1<f64>,
1009    ) -> Result<Option<Array2<f64>>, String> {
1010        validate_block_count::<GamlssError>(
1011            "GaussianLocationScaleWiggleFamily",
1012            3,
1013            block_states.len(),
1014        )?;
1015        let pmu = xmu.ncols();
1016        let p_ls = x_ls.ncols();
1017        let q0 = &block_states[Self::BLOCK_MU].eta;
1018        let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
1019        let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
1020        let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
1021        let n = self.y.len();
1022        let layout = GamlssBetaLayout::withwiggle(pmu, p_ls, betaw.len());
1023        let (umu, u_ls, uw) = layout.split_three(
1024            d_beta_u_flat,
1025            "GaussianLocationScaleWiggleFamily exact joint second directional Hessian (u)",
1026        )?;
1027        let (vmu, v_ls, vw) = layout.split_three(
1028            d_beta_v_flat,
1029            "GaussianLocationScaleWiggleFamily exact joint second directional Hessian (v)",
1030        )?;
1031        if q0.len() != n || eta_ls.len() != n || etaw.len() != n || self.weights.len() != n {
1032            return Err(GamlssError::DimensionMismatch {
1033                reason: "GaussianLocationScaleWiggleFamily input size mismatch".to_string(),
1034            }
1035            .into());
1036        }
1037        let q = q0 + etaw;
1038        let geom = self.wiggle_geometry(q0.view(), betaw.view())?;
1039        let rows = self.get_or_compute_row_scalars(&q, eta_ls)?;
1040
1041        let xi_u = fast_av(xmu, &umu);
1042        let xi_v = fast_av(xmu, &vmu);
1043        let zeta_u = fast_av(x_ls, &u_ls);
1044        let zeta_v = fast_av(x_ls, &v_ls);
1045        let phi_u = fast_av(&geom.basis, &uw);
1046        let phi_v = fast_av(&geom.basis, &vw);
1047        let b1u = fast_av(&geom.basis_d1, &uw);
1048        let b1v = fast_av(&geom.basis_d1, &vw);
1049        let b2u = fast_av(&geom.basis_d2, &uw);
1050        let b2v = fast_av(&geom.basis_d2, &vw);
1051        let b3u = fast_av(&geom.basis_d3, &uw);
1052        let b3v = fast_av(&geom.basis_d3, &vw);
1053
1054        let mut q_u = &geom.dq_dq0 * &xi_u;
1055        q_u += &phi_u;
1056        let mut q_v = &geom.dq_dq0 * &xi_v;
1057        q_v += &phi_v;
1058        let mut s1_u = &geom.d2q_dq02 * &xi_u;
1059        s1_u += &b1u;
1060        let mut s1_v = &geom.d2q_dq02 * &xi_v;
1061        s1_v += &b1v;
1062        let mut g2_u = &geom.d3q_dq03 * &xi_u;
1063        g2_u += &b2u;
1064        let mut g2_v = &geom.d3q_dq03 * &xi_v;
1065        g2_v += &b2v;
1066        let q_uv = &(&geom.d2q_dq02 * &(&xi_u * &xi_v)) + &(&b1u * &xi_v) + &(&b1v * &xi_u);
1067        let s1_uv = &(&geom.d3q_dq03 * &(&xi_u * &xi_v)) + &(&b2u * &xi_v) + &(&b2v * &xi_u);
1068        let g2_uv = &(&geom.d4q_dq04 * &(&xi_u * &xi_v)) + &(&b3u * &xi_v) + &(&b3v * &xi_u);
1069
1070        let basis_u = scale_matrix_rows(&geom.basis_d1, &xi_u)?;
1071        let basis_v = scale_matrix_rows(&geom.basis_d1, &xi_v)?;
1072        let basis_uv = scale_matrix_rows(&geom.basis_d2, &(&xi_u * &xi_v))?;
1073        let basis1_u = scale_matrix_rows(&geom.basis_d2, &xi_u)?;
1074        let basis1_v = scale_matrix_rows(&geom.basis_d2, &xi_v)?;
1075        let basis1_uv = scale_matrix_rows(&geom.basis_d3, &(&xi_u * &xi_v))?;
1076
1077        // Shared κ-aware second-directional row coefficients (κ' = κ(1−κ),
1078        // κ'' = κ(1−κ)(1−2κ), κ''' = κ''(1−2κ) − 2(κ')²): identical to the
1079        // matrix-free operator path, factored into one helper.
1080        let GlsWiggleSecondDirCoeffs {
1081            coeff_mm_uv,
1082            coeff_ml_uv,
1083            coeff_ll_uv,
1084            a_u,
1085            a_v,
1086            a_uv,
1087            c_u,
1088            c_v,
1089            c_uv,
1090            l_u,
1091            l_v,
1092            l_uv,
1093            dw_u,
1094            dw_v,
1095            dw_uv,
1096        } = gls_wiggle_second_directional_coeffs(
1097            &rows,
1098            &geom,
1099            &GlsWiggleDirPieces {
1100                zeta_u: &zeta_u,
1101                zeta_v: &zeta_v,
1102                q_u: &q_u,
1103                q_v: &q_v,
1104                q_uv: &q_uv,
1105                s1_u: &s1_u,
1106                s1_v: &s1_v,
1107                s1_uv: &s1_uv,
1108                g2_u: &g2_u,
1109                g2_v: &g2_v,
1110                g2_uv: &g2_uv,
1111            },
1112        );
1113
1114        let h_mm = xt_diag_x_dense(xmu, &coeff_mm_uv)?;
1115        let h_ml = xt_diag_y_dense(xmu, &coeff_ml_uv, x_ls)?;
1116        let h_ll = xt_diag_x_dense(x_ls, &coeff_ll_uv)?;
1117        let h_mw = xt_diag_y_dense(xmu, &a_uv, &geom.basis)?
1118            + &xt_diag_y_dense(xmu, &a_u, &basis_v)?
1119            + &xt_diag_y_dense(xmu, &a_v, &basis_u)?
1120            + &xt_diag_y_dense(xmu, &(&rows.w * &geom.dq_dq0), &basis_uv)?
1121            + &xt_diag_y_dense(xmu, &c_uv, &geom.basis_d1)?
1122            + &xt_diag_y_dense(xmu, &c_u, &basis1_v)?
1123            + &xt_diag_y_dense(xmu, &c_v, &basis1_u)?
1124            + &xt_diag_y_dense(xmu, &(-&rows.m), &basis1_uv)?;
1125        // H_{ls,w} ≡ Fisher 0 (mean⊥scale): l_uv/l_u/l_v are 0 (shared helper)
1126        // and the 2κm·B'' channel vanishes too.
1127        let zeros_ls_b2 = Array1::<f64>::zeros(n);
1128        let h_lw = xt_diag_y_dense(x_ls, &l_uv, &geom.basis)?
1129            + &xt_diag_y_dense(x_ls, &l_u, &basis_v)?
1130            + &xt_diag_y_dense(x_ls, &l_v, &basis_u)?
1131            + &xt_diag_y_dense(x_ls, &zeros_ls_b2, &basis_uv)?;
1132        let a_ab = xt_diag_y_dense(&basis_uv, &rows.w, &geom.basis)?;
1133        let a_ij = xt_diag_y_dense(&basis_u, &rows.w, &basis_v)?;
1134        let a_iwj = xt_diag_y_dense(&basis_u, &dw_v, &geom.basis)?;
1135        let a_jwi = xt_diag_y_dense(&basis_v, &dw_u, &geom.basis)?;
1136        let h_ww = &a_ab
1137            + &a_ab.t()
1138            + &a_ij
1139            + a_ij.t()
1140            + &a_iwj
1141            + a_iwj.t()
1142            + &a_jwi
1143            + a_jwi.t()
1144            + &xt_diag_x_dense(&geom.basis, &dw_uv)?;
1145        Ok(Some(gaussian_pack_wiggle_joint_symmetrichessian(
1146            &h_mm, &h_ml, &h_mw, &h_ll, &h_lw, &h_ww,
1147        )))
1148    }
1149
1150    pub(crate) fn exact_newton_joint_psi_terms_from_designs(
1151        &self,
1152        block_states: &[ParameterBlockState],
1153        derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
1154        psi_index: usize,
1155        xmu: &Array2<f64>,
1156        x_ls: &Array2<f64>,
1157    ) -> Result<Option<crate::custom_family::ExactNewtonJointPsiTerms>, String> {
1158        let Some(dir_a) = self.exact_newton_joint_psi_direction(
1159            block_states,
1160            derivative_blocks,
1161            psi_index,
1162            xmu,
1163            x_ls,
1164            &self.policy,
1165        )?
1166        else {
1167            return Ok(None);
1168        };
1169        let q0 = &block_states[Self::BLOCK_MU].eta;
1170        let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
1171        let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
1172        let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
1173        let q = q0 + etaw;
1174        let geom = self.wiggle_geometry(q0.view(), betaw.view())?;
1175        let rows = self.get_or_compute_row_scalars(&q, eta_ls)?;
1176        let xmu_map = dir_a.x_primary_psi.as_linear_map_ref();
1177        let x_ls_map = dir_a.x_ls_psi.as_linear_map_ref();
1178
1179        let q_a = &geom.dq_dq0 * &dir_a.z_primary_psi;
1180        let s1_a = &geom.d2q_dq02 * &dir_a.z_primary_psi;
1181        let g2_a = &geom.d3q_dq03 * &dir_a.z_primary_psi;
1182        let basis_a = scale_matrix_rows(&geom.basis_d1, &dir_a.z_primary_psi)?;
1183        let basis1_a = scale_matrix_rows(&geom.basis_d2, &dir_a.z_primary_psi)?;
1184        // logb κ-chain on η_ls; e_a = ∂η_ls/∂ψ_a row-direction.
1185        let e_a = &dir_a.z_ls_psi;
1186        let amn = &rows.obs_weight - &rows.n;
1187        let dw_a = -2.0 * &rows.w * &rows.kappa * e_a;
1188        let dm_a = -(&rows.w * &q_a) - &(2.0 * &rows.m * &rows.kappa * e_a);
1189        let dn_a = -(2.0 * &rows.m * &q_a) - &(2.0 * &rows.n * &rows.kappa * e_a);
1190        let s_mu = -&rows.m * &geom.dq_dq0;
1191        let s_mu_a = -(&dm_a * &geom.dq_dq0) - &(&rows.m * &s1_a);
1192        let s_ls = &rows.kappa * &amn;
1193        let s_ls_a = &rows.kappa_prime * &(e_a * &amn) - &rows.kappa * &dn_a;
1194        let s_w = -&rows.m;
1195        let s_w_a = -&dm_a;
1196
1197        let objective_psi = (-&rows.m * &q_a + &s_ls * e_a).sum();
1198        let score_psi = gaussian_pack_wiggle_joint_score(
1199            &(xmu_map.transpose_mul(s_mu.view()) + fast_atv(xmu, &s_mu_a)),
1200            &(x_ls_map.transpose_mul(s_ls.view()) + fast_atv(x_ls, &s_ls_a)),
1201            &(fast_atv(&basis_a, &s_w) + fast_atv(&geom.basis, &s_w_a)),
1202        );
1203
1204        // Static blocks under logb. Gaussian mean⊥scale Fisher orthogonality:
1205        // μ AND the wiggle both enter the MEAN q = q0 + B(q0)·βw, so log σ is
1206        // the only scale-side block. The Fisher (expected) cross between any
1207        // mean-side parameter and log σ is exactly 0 because it carries
1208        // m = r·weight/σ² and E[m] = E[r]·weight/σ² = 0:
1209        //   coeff_ml = E[H_{μ,ls}] = 0  (observed 2κmD)
1210        //   l        = E[H_{ls,w}] = 0  (observed 2κm)
1211        // A function identically 0 has 0 ψ-derivatives, so coeff_ml_a and l_a
1212        // vanish too. This mirrors the non-wiggle psi path
1213        // (gaussian_joint_psi_firstweights: hmu_ls = dhmu_ls = 0) and the
1214        // wiggle Newton/REML Hessian path (wiggle_hessian_row_pieces:
1215        // coeff_ml = coeff_lw_b = 0). The observed SCORE (s_mu/s_ls/s_w above)
1216        // stays exact so Fisher scoring still hits the joint MLE; only the
1217        // curvature feeding the REML determinant / IFT correction is the
1218        // (orthogonal) expectation. coeff_ll is the residual-free Fisher
1219        // 2κ²a (#566); its ψ-derivative coeff_ll_a = 4κκ'a·e_a depends only on
1220        // η_ls. Same-side blocks (coeff_mm within mean, a/c the μ↔wiggle
1221        // within-mean cross, coeff_ww within mean) are untouched.
1222        let n = rows.m.len();
1223        let coeff_mm = &rows.w * &geom.dq_dq0.mapv(|v| v * v) - &rows.m * &geom.d2q_dq02;
1224        let coeff_mm_a = &(&dw_a * &geom.dq_dq0.mapv(|v| v * v))
1225            + &(2.0 * &rows.w * &geom.dq_dq0 * &s1_a)
1226            - &(&dm_a * &geom.d2q_dq02)
1227            - &(&rows.m * &g2_a);
1228        let coeff_ml = Array1::<f64>::zeros(n);
1229        let coeff_ml_a = Array1::<f64>::zeros(n);
1230        let coeff_ll = 2.0 * &rows.kappa * &rows.kappa * &rows.obs_weight;
1231        let coeff_ll_a = 4.0 * &rows.kappa * &rows.kappa_prime * &rows.obs_weight * e_a;
1232        let a = &rows.w * &geom.dq_dq0;
1233        let a_a = &dw_a * &geom.dq_dq0 + &rows.w * &s1_a;
1234        let c = -&rows.m;
1235        let c_a = -&dm_a;
1236        let l = Array1::<f64>::zeros(n);
1237        let l_a = Array1::<f64>::zeros(n);
1238        let h_mm_a1 = weighted_crossprod_psi_maps(
1239            xmu_map,
1240            coeff_mm.view(),
1241            CustomFamilyPsiLinearMapRef::Dense(xmu),
1242        )?;
1243        let h_mm = &h_mm_a1 + &h_mm_a1.t() + &xt_diag_x_dense(xmu, &coeff_mm_a)?;
1244        let h_ml = weighted_crossprod_psi_maps(
1245            xmu_map,
1246            coeff_ml.view(),
1247            CustomFamilyPsiLinearMapRef::Dense(x_ls),
1248        )? + &weighted_crossprod_psi_maps(
1249            CustomFamilyPsiLinearMapRef::Dense(xmu),
1250            coeff_ml.view(),
1251            x_ls_map,
1252        )? + &xt_diag_y_dense(xmu, &coeff_ml_a, x_ls)?;
1253        let h_ll_a1 = weighted_crossprod_psi_maps(
1254            x_ls_map,
1255            coeff_ll.view(),
1256            CustomFamilyPsiLinearMapRef::Dense(x_ls),
1257        )?;
1258        let h_ll = &h_ll_a1 + &h_ll_a1.t() + &xt_diag_x_dense(x_ls, &coeff_ll_a)?;
1259        let h_mw = weighted_crossprod_psi_maps(
1260            xmu_map,
1261            a.view(),
1262            CustomFamilyPsiLinearMapRef::Dense(&geom.basis),
1263        )? + &xt_diag_y_dense(xmu, &a_a, &geom.basis)?
1264            + &xt_diag_y_dense(xmu, &a, &basis_a)?
1265            + &weighted_crossprod_psi_maps(
1266                xmu_map,
1267                c.view(),
1268                CustomFamilyPsiLinearMapRef::Dense(&geom.basis_d1),
1269            )?
1270            + &xt_diag_y_dense(xmu, &c_a, &geom.basis_d1)?
1271            + &xt_diag_y_dense(xmu, &c, &basis1_a)?;
1272        let h_lw = weighted_crossprod_psi_maps(
1273            x_ls_map,
1274            l.view(),
1275            CustomFamilyPsiLinearMapRef::Dense(&geom.basis),
1276        )? + &xt_diag_y_dense(x_ls, &l_a, &geom.basis)?
1277            + &xt_diag_y_dense(x_ls, &l, &basis_a)?;
1278        let h_ww_a1 = xt_diag_y_dense(&basis_a, &rows.w, &geom.basis)?;
1279        let h_ww = &h_ww_a1 + &h_ww_a1.t() + &xt_diag_x_dense(&geom.basis, &dw_a)?;
1280
1281        Ok(Some(crate::custom_family::ExactNewtonJointPsiTerms {
1282            objective_psi,
1283            score_psi,
1284            hessian_psi: gaussian_pack_wiggle_joint_symmetrichessian(
1285                &h_mm, &h_ml, &h_mw, &h_ll, &h_lw, &h_ww,
1286            ),
1287            hessian_psi_operator: None,
1288        }))
1289    }
1290
1291    pub(crate) fn exact_newton_joint_psisecond_order_terms_from_designs(
1292        &self,
1293        block_states: &[ParameterBlockState],
1294        derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
1295        psi_i: usize,
1296        psi_j: usize,
1297        xmu: &Array2<f64>,
1298        x_ls: &Array2<f64>,
1299    ) -> Result<Option<crate::custom_family::ExactNewtonJointPsiSecondOrderTerms>, String> {
1300        let Some(dir_a) = self.exact_newton_joint_psi_direction(
1301            block_states,
1302            derivative_blocks,
1303            psi_i,
1304            xmu,
1305            x_ls,
1306            &self.policy,
1307        )?
1308        else {
1309            return Ok(None);
1310        };
1311        let Some(dir_b) = self.exact_newton_joint_psi_direction(
1312            block_states,
1313            derivative_blocks,
1314            psi_j,
1315            xmu,
1316            x_ls,
1317            &self.policy,
1318        )?
1319        else {
1320            return Ok(None);
1321        };
1322        Ok(Some(
1323            self.exact_newton_joint_psisecond_order_terms_from_parts(
1324                block_states,
1325                derivative_blocks,
1326                &dir_a,
1327                &dir_b,
1328                xmu,
1329                x_ls,
1330            )?,
1331        ))
1332    }
1333
1334    pub(crate) fn exact_newton_joint_psisecond_order_terms_from_parts(
1335        &self,
1336        block_states: &[ParameterBlockState],
1337        derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
1338        dir_a: &LocationScaleJointPsiDirection,
1339        dir_b: &LocationScaleJointPsiDirection,
1340        xmu: &Array2<f64>,
1341        x_ls: &Array2<f64>,
1342    ) -> Result<crate::custom_family::ExactNewtonJointPsiSecondOrderTerms, String> {
1343        let second_drifts = self.exact_newton_joint_psisecond_design_drifts(
1344            block_states,
1345            derivative_blocks,
1346            dir_a,
1347            dir_b,
1348            xmu,
1349            x_ls,
1350        )?;
1351        let n = self.y.len();
1352        let xmu_a_map = dir_a.x_primary_psi.as_linear_map_ref();
1353        let x_ls_a_map = dir_a.x_ls_psi.as_linear_map_ref();
1354        let xmu_b_map = dir_b.x_primary_psi.as_linear_map_ref();
1355        let x_ls_b_map = dir_b.x_ls_psi.as_linear_map_ref();
1356        let xmu_ab_map = second_psi_linear_map(
1357            second_drifts.x_primary_ab_action.as_ref(),
1358            second_drifts.x_primary_ab.as_ref(),
1359            n,
1360            xmu.ncols(),
1361        );
1362        let x_ls_ab_map = second_psi_linear_map(
1363            second_drifts.x_ls_ab_action.as_ref(),
1364            second_drifts.x_ls_ab.as_ref(),
1365            n,
1366            x_ls.ncols(),
1367        );
1368        let q0 = &block_states[Self::BLOCK_MU].eta;
1369        let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
1370        let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
1371        let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
1372        let q = q0 + etaw;
1373        let geom = self.wiggle_geometry(q0.view(), betaw.view())?;
1374        let rows = self.get_or_compute_row_scalars(&q, eta_ls)?;
1375
1376        let q_a = &geom.dq_dq0 * &dir_a.z_primary_psi;
1377        let q_b = &geom.dq_dq0 * &dir_b.z_primary_psi;
1378        let q_ab = &(&geom.dq_dq0 * &second_drifts.z_primary_ab)
1379            + &(&geom.d2q_dq02 * &(&dir_a.z_primary_psi * &dir_b.z_primary_psi));
1380        let s1_a = &geom.d2q_dq02 * &dir_a.z_primary_psi;
1381        let s1_b = &geom.d2q_dq02 * &dir_b.z_primary_psi;
1382        let s1_ab = &(&geom.d3q_dq03 * &(&dir_a.z_primary_psi * &dir_b.z_primary_psi))
1383            + &(&geom.d2q_dq02 * &second_drifts.z_primary_ab);
1384        let g2_a = &geom.d3q_dq03 * &dir_a.z_primary_psi;
1385        let g2_b = &geom.d3q_dq03 * &dir_b.z_primary_psi;
1386        let g2_ab = &(&geom.d4q_dq04 * &(&dir_a.z_primary_psi * &dir_b.z_primary_psi))
1387            + &(&geom.d3q_dq03 * &second_drifts.z_primary_ab);
1388        let basis_a = scale_matrix_rows(&geom.basis_d1, &dir_a.z_primary_psi)?;
1389        let basis_b = scale_matrix_rows(&geom.basis_d1, &dir_b.z_primary_psi)?;
1390        let basis_ab = scale_matrix_rows(&geom.basis_d1, &second_drifts.z_primary_ab)?
1391            + &scale_matrix_rows(
1392                &geom.basis_d2,
1393                &(&dir_a.z_primary_psi * &dir_b.z_primary_psi),
1394            )?;
1395        let basis1_a = scale_matrix_rows(&geom.basis_d2, &dir_a.z_primary_psi)?;
1396        let basis1_b = scale_matrix_rows(&geom.basis_d2, &dir_b.z_primary_psi)?;
1397        let basis1_ab = scale_matrix_rows(&geom.basis_d2, &second_drifts.z_primary_ab)?
1398            + &scale_matrix_rows(
1399                &geom.basis_d3,
1400                &(&dir_a.z_primary_psi * &dir_b.z_primary_psi),
1401            )?;
1402
1403        // logb κ-chain on η_ls; κ' = κ(1−κ), κ'' = κ(1−κ)(1−2κ),
1404        // κ''' = κ''(1−2κ) − 2(κ')².
1405        let e_a = &dir_a.z_ls_psi;
1406        let e_b = &dir_b.z_ls_psi;
1407        let e_ab = &second_drifts.z_ls_ab;
1408        let amn = &rows.obs_weight - &rows.n;
1409        // 4κ² − 2κ' (∂²w/∂η² style coefficient when both directions hit η_ls).
1410        let four_k2_minus_2kpi = 4.0 * &rows.kappa * &rows.kappa - 2.0 * &rows.kappa_prime;
1411
1412        // Row drifts under logb. The η_ls direction picks up a κ on each step,
1413        // and η_ls·η_ls picks up (4κ²−2κ') from differentiating κ on the
1414        // second leg. The η_ab (z_ls_ab) leg uses just one κ from the chain.
1415        let dw_a = -2.0 * &rows.w * &rows.kappa * e_a;
1416        let dw_b = -2.0 * &rows.w * &rows.kappa * e_b;
1417        let dw_ab =
1418            &four_k2_minus_2kpi * &rows.w * &(e_a * e_b) - &(2.0 * &rows.w * &rows.kappa * e_ab);
1419        let dm_a = -(&rows.w * &q_a) - &(2.0 * &rows.m * &rows.kappa * e_a);
1420        let dm_b = -(&rows.w * &q_b) - &(2.0 * &rows.m * &rows.kappa * e_b);
1421        let dm_ab = &(2.0 * &rows.w * &rows.kappa * &(&q_a * e_b + &q_b * e_a))
1422            - &(&rows.w * &q_ab)
1423            + &(&four_k2_minus_2kpi * &rows.m * &(e_a * e_b))
1424            - &(2.0 * &rows.m * &rows.kappa * e_ab);
1425        let dn_a = -(2.0 * &rows.m * &q_a) - &(2.0 * &rows.n * &rows.kappa * e_a);
1426        let dn_b = -(2.0 * &rows.m * &q_b) - &(2.0 * &rows.n * &rows.kappa * e_b);
1427        let dn_ab = &(2.0 * &rows.w * &(&q_a * &q_b))
1428            + &(4.0 * &rows.m * &rows.kappa * &(&q_a * e_b + &q_b * e_a))
1429            - &(2.0 * &rows.m * &q_ab)
1430            + &(&four_k2_minus_2kpi * &rows.n * &(e_a * e_b))
1431            - &(2.0 * &rows.n * &rows.kappa * e_ab);
1432
1433        let s_mu = -&rows.m * &geom.dq_dq0;
1434        let s_mu_a = -(&dm_a * &geom.dq_dq0) - &(&rows.m * &s1_a);
1435        let s_mu_b = -(&dm_b * &geom.dq_dq0) - &(&rows.m * &s1_b);
1436        let s_mu_ab =
1437            -(&dm_ab * &geom.dq_dq0) - &(&dm_a * &s1_b) - &(&dm_b * &s1_a) - &(&rows.m * &s1_ab);
1438        // score_ls = κ(a−n); ψ derivatives carry κ' / κ'' from chain on κ.
1439        let s_ls = &rows.kappa * &amn;
1440        let s_ls_a = &rows.kappa_prime * &(e_a * &amn) - &rows.kappa * &dn_a;
1441        let s_ls_b = &rows.kappa_prime * &(e_b * &amn) - &rows.kappa * &dn_b;
1442        // s_ls_ab = κ''·e_a·e_b·(a−n) + κ'·e_ab·(a−n)
1443        //         − κ'·(e_a·n_b + e_b·n_a) − κ·n_ab
1444        let s_ls_ab = &rows.kappa_dprime * &(e_a * e_b) * &amn + &rows.kappa_prime * e_ab * &amn
1445            - &rows.kappa_prime * &(e_a * &dn_b + e_b * &dn_a)
1446            - &rows.kappa * &dn_ab;
1447        let s_w = -&rows.m;
1448        let s_w_a = -&dm_a;
1449        let s_w_b = -&dm_b;
1450        let s_w_ab = -&dm_ab;
1451
1452        let objective_psi_psi = (&rows.w * &(&q_a * &q_b)
1453            + &(2.0 * &rows.m * &rows.kappa * &(&q_a * e_b + &q_b * e_a))
1454            + &((2.0 * &rows.kappa * &rows.kappa * &rows.n + &rows.kappa_prime * &amn)
1455                * &(e_a * e_b))
1456            - &(&rows.m * &q_ab)
1457            + &(&rows.kappa * &amn * e_ab))
1458            .sum();
1459
1460        let score_psi_psi = gaussian_pack_wiggle_joint_score(
1461            &(xmu_ab_map.transpose_mul(s_mu.view())
1462                + xmu_a_map.transpose_mul(s_mu_b.view())
1463                + xmu_b_map.transpose_mul(s_mu_a.view())
1464                + fast_atv(xmu, &s_mu_ab)),
1465            &(x_ls_ab_map.transpose_mul(s_ls.view())
1466                + x_ls_a_map.transpose_mul(s_ls_b.view())
1467                + x_ls_b_map.transpose_mul(s_ls_a.view())
1468                + fast_atv(x_ls, &s_ls_ab)),
1469            &(fast_atv(&basis_ab, &s_w)
1470                + fast_atv(&basis_a, &s_w_b)
1471                + fast_atv(&basis_b, &s_w_a)
1472                + fast_atv(&geom.basis, &s_w_ab)),
1473        );
1474
1475        // Static blocks under logb. coeff_mm has no κ; coeff_ll = Fisher 2κ²a
1476        // (#566). Gaussian mean⊥scale Fisher orthogonality: the wiggle and μ
1477        // both enter the mean (q = q0 + B·βw), log σ is the only scale block,
1478        // so coeff_ml = E[H_{μ,ls}] = 0 and l = E[H_{ls,w}] = 0 (observed 2κm,
1479        // E[m]=0). All of their ψ-directional derivatives (a/b/ab) are 0 since
1480        // a function identically 0 has 0 derivatives. The Fisher (ls,ls) block
1481        // depends only on η_ls so its derivatives carry only κ.
1482        let n = rows.m.len();
1483        let coeff_mm = &rows.w * &geom.dq_dq0.mapv(|v| v * v) - &rows.m * &geom.d2q_dq02;
1484        let coeff_ml = Array1::<f64>::zeros(n);
1485        let coeff_ll = 2.0 * &rows.kappa * &rows.kappa * &rows.obs_weight;
1486        // coeff_mm_a/b/ab: structurally κ-free; correctness now follows from
1487        // dw_a/_b/_ab and dm_a/_b/_ab carrying the κ chain on η_ls (above).
1488        let coeff_mm_a = &(&dw_a * &geom.dq_dq0.mapv(|v| v * v))
1489            + &(2.0 * &rows.w * &geom.dq_dq0 * &s1_a)
1490            - &(&dm_a * &geom.d2q_dq02)
1491            - &(&rows.m * &g2_a);
1492        let coeff_mm_b = &(&dw_b * &geom.dq_dq0.mapv(|v| v * v))
1493            + &(2.0 * &rows.w * &geom.dq_dq0 * &s1_b)
1494            - &(&dm_b * &geom.d2q_dq02)
1495            - &(&rows.m * &g2_b);
1496        let coeff_mm_ab = &(&dw_ab * &geom.dq_dq0.mapv(|v| v * v))
1497            + &(2.0 * &dw_a * &geom.dq_dq0 * &s1_b)
1498            + &(2.0 * &dw_b * &geom.dq_dq0 * &s1_a)
1499            + &(2.0 * &rows.w * &s1_a * &s1_b)
1500            + &(2.0 * &rows.w * &geom.dq_dq0 * &s1_ab)
1501            - &(&dm_ab * &geom.d2q_dq02)
1502            - &(&dm_a * &g2_b)
1503            - &(&dm_b * &g2_a)
1504            - &(&rows.m * &g2_ab);
1505        // coeff_ml (μ↔logσ) is Fisher 0; its 1st/2nd ψ-directional derivatives
1506        // are 0 as well.
1507        let coeff_ml_a = Array1::<f64>::zeros(n);
1508        let coeff_ml_b = Array1::<f64>::zeros(n);
1509        let coeff_ml_ab = Array1::<f64>::zeros(n);
1510        // Fisher (ls,ls) coeff_ll = 2κ²a (a constant prior weight) depends only
1511        // on η_ls (#566): ∂(2κ²a)/∂η = 4κκ'a, so the ψ-first derivatives are
1512        // 4κκ'a·e_a / e_b. The η_ab leg carries one κ on top.
1513        let coeff_ll_a = 4.0 * &rows.kappa * &rows.kappa_prime * &rows.obs_weight * e_a;
1514        let coeff_ll_b = 4.0 * &rows.kappa * &rows.kappa_prime * &rows.obs_weight * e_b;
1515        // coeff_ll_ab = ∂²(2κ²a)/∂a∂b = 4a(κ'²+κκ'')·e_a·e_b + 4κκ'a·e_ab
1516        // (mirrors the dense helper `d2h_ls_ls`).
1517        let coeff_ll_ab = 4.0
1518            * &rows.obs_weight
1519            * &(&rows.kappa_prime * &rows.kappa_prime + &rows.kappa * &rows.kappa_dprime)
1520            * &(e_a * e_b)
1521            + 4.0 * &rows.kappa * &rows.kappa_prime * &rows.obs_weight * e_ab;
1522        let a = &rows.w * &geom.dq_dq0;
1523        let a_a = &dw_a * &geom.dq_dq0 + &rows.w * &s1_a;
1524        let a_b = &dw_b * &geom.dq_dq0 + &rows.w * &s1_b;
1525        let a_ab = &dw_ab * &geom.dq_dq0 + &dw_a * &s1_b + &dw_b * &s1_a + &rows.w * &s1_ab;
1526        let c = -&rows.m;
1527        let c_a = -&dm_a;
1528        let c_b = -&dm_b;
1529        let c_ab = -&dm_ab;
1530        // l (logσ↔wiggle) is Fisher 0 (wiggle is mean-side; mean⊥scale), so all
1531        // of its 1st/2nd ψ-directional derivatives vanish.
1532        let l = Array1::<f64>::zeros(n);
1533        let l_a = Array1::<f64>::zeros(n);
1534        let l_b = Array1::<f64>::zeros(n);
1535        let l_ab = Array1::<f64>::zeros(n);
1536
1537        let hmm_ab = weighted_crossprod_psi_maps(
1538            xmu_ab_map,
1539            coeff_mm.view(),
1540            CustomFamilyPsiLinearMapRef::Dense(xmu),
1541        )?;
1542        let hmm_ij = weighted_crossprod_psi_maps(xmu_a_map, coeff_mm.view(), xmu_b_map)?;
1543        let hmm_iwj = weighted_crossprod_psi_maps(
1544            xmu_a_map,
1545            coeff_mm_b.view(),
1546            CustomFamilyPsiLinearMapRef::Dense(xmu),
1547        )?;
1548        let hmm_jwi = weighted_crossprod_psi_maps(
1549            xmu_b_map,
1550            coeff_mm_a.view(),
1551            CustomFamilyPsiLinearMapRef::Dense(xmu),
1552        )?;
1553        let h_mm = &hmm_ab
1554            + &hmm_ab.t()
1555            + &hmm_ij
1556            + hmm_ij.t()
1557            + &hmm_iwj
1558            + hmm_iwj.t()
1559            + &hmm_jwi
1560            + hmm_jwi.t()
1561            + &xt_diag_x_dense(xmu, &coeff_mm_ab)?;
1562        let h_ml = weighted_crossprod_psi_maps(
1563            xmu_ab_map,
1564            coeff_ml.view(),
1565            CustomFamilyPsiLinearMapRef::Dense(x_ls),
1566        )? + &weighted_crossprod_psi_maps(xmu_a_map, coeff_ml.view(), x_ls_b_map)?
1567            + &weighted_crossprod_psi_maps(xmu_b_map, coeff_ml.view(), x_ls_a_map)?
1568            + &weighted_crossprod_psi_maps(
1569                xmu_a_map,
1570                coeff_ml_b.view(),
1571                CustomFamilyPsiLinearMapRef::Dense(x_ls),
1572            )?
1573            + &weighted_crossprod_psi_maps(
1574                xmu_b_map,
1575                coeff_ml_a.view(),
1576                CustomFamilyPsiLinearMapRef::Dense(x_ls),
1577            )?
1578            + &weighted_crossprod_psi_maps(
1579                CustomFamilyPsiLinearMapRef::Dense(xmu),
1580                coeff_ml_a.view(),
1581                x_ls_b_map,
1582            )?
1583            + &weighted_crossprod_psi_maps(
1584                CustomFamilyPsiLinearMapRef::Dense(xmu),
1585                coeff_ml_b.view(),
1586                x_ls_a_map,
1587            )?
1588            + &xt_diag_y_dense(xmu, &coeff_ml_ab, x_ls)?
1589            + &weighted_crossprod_psi_maps(
1590                CustomFamilyPsiLinearMapRef::Dense(xmu),
1591                coeff_ml.view(),
1592                x_ls_ab_map,
1593            )?;
1594        let hll_ab = weighted_crossprod_psi_maps(
1595            x_ls_ab_map,
1596            coeff_ll.view(),
1597            CustomFamilyPsiLinearMapRef::Dense(x_ls),
1598        )?;
1599        let hll_ij = weighted_crossprod_psi_maps(x_ls_a_map, coeff_ll.view(), x_ls_b_map)?;
1600        let hll_iwj = weighted_crossprod_psi_maps(
1601            x_ls_a_map,
1602            coeff_ll_b.view(),
1603            CustomFamilyPsiLinearMapRef::Dense(x_ls),
1604        )?;
1605        let hll_jwi = weighted_crossprod_psi_maps(
1606            x_ls_b_map,
1607            coeff_ll_a.view(),
1608            CustomFamilyPsiLinearMapRef::Dense(x_ls),
1609        )?;
1610        let h_ll = &hll_ab
1611            + &hll_ab.t()
1612            + &hll_ij
1613            + hll_ij.t()
1614            + &hll_iwj
1615            + hll_iwj.t()
1616            + &hll_jwi
1617            + hll_jwi.t()
1618            + &xt_diag_x_dense(x_ls, &coeff_ll_ab)?;
1619        let h_mw = weighted_crossprod_psi_maps(
1620            xmu_ab_map,
1621            a.view(),
1622            CustomFamilyPsiLinearMapRef::Dense(&geom.basis),
1623        )? + &weighted_crossprod_psi_maps(
1624            xmu_a_map,
1625            a_b.view(),
1626            CustomFamilyPsiLinearMapRef::Dense(&geom.basis),
1627        )? + &weighted_crossprod_psi_maps(
1628            xmu_a_map,
1629            a.view(),
1630            CustomFamilyPsiLinearMapRef::Dense(&basis_b),
1631        )? + &weighted_crossprod_psi_maps(
1632            xmu_b_map,
1633            a_a.view(),
1634            CustomFamilyPsiLinearMapRef::Dense(&geom.basis),
1635        )? + &xt_diag_y_dense(xmu, &a_ab, &geom.basis)?
1636            + &xt_diag_y_dense(xmu, &a_a, &basis_b)?
1637            + &weighted_crossprod_psi_maps(
1638                xmu_b_map,
1639                a.view(),
1640                CustomFamilyPsiLinearMapRef::Dense(&basis_a),
1641            )?
1642            + &xt_diag_y_dense(xmu, &a_b, &basis_a)?
1643            + &xt_diag_y_dense(xmu, &a, &basis_ab)?
1644            + &weighted_crossprod_psi_maps(
1645                xmu_ab_map,
1646                c.view(),
1647                CustomFamilyPsiLinearMapRef::Dense(&geom.basis_d1),
1648            )?
1649            + &weighted_crossprod_psi_maps(
1650                xmu_a_map,
1651                c_b.view(),
1652                CustomFamilyPsiLinearMapRef::Dense(&geom.basis_d1),
1653            )?
1654            + &weighted_crossprod_psi_maps(
1655                xmu_a_map,
1656                c.view(),
1657                CustomFamilyPsiLinearMapRef::Dense(&basis1_b),
1658            )?
1659            + &weighted_crossprod_psi_maps(
1660                xmu_b_map,
1661                c_a.view(),
1662                CustomFamilyPsiLinearMapRef::Dense(&geom.basis_d1),
1663            )?
1664            + &xt_diag_y_dense(xmu, &c_ab, &geom.basis_d1)?
1665            + &xt_diag_y_dense(xmu, &c_a, &basis1_b)?
1666            + &weighted_crossprod_psi_maps(
1667                xmu_b_map,
1668                c.view(),
1669                CustomFamilyPsiLinearMapRef::Dense(&basis1_a),
1670            )?
1671            + &xt_diag_y_dense(xmu, &c_b, &basis1_a)?
1672            + &xt_diag_y_dense(xmu, &c, &basis1_ab)?;
1673        let h_lw = weighted_crossprod_psi_maps(
1674            x_ls_ab_map,
1675            l.view(),
1676            CustomFamilyPsiLinearMapRef::Dense(&geom.basis),
1677        )? + &weighted_crossprod_psi_maps(
1678            x_ls_a_map,
1679            l_b.view(),
1680            CustomFamilyPsiLinearMapRef::Dense(&geom.basis),
1681        )? + &weighted_crossprod_psi_maps(
1682            x_ls_a_map,
1683            l.view(),
1684            CustomFamilyPsiLinearMapRef::Dense(&basis_b),
1685        )? + &weighted_crossprod_psi_maps(
1686            x_ls_b_map,
1687            l_a.view(),
1688            CustomFamilyPsiLinearMapRef::Dense(&geom.basis),
1689        )? + &xt_diag_y_dense(x_ls, &l_ab, &geom.basis)?
1690            + &xt_diag_y_dense(x_ls, &l_a, &basis_b)?
1691            + &weighted_crossprod_psi_maps(
1692                x_ls_b_map,
1693                l.view(),
1694                CustomFamilyPsiLinearMapRef::Dense(&basis_a),
1695            )?
1696            + &xt_diag_y_dense(x_ls, &l_b, &basis_a)?
1697            + &xt_diag_y_dense(x_ls, &l, &basis_ab)?;
1698        let hww_ab = xt_diag_y_dense(&basis_ab, &rows.w, &geom.basis)?;
1699        let hww_ij = xt_diag_y_dense(&basis_a, &rows.w, &basis_b)?;
1700        let hww_iwj = xt_diag_y_dense(&basis_a, &dw_b, &geom.basis)?;
1701        let hww_jwi = xt_diag_y_dense(&basis_b, &dw_a, &geom.basis)?;
1702        let h_ww = &hww_ab
1703            + &hww_ab.t()
1704            + &hww_ij
1705            + hww_ij.t()
1706            + &hww_iwj
1707            + hww_iwj.t()
1708            + &hww_jwi
1709            + hww_jwi.t()
1710            + &xt_diag_x_dense(&geom.basis, &dw_ab)?;
1711
1712        Ok(crate::custom_family::ExactNewtonJointPsiSecondOrderTerms {
1713            objective_psi_psi,
1714            score_psi_psi,
1715            hessian_psi_psi: gaussian_pack_wiggle_joint_symmetrichessian(
1716                &h_mm, &h_ml, &h_mw, &h_ll, &h_lw, &h_ww,
1717            ),
1718            hessian_psi_psi_operator: None,
1719        })
1720    }
1721
1722    pub(crate) fn exact_newton_joint_psihessian_directional_derivative_from_designs(
1723        &self,
1724        block_states: &[ParameterBlockState],
1725        derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
1726        psi_index: usize,
1727        d_beta_flat: &Array1<f64>,
1728        xmu: &Array2<f64>,
1729        x_ls: &Array2<f64>,
1730    ) -> Result<Option<Array2<f64>>, String> {
1731        let Some(dir_a) = self.exact_newton_joint_psi_direction(
1732            block_states,
1733            derivative_blocks,
1734            psi_index,
1735            xmu,
1736            x_ls,
1737            &self.policy,
1738        )?
1739        else {
1740            return Ok(None);
1741        };
1742        Ok(Some(
1743            self.exact_newton_joint_psihessian_directional_derivative_from_parts(
1744                block_states,
1745                &dir_a,
1746                d_beta_flat,
1747                xmu,
1748                x_ls,
1749            )?,
1750        ))
1751    }
1752
1753    pub(crate) fn exact_newton_joint_psihessian_directional_derivative_from_parts(
1754        &self,
1755        block_states: &[ParameterBlockState],
1756        dir_a: &LocationScaleJointPsiDirection,
1757        d_beta_flat: &Array1<f64>,
1758        xmu: &Array2<f64>,
1759        x_ls: &Array2<f64>,
1760    ) -> Result<Array2<f64>, String> {
1761        let pmu = xmu.ncols();
1762        let p_ls = x_ls.ncols();
1763        let xmu_map = dir_a.x_primary_psi.as_linear_map_ref();
1764        let x_ls_map = dir_a.x_ls_psi.as_linear_map_ref();
1765        let q0 = &block_states[Self::BLOCK_MU].eta;
1766        let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
1767        let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
1768        let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
1769        let layout = GamlssBetaLayout::withwiggle(pmu, p_ls, betaw.len());
1770        let (umu, u_ls, uw) = layout.split_three(
1771            d_beta_flat,
1772            "GaussianLocationScaleWiggleFamily joint psi hessian directional derivative",
1773        )?;
1774        let q = q0 + etaw;
1775        let geom = self.wiggle_geometry(q0.view(), betaw.view())?;
1776        let rows = self.get_or_compute_row_scalars(&q, eta_ls)?;
1777
1778        let xi = fast_av(xmu, &umu);
1779        let zeta = fast_av(x_ls, &u_ls);
1780        let zmu_a_u = xmu_map.forward_mul(umu.view());
1781        let zls_a_u = x_ls_map.forward_mul(u_ls.view());
1782        let b1u = fast_av(&geom.basis_d1, &uw);
1783        let b2u = fast_av(&geom.basis_d2, &uw);
1784        let b3u = fast_av(&geom.basis_d3, &uw);
1785
1786        let q_u = &(&geom.dq_dq0 * &xi) + &fast_av(&geom.basis, &uw);
1787        let s1_u = &(&geom.d2q_dq02 * &xi) + &b1u;
1788        let g2_u = &(&geom.d3q_dq03 * &xi) + &b2u;
1789        let g3_u = &(&geom.d4q_dq04 * &xi) + &b3u;
1790
1791        let q_a = &geom.dq_dq0 * &dir_a.z_primary_psi;
1792        let s1_a = &geom.d2q_dq02 * &dir_a.z_primary_psi;
1793        let g2_a = &geom.d3q_dq03 * &dir_a.z_primary_psi;
1794        let q_a_u = &(&s1_u * &dir_a.z_primary_psi) + &(&geom.dq_dq0 * &zmu_a_u);
1795        let s1_a_u = &(&g2_u * &dir_a.z_primary_psi) + &(&geom.d2q_dq02 * &zmu_a_u);
1796        let g2_a_u = &(&g3_u * &dir_a.z_primary_psi) + &(&geom.d3q_dq03 * &zmu_a_u);
1797
1798        let basis_u = scale_matrix_rows(&geom.basis_d1, &xi)?;
1799        let basis1_u = scale_matrix_rows(&geom.basis_d2, &xi)?;
1800        let basis_a = scale_matrix_rows(&geom.basis_d1, &dir_a.z_primary_psi)?;
1801        let basis1_a = scale_matrix_rows(&geom.basis_d2, &dir_a.z_primary_psi)?;
1802        let basis_a_u = scale_matrix_rows(&geom.basis_d2, &(&xi * &dir_a.z_primary_psi))?
1803            + &scale_matrix_rows(&geom.basis_d1, &zmu_a_u)?;
1804        let basis1_a_u = scale_matrix_rows(&geom.basis_d3, &(&xi * &dir_a.z_primary_psi))?
1805            + &scale_matrix_rows(&geom.basis_d2, &zmu_a_u)?;
1806
1807        // logb κ-chain on η_ls; e_a = ψ_a's η_ls direction, ζ = β-direction.
1808        // η_au = zls_a_u is the second mixed derivative (β·ψ).
1809        let e_a = &dir_a.z_ls_psi;
1810        let four_k2_minus_2kpi = 4.0 * &rows.kappa * &rows.kappa - 2.0 * &rows.kappa_prime;
1811        let dw_u = -2.0 * &rows.w * &rows.kappa * &zeta;
1812        let dm_u = -(&rows.w * &q_u) - &(2.0 * &rows.m * &rows.kappa * &zeta);
1813        let dw_a = -2.0 * &rows.w * &rows.kappa * e_a;
1814        let dm_a = -(&rows.w * &q_a) - &(2.0 * &rows.m * &rows.kappa * e_a);
1815        let dw_a_u = &four_k2_minus_2kpi * &rows.w * &(e_a * &zeta)
1816            - &(2.0 * &rows.w * &rows.kappa * &zls_a_u);
1817        let dm_a_u = &(2.0 * &rows.w * &rows.kappa * &(&q_a * &zeta + &q_u * e_a))
1818            - &(&rows.w * &q_a_u)
1819            + &(&four_k2_minus_2kpi * &rows.m * &(e_a * &zeta))
1820            - &(2.0 * &rows.m * &rows.kappa * &zls_a_u);
1821
1822        let coeff_mm_u = &(&dw_u * &geom.dq_dq0.mapv(|v| v * v))
1823            + &(2.0 * &rows.w * &geom.dq_dq0 * &s1_u)
1824            - &(&dm_u * &geom.d2q_dq02)
1825            - &(&rows.m * &g2_u);
1826        // coeff_ml (μ↔logσ) is mean⊥scale Fisher 0 (E[m]=0), so both its
1827        // β-drift derivative coeff_ml_u and the mixed coeff_ml_a_u are 0.
1828        let n = rows.m.len();
1829        let coeff_ml_u = Array1::<f64>::zeros(n);
1830        // Fisher (ls,ls) coeff_ll = 2κ²a (#566); ∂(2κ²a)/∂η = 4κκ'a, so the
1831        // β-drift derivative along ζ is 4κκ'a·ζ.
1832        let coeff_ll_u = 4.0 * &rows.kappa * &rows.kappa_prime * &rows.obs_weight * &zeta;
1833        let coeff_mm_a_u = &(&dw_a_u * &geom.dq_dq0.mapv(|v| v * v))
1834            + &(2.0 * &dw_a * &geom.dq_dq0 * &s1_u)
1835            + &(2.0 * &dw_u * &geom.dq_dq0 * &s1_a)
1836            + &(2.0 * &rows.w * &s1_u * &s1_a)
1837            + &(2.0 * &rows.w * &geom.dq_dq0 * &s1_a_u)
1838            - &(&dm_a_u * &geom.d2q_dq02)
1839            - &(&dm_a * &g2_u)
1840            - &(&dm_u * &g2_a)
1841            - &(&rows.m * &g2_a_u);
1842        // coeff_ml_a_u = ∂²(coeff_ml)/∂a∂u = 0 (coeff_ml ≡ Fisher 0).
1843        let coeff_ml_a_u = Array1::<f64>::zeros(n);
1844        // coeff_ll_a_u = ∂²(2κ²a)/∂a∂u for the Fisher (ls,ls) block (#566):
1845        // 4a(κ'²+κκ'')·e_a·ζ + 4κκ'a·η_au (the η_au=zls_a_u mixed leg), mirroring
1846        // the dense mixed-drift helper.
1847        let coeff_ll_a_u = 4.0
1848            * &rows.obs_weight
1849            * &(&rows.kappa_prime * &rows.kappa_prime + &rows.kappa * &rows.kappa_dprime)
1850            * &(e_a * &zeta)
1851            + 4.0 * &rows.kappa * &rows.kappa_prime * &rows.obs_weight * &zls_a_u;
1852
1853        let a = &rows.w * &geom.dq_dq0;
1854        let a_u = &dw_u * &geom.dq_dq0 + &rows.w * &s1_u;
1855        let a_a = &dw_a * &geom.dq_dq0 + &rows.w * &s1_a;
1856        let a_a_u = &dw_a_u * &geom.dq_dq0 + &dw_a * &s1_u + &dw_u * &s1_a + &rows.w * &s1_a_u;
1857        let c = -&rows.m;
1858        let c_u = -&dm_u;
1859        let c_a = -&dm_a;
1860        let c_a_u = -&dm_a_u;
1861        // l (logσ↔wiggle) is mean⊥scale Fisher 0 (wiggle is mean-side), so its
1862        // β-drift (l_u), ψ (l_a), and mixed (l_a_u) derivatives all vanish.
1863        let l = Array1::<f64>::zeros(n);
1864        let l_u = Array1::<f64>::zeros(n);
1865        let l_a = Array1::<f64>::zeros(n);
1866        let l_a_u = Array1::<f64>::zeros(n);
1867
1868        let hmm_a1 = weighted_crossprod_psi_maps(
1869            xmu_map,
1870            coeff_mm_u.view(),
1871            CustomFamilyPsiLinearMapRef::Dense(xmu),
1872        )?;
1873        let h_mm = &hmm_a1 + &hmm_a1.t() + &xt_diag_x_dense(xmu, &coeff_mm_a_u)?;
1874        let h_ml = weighted_crossprod_psi_maps(
1875            xmu_map,
1876            coeff_ml_u.view(),
1877            CustomFamilyPsiLinearMapRef::Dense(x_ls),
1878        )? + &weighted_crossprod_psi_maps(
1879            CustomFamilyPsiLinearMapRef::Dense(xmu),
1880            coeff_ml_u.view(),
1881            x_ls_map,
1882        )? + &xt_diag_y_dense(xmu, &coeff_ml_a_u, x_ls)?;
1883        let hll_a1 = weighted_crossprod_psi_maps(
1884            x_ls_map,
1885            coeff_ll_u.view(),
1886            CustomFamilyPsiLinearMapRef::Dense(x_ls),
1887        )?;
1888        let h_ll = &hll_a1 + &hll_a1.t() + &xt_diag_x_dense(x_ls, &coeff_ll_a_u)?;
1889        let h_mw = weighted_crossprod_psi_maps(
1890            xmu_map,
1891            a_u.view(),
1892            CustomFamilyPsiLinearMapRef::Dense(&geom.basis),
1893        )? + &weighted_crossprod_psi_maps(
1894            xmu_map,
1895            a.view(),
1896            CustomFamilyPsiLinearMapRef::Dense(&basis_u),
1897        )? + &xt_diag_y_dense(xmu, &a_a_u, &geom.basis)?
1898            + &xt_diag_y_dense(xmu, &a_a, &basis_u)?
1899            + &xt_diag_y_dense(xmu, &a_u, &basis_a)?
1900            + &xt_diag_y_dense(xmu, &a, &basis_a_u)?
1901            + &weighted_crossprod_psi_maps(
1902                xmu_map,
1903                c_u.view(),
1904                CustomFamilyPsiLinearMapRef::Dense(&geom.basis_d1),
1905            )?
1906            + &weighted_crossprod_psi_maps(
1907                xmu_map,
1908                c.view(),
1909                CustomFamilyPsiLinearMapRef::Dense(&basis1_u),
1910            )?
1911            + &xt_diag_y_dense(xmu, &c_a_u, &geom.basis_d1)?
1912            + &xt_diag_y_dense(xmu, &c_a, &basis1_u)?
1913            + &xt_diag_y_dense(xmu, &c_u, &basis1_a)?
1914            + &xt_diag_y_dense(xmu, &c, &basis1_a_u)?;
1915        let h_lw = weighted_crossprod_psi_maps(
1916            x_ls_map,
1917            l_u.view(),
1918            CustomFamilyPsiLinearMapRef::Dense(&geom.basis),
1919        )? + &weighted_crossprod_psi_maps(
1920            x_ls_map,
1921            l.view(),
1922            CustomFamilyPsiLinearMapRef::Dense(&basis_u),
1923        )? + &xt_diag_y_dense(x_ls, &l_a_u, &geom.basis)?
1924            + &xt_diag_y_dense(x_ls, &l_a, &basis_u)?
1925            + &xt_diag_y_dense(x_ls, &l_u, &basis_a)?
1926            + &xt_diag_y_dense(x_ls, &l, &basis_a_u)?;
1927        let hww_a_u = xt_diag_y_dense(&basis_a_u, &rows.w, &geom.basis)?;
1928        let hww_aw = xt_diag_y_dense(&basis_a, &dw_u, &geom.basis)?;
1929        let hww_au = xt_diag_y_dense(&basis_a, &rows.w, &basis_u)?;
1930        let h_ww = &hww_a_u
1931            + &hww_a_u.t()
1932            + &hww_aw
1933            + hww_aw.t()
1934            + &hww_au
1935            + hww_au.t()
1936            + &xt_diag_x_dense(&geom.basis, &dw_a_u)?;
1937
1938        Ok(gaussian_pack_wiggle_joint_symmetrichessian(
1939            &h_mm, &h_ml, &h_mw, &h_ll, &h_lw, &h_ww,
1940        ))
1941    }
1942
1943    pub(crate) fn exact_newton_joint_psi_terms_for_specs(
1944        &self,
1945        block_states: &[ParameterBlockState],
1946        specs: &[ParameterBlockSpec],
1947        derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
1948        psi_index: usize,
1949    ) -> Result<Option<crate::custom_family::ExactNewtonJointPsiTerms>, String> {
1950        let Some((xmu, x_ls)) = self.exact_joint_dense_block_designs(Some(specs))? else {
1951            return Ok(None);
1952        };
1953        self.exact_newton_joint_psi_terms_from_designs(
1954            block_states,
1955            derivative_blocks,
1956            psi_index,
1957            &xmu,
1958            &x_ls,
1959        )
1960    }
1961
1962    pub(crate) fn exact_newton_joint_psisecond_order_terms_for_specs(
1963        &self,
1964        block_states: &[ParameterBlockState],
1965        specs: &[ParameterBlockSpec],
1966        derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
1967        psi_i: usize,
1968        psi_j: usize,
1969    ) -> Result<Option<crate::custom_family::ExactNewtonJointPsiSecondOrderTerms>, String> {
1970        let Some((xmu, x_ls)) = self.exact_joint_dense_block_designs(Some(specs))? else {
1971            return Ok(None);
1972        };
1973        self.exact_newton_joint_psisecond_order_terms_from_designs(
1974            block_states,
1975            derivative_blocks,
1976            psi_i,
1977            psi_j,
1978            &xmu,
1979            &x_ls,
1980        )
1981    }
1982
1983    pub(crate) fn exact_newton_joint_psihessian_directional_derivative_for_specs(
1984        &self,
1985        block_states: &[ParameterBlockState],
1986        specs: &[ParameterBlockSpec],
1987        derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
1988        psi_index: usize,
1989        d_beta_flat: &Array1<f64>,
1990    ) -> Result<Option<Array2<f64>>, String> {
1991        let Some((xmu, x_ls)) = self.exact_joint_dense_block_designs(Some(specs))? else {
1992            return Ok(None);
1993        };
1994        self.exact_newton_joint_psihessian_directional_derivative_from_designs(
1995            block_states,
1996            derivative_blocks,
1997            psi_index,
1998            d_beta_flat,
1999            &xmu,
2000            &x_ls,
2001        )
2002    }
2003}
2004
2005impl CustomFamily for GaussianLocationScaleWiggleFamily {
2006    // Preserve the pre-gam#1395 behavior: the trait default flipped to OFF (the
2007    // flat-prior exact-Newton objective carries no Jeffreys term), so families
2008    // that historically armed the term by default opt back in explicitly.
2009    fn joint_jeffreys_term_required(&self) -> bool {
2010        true
2011    }
2012
2013    fn exact_newton_joint_hessian_beta_dependent(&self) -> bool {
2014        true
2015    }
2016
2017    /// Non-spatial location-scale seeding classification — see
2018    /// `GaussianLocationScaleFamily::outer_seed_config` for the full rationale.
2019    /// The wiggle variant has the identical non-profiled log-σ predictor, so it
2020    /// needs the same `GaussianLocationScale` classification (flexible Gaussian
2021    /// seed grid + lowest-cost keep-best + interior-extreme promotion) on the
2022    /// rho-only path; without it the log-σ block over-smooths exactly as the
2023    /// non-wiggle family does.
2024    fn outer_seed_config(&self, n_params: usize) -> crate::seeding::SeedConfig {
2025        if n_params == 0 {
2026            return crate::seeding::SeedConfig::default();
2027        }
2028        let mut config = crate::seeding::SeedConfig::default();
2029        config.risk_profile = crate::seeding::SeedRiskProfile::GaussianLocationScale;
2030        config.max_seeds = 4;
2031        config.seed_budget = 2;
2032        config
2033    }
2034
2035    fn coefficient_hessian_cost(&self, specs: &[ParameterBlockSpec]) -> u64 {
2036        // Operator-aware (see GaussianLocationScaleFamily for derivation): when
2037        // `use_joint_matrix_free_path` selects the workspace operator, joint
2038        // Hv apply is O(n · (p_t + p_ℓ + p_w)) — the row-streaming RowCoeffOperator
2039        // never materializes the dense (p_t + p_ℓ + p_w)² matrix.
2040        crate::location_scale_engine::location_scale_coefficient_hessian_cost(
2041            self.y.len() as u64,
2042            specs,
2043        )
2044    }
2045
2046    fn block_linear_constraints(
2047        &self,
2048        _: &[ParameterBlockState],
2049        block_idx: usize,
2050        spec: &ParameterBlockSpec,
2051    ) -> Result<Option<LinearInequalityConstraints>, String> {
2052        if block_idx != Self::BLOCK_WIGGLE {
2053            return Ok(None);
2054        }
2055        Ok(monotone_wiggle_nonnegative_constraints(spec.design.ncols()))
2056    }
2057
2058    fn post_update_block_beta(
2059        &self,
2060        _: &[ParameterBlockState],
2061        block_idx: usize,
2062        block_spec: &ParameterBlockSpec,
2063        beta: Array1<f64>,
2064    ) -> Result<Array1<f64>, String> {
2065        assert!(!block_spec.name.is_empty());
2066        if block_idx != Self::BLOCK_WIGGLE {
2067            return Ok(beta);
2068        }
2069        let beta = project_monotone_wiggle_beta_nonnegative(beta);
2070        validate_monotone_wiggle_beta_nonnegative(
2071            &beta,
2072            "GaussianLocationScaleWiggleFamily post-update",
2073        )?;
2074        Ok(beta)
2075    }
2076
2077    fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
2078        validate_block_count::<GamlssError>(
2079            "GaussianLocationScaleWiggleFamily",
2080            3,
2081            block_states.len(),
2082        )?;
2083        let n = self.y.len();
2084        let eta_mu = &block_states[Self::BLOCK_MU].eta;
2085        let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
2086        let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
2087        if eta_mu.len() != n || eta_ls.len() != n || etaw.len() != n || self.weights.len() != n {
2088            return Err(GamlssError::DimensionMismatch {
2089                reason: "GaussianLocationScaleWiggleFamily input size mismatch".to_string(),
2090            }
2091            .into());
2092        }
2093        let ln2pi = (2.0 * std::f64::consts::PI).ln();
2094        // Per-row kernel emits 6 working values into pre-allocated outputs;
2095        // ll is reduced via Rayon's sum. Independent across rows. Note
2096        // wmu == ww (both equal location_working_weight) and the mean+wiggle
2097        // working responses share row.location_working_shift, applied to
2098        // eta_mu[i] and etaw[i] respectively. The previous `q = eta_mu + etaw`
2099        // intermediate is inlined to avoid an extra n-vector allocation.
2100        let mut zmu = Array1::<f64>::zeros(n);
2101        let mut wmu = Array1::<f64>::zeros(n);
2102        let mut zls = Array1::<f64>::zeros(n);
2103        let mut wls = Array1::<f64>::zeros(n);
2104        let mut zw = Array1::<f64>::zeros(n);
2105        let mut ww = Array1::<f64>::zeros(n);
2106        const CHUNK: usize = 1024;
2107        let zmu_s = zmu
2108            .as_slice_memory_order_mut()
2109            .expect("zeros is contiguous");
2110        let wmu_s = wmu
2111            .as_slice_memory_order_mut()
2112            .expect("zeros is contiguous");
2113        let zls_s = zls
2114            .as_slice_memory_order_mut()
2115            .expect("zeros is contiguous");
2116        let wls_s = wls
2117            .as_slice_memory_order_mut()
2118            .expect("zeros is contiguous");
2119        let zw_s = zw.as_slice_memory_order_mut().expect("zeros is contiguous");
2120        let ww_s = ww.as_slice_memory_order_mut().expect("zeros is contiguous");
2121        let y_view = self.y.view();
2122        let w_view = self.weights.view();
2123        let eta_mu_view = eta_mu.view();
2124        let eta_ls_view = eta_ls.view();
2125        let etaw_view = etaw.view();
2126        let ll: f64 = zmu_s
2127            .par_chunks_mut(CHUNK)
2128            .zip(wmu_s.par_chunks_mut(CHUNK))
2129            .zip(zls_s.par_chunks_mut(CHUNK))
2130            .zip(wls_s.par_chunks_mut(CHUNK))
2131            .zip(zw_s.par_chunks_mut(CHUNK))
2132            .zip(ww_s.par_chunks_mut(CHUNK))
2133            .enumerate()
2134            .map(
2135                |(chunk_idx, (((((zmu_c, wmu_c), zls_c), wls_c), zw_c), ww_c))| {
2136                    let start = chunk_idx * CHUNK;
2137                    let mut local_ll = 0.0;
2138                    for local in 0..zmu_c.len() {
2139                        let i = start + local;
2140                        let q_i = eta_mu_view[i] + etaw_view[i];
2141                        let row = gaussian_diagonal_row_kernel(
2142                            y_view[i],
2143                            q_i,
2144                            eta_ls_view[i],
2145                            w_view[i],
2146                            ln2pi,
2147                        );
2148                        let w_i = row.location_working_weight;
2149                        let shift = row.location_working_shift;
2150                        zmu_c[local] = eta_mu_view[i] + shift;
2151                        wmu_c[local] = w_i;
2152                        zw_c[local] = etaw_view[i] + shift;
2153                        ww_c[local] = w_i;
2154                        zls_c[local] = row.log_sigma_working_response;
2155                        wls_c[local] = row.log_sigma_working_weight;
2156                        local_ll += row.log_likelihood;
2157                    }
2158                    local_ll
2159                },
2160            )
2161            .sum();
2162
2163        Ok(FamilyEvaluation {
2164            log_likelihood: ll,
2165            blockworking_sets: vec![
2166                BlockWorkingSet::diagonal_checked(zmu, wmu)?,
2167                BlockWorkingSet::diagonal_checked(zls, wls)?,
2168                BlockWorkingSet::diagonal_checked(zw, ww)?,
2169            ],
2170        })
2171    }
2172
2173    fn log_likelihood_only(&self, block_states: &[ParameterBlockState]) -> Result<f64, String> {
2174        validate_block_count::<GamlssError>(
2175            "GaussianLocationScaleWiggleFamily",
2176            3,
2177            block_states.len(),
2178        )?;
2179        let eta_mu = &block_states[Self::BLOCK_MU].eta;
2180        let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
2181        let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
2182        if eta_mu.len() != self.y.len()
2183            || eta_ls.len() != self.y.len()
2184            || etaw.len() != self.y.len()
2185            || self.weights.len() != self.y.len()
2186        {
2187            return Err(GamlssError::DimensionMismatch {
2188                reason: "GaussianLocationScaleWiggleFamily input size mismatch".to_string(),
2189            }
2190            .into());
2191        }
2192        let q = eta_mu + etaw;
2193        let ln2pi = (2.0 * std::f64::consts::PI).ln();
2194        let mut ll = 0.0;
2195        for i in 0..self.y.len() {
2196            let sigma_i = logb_sigma_from_eta_scalar(eta_ls[i]);
2197            let inv_s2 = (sigma_i * sigma_i).recip();
2198            let r = self.y[i] - q[i];
2199            ll += self.weights[i] * (-0.5 * (r * r * inv_s2 + ln2pi + 2.0 * sigma_i.ln()));
2200        }
2201        Ok(ll)
2202    }
2203
2204    /// Outer-only log-likelihood with optional row subsample.
2205    ///
2206    /// When `options.outer_score_subsample` is `Some`, only the sampled rows
2207    /// contribute; each row's per-row log-likelihood term is multiplied by
2208    /// `WeightedOuterRow.weight`, the Horvitz–Thompson inverse-inclusion
2209    /// factor 1/π_i (uniform or stratified sampling both supported), so the
2210    /// partial sum is an unbiased estimator of the full-data log-likelihood.
2211    /// When `None`, this returns the full-data `log_likelihood_only`. Inner
2212    /// PIRLS line searches never install the subsample option, so they
2213    /// continue to score the exact full-data log-likelihood.
2214    fn log_likelihood_only_with_options(
2215        &self,
2216        block_states: &[ParameterBlockState],
2217        options: &BlockwiseFitOptions,
2218    ) -> Result<f64, String> {
2219        let Some(subsample) = options.outer_score_subsample.as_ref() else {
2220            return self.log_likelihood_only(block_states);
2221        };
2222        validate_block_count::<GamlssError>(
2223            "GaussianLocationScaleWiggleFamily",
2224            3,
2225            block_states.len(),
2226        )?;
2227        let n = self.y.len();
2228        let eta_mu = &block_states[Self::BLOCK_MU].eta;
2229        let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
2230        let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
2231        if eta_mu.len() != n || eta_ls.len() != n || etaw.len() != n || self.weights.len() != n {
2232            return Err(GamlssError::DimensionMismatch {
2233                reason: "GaussianLocationScaleWiggleFamily input size mismatch".to_string(),
2234            }
2235            .into());
2236        }
2237        let ln2pi = (2.0 * std::f64::consts::PI).ln();
2238        use rayon::iter::ParallelIterator;
2239        let ll: f64 = subsample
2240            .rows
2241            .par_iter()
2242            .map(|row| {
2243                let i = row.index;
2244                let wi = self.weights[i];
2245                if wi == 0.0 {
2246                    return 0.0;
2247                }
2248                let sigma_i = logb_sigma_from_eta_scalar(eta_ls[i]);
2249                let inv_s2 = (sigma_i * sigma_i).recip();
2250                let r = self.y[i] - eta_mu[i] - etaw[i];
2251                row.weight * wi * (-0.5 * (r * r * inv_s2 + ln2pi + 2.0 * sigma_i.ln()))
2252            })
2253            .sum();
2254        Ok(ll)
2255    }
2256
2257    fn requires_joint_outer_hyper_path(&self) -> bool {
2258        true
2259    }
2260
2261    fn exact_newton_hessian_directional_derivative(
2262        &self,
2263        block_states: &[ParameterBlockState],
2264        block_idx: usize,
2265        d_beta: &Array1<f64>,
2266    ) -> Result<Option<Array2<f64>>, String> {
2267        validate_block_count::<GamlssError>(
2268            "GaussianLocationScaleWiggleFamily",
2269            3,
2270            block_states.len(),
2271        )?;
2272        let pmu = self
2273            .mu_design
2274            .as_ref()
2275            .ok_or_else(|| {
2276                "GaussianLocationScaleWiggleFamily exact path is missing mu design".to_string()
2277            })?
2278            .ncols();
2279        let p_ls = self
2280            .log_sigma_design
2281            .as_ref()
2282            .ok_or_else(|| {
2283                "GaussianLocationScaleWiggleFamily exact path is missing log-sigma design"
2284                    .to_string()
2285            })?
2286            .ncols();
2287        let pw = block_states[Self::BLOCK_WIGGLE].beta.len();
2288        let total = pmu + p_ls + pw;
2289        let (start, end) = match block_idx {
2290            Self::BLOCK_MU => (0usize, pmu),
2291            Self::BLOCK_LOG_SIGMA => (pmu, pmu + p_ls),
2292            Self::BLOCK_WIGGLE => (pmu + p_ls, total),
2293            _ => return Ok(None),
2294        };
2295        if d_beta.len() != end - start {
2296            return Err(GamlssError::DimensionMismatch { reason: format!(
2297                "GaussianLocationScaleWiggleFamily block {block_idx} d_beta length mismatch: got {}, expected {}",
2298                d_beta.len(),
2299                end - start
2300            ) }.into());
2301        }
2302        let mut d_beta_flat = Array1::<f64>::zeros(total);
2303        d_beta_flat.slice_mut(s![start..end]).assign(d_beta);
2304        let (xmu, x_ls) = self.dense_block_designs()?;
2305        let d_joint = self
2306            .exact_newton_joint_hessian_directional_derivative_from_designs(
2307                block_states,
2308                &xmu,
2309                &x_ls,
2310                &d_beta_flat,
2311            )?
2312            .ok_or_else(|| "missing Gaussian wiggle exact joint directional Hessian".to_string())?;
2313        Ok(Some(d_joint.slice(s![start..end, start..end]).to_owned()))
2314    }
2315
2316    fn exact_newton_joint_hessian(
2317        &self,
2318        block_states: &[ParameterBlockState],
2319    ) -> Result<Option<Array2<f64>>, String> {
2320        self.exact_newton_joint_hessian_for_specs(block_states, None)
2321    }
2322
2323    fn has_explicit_joint_hessian(&self) -> bool {
2324        true
2325    }
2326
2327    fn exact_newton_joint_hessian_directional_derivative(
2328        &self,
2329        block_states: &[ParameterBlockState],
2330        d_beta_flat: &Array1<f64>,
2331    ) -> Result<Option<Array2<f64>>, String> {
2332        self.exact_newton_joint_hessian_directional_derivative_for_specs(
2333            block_states,
2334            None,
2335            d_beta_flat,
2336        )
2337    }
2338
2339    fn exact_newton_joint_hessiansecond_directional_derivative(
2340        &self,
2341        block_states: &[ParameterBlockState],
2342        d_beta_u_flat: &Array1<f64>,
2343        d_beta_v_flat: &Array1<f64>,
2344    ) -> Result<Option<Array2<f64>>, String> {
2345        self.exact_newton_joint_hessian_second_directional_derivative_for_specs(
2346            block_states,
2347            None,
2348            d_beta_u_flat,
2349            d_beta_v_flat,
2350        )
2351    }
2352
2353    fn exact_newton_joint_hessian_with_specs(
2354        &self,
2355        block_states: &[ParameterBlockState],
2356        specs: &[ParameterBlockSpec],
2357    ) -> Result<Option<Array2<f64>>, String> {
2358        self.exact_newton_joint_hessian_for_specs(block_states, Some(specs))
2359    }
2360
2361    fn exact_newton_joint_hessian_directional_derivative_with_specs(
2362        &self,
2363        block_states: &[ParameterBlockState],
2364        specs: &[ParameterBlockSpec],
2365        d_beta_flat: &Array1<f64>,
2366    ) -> Result<Option<Array2<f64>>, String> {
2367        self.exact_newton_joint_hessian_directional_derivative_for_specs(
2368            block_states,
2369            Some(specs),
2370            d_beta_flat,
2371        )
2372    }
2373
2374    fn exact_newton_joint_hessian_second_directional_derivative_with_specs(
2375        &self,
2376        block_states: &[ParameterBlockState],
2377        specs: &[ParameterBlockSpec],
2378        d_beta_u_flat: &Array1<f64>,
2379        d_beta_v_flat: &Array1<f64>,
2380    ) -> Result<Option<Array2<f64>>, String> {
2381        self.exact_newton_joint_hessian_second_directional_derivative_for_specs(
2382            block_states,
2383            Some(specs),
2384            d_beta_u_flat,
2385            d_beta_v_flat,
2386        )
2387    }
2388
2389    fn exact_newton_joint_psi_terms(
2390        &self,
2391        block_states: &[ParameterBlockState],
2392        specs: &[ParameterBlockSpec],
2393        derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
2394        psi_index: usize,
2395    ) -> Result<Option<crate::custom_family::ExactNewtonJointPsiTerms>, String> {
2396        self.exact_newton_joint_psi_terms_for_specs(
2397            block_states,
2398            specs,
2399            derivative_blocks,
2400            psi_index,
2401        )
2402    }
2403
2404    fn exact_newton_joint_psisecond_order_terms(
2405        &self,
2406        block_states: &[ParameterBlockState],
2407        specs: &[ParameterBlockSpec],
2408        derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
2409        psi_i: usize,
2410        psi_j: usize,
2411    ) -> Result<Option<crate::custom_family::ExactNewtonJointPsiSecondOrderTerms>, String> {
2412        self.exact_newton_joint_psisecond_order_terms_for_specs(
2413            block_states,
2414            specs,
2415            derivative_blocks,
2416            psi_i,
2417            psi_j,
2418        )
2419    }
2420
2421    fn exact_newton_joint_psihessian_directional_derivative(
2422        &self,
2423        block_states: &[ParameterBlockState],
2424        specs: &[ParameterBlockSpec],
2425        derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
2426        psi_index: usize,
2427        d_beta_flat: &Array1<f64>,
2428    ) -> Result<Option<Array2<f64>>, String> {
2429        self.exact_newton_joint_psihessian_directional_derivative_for_specs(
2430            block_states,
2431            specs,
2432            derivative_blocks,
2433            psi_index,
2434            d_beta_flat,
2435        )
2436    }
2437
2438    fn exact_newton_joint_psi_workspace(
2439        &self,
2440        block_states: &[ParameterBlockState],
2441        specs: &[ParameterBlockSpec],
2442        derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
2443    ) -> Result<Option<Arc<dyn ExactNewtonJointPsiWorkspace>>, String> {
2444        if !self.exact_joint_supported() {
2445            return Ok(None);
2446        }
2447        Ok(Some(Arc::new(
2448            GaussianLocationScaleWiggleExactNewtonJointPsiWorkspace::new(
2449                self.clone(),
2450                block_states.to_vec(),
2451                specs,
2452                derivative_blocks.to_vec(),
2453            )?,
2454        )))
2455    }
2456
2457    /// Outer-aware joint ψ workspace with optional row subsample.
2458    ///
2459    /// The wiggle ψ workspace shares the generic `LocationScaleJointPsiWorkspace`
2460    /// with the non-wiggle GLS family, and the subsample is plumbed through
2461    /// the trait. The wiggle's `ws_psi_*_from_parts` impls currently drop the
2462    /// subsample and fall back to the full-data exact wiggle ψ path; see
2463    /// their inline rationale and the `apply_ht_mask_*` helpers used by the
2464    /// non-wiggle GLS family. Storing the subsample here keeps the workspace
2465    /// signature uniform across both families and leaves a hook for the
2466    /// follow-up that refactors the wiggle inline arrays into a weights
2467    /// struct so HT masking can be applied in one place. Even without that
2468    /// refactor, the total outer score under subsampling remains an unbiased
2469    /// estimator of the full-data outer score: HT-unbiased LL
2470    /// (`log_likelihood_only_with_options`) + HT-unbiased ρ-Hessian
2471    /// (`exact_newton_joint_hessian_workspace_with_options`) + exact-unbiased
2472    /// ψ (the wiggle workspace path) = unbiased.
2473    fn exact_newton_joint_psi_workspace_with_options(
2474        &self,
2475        block_states: &[ParameterBlockState],
2476        specs: &[ParameterBlockSpec],
2477        derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
2478        options: &BlockwiseFitOptions,
2479    ) -> Result<Option<Arc<dyn ExactNewtonJointPsiWorkspace>>, String> {
2480        if !self.exact_joint_supported() {
2481            return Ok(None);
2482        }
2483        Ok(Some(Arc::new(
2484            GaussianLocationScaleWiggleExactNewtonJointPsiWorkspace::new_with_subsample(
2485                self.clone(),
2486                block_states.to_vec(),
2487                specs,
2488                derivative_blocks.to_vec(),
2489                options.outer_score_subsample.clone(),
2490            )?,
2491        )))
2492    }
2493
2494    fn block_geometry(
2495        &self,
2496        block_states: &[ParameterBlockState],
2497        spec: &ParameterBlockSpec,
2498    ) -> Result<(DesignMatrix, Array1<f64>), String> {
2499        if spec.name != "wiggle" {
2500            return Ok((spec.design.clone(), spec.offset.clone()));
2501        }
2502        if block_states.is_empty() {
2503            return Err(GamlssError::UnsupportedConfiguration {
2504                reason: "Gaussian wiggle geometry requires mean block".to_string(),
2505            }
2506            .into());
2507        }
2508        let eta_mu = &block_states[Self::BLOCK_MU].eta;
2509        if eta_mu.len() != self.y.len() {
2510            return Err(GamlssError::DimensionMismatch {
2511                reason: "Gaussian wiggle geometry input size mismatch".to_string(),
2512            }
2513            .into());
2514        }
2515        let x = self.wiggle_design(eta_mu.view())?;
2516        if x.ncols() != spec.design.ncols() {
2517            return Err(GamlssError::DimensionMismatch {
2518                reason: format!(
2519                    "Gaussian dynamic wiggle design col mismatch: got {}, expected {}",
2520                    x.ncols(),
2521                    spec.design.ncols()
2522                ),
2523            }
2524            .into());
2525        }
2526        let nrows = x.nrows();
2527        Ok((
2528            DesignMatrix::Dense(gam_linalg::matrix::DenseDesignMatrix::from(x)),
2529            Array1::zeros(nrows),
2530        ))
2531    }
2532
2533    fn block_geometry_is_dynamic(&self) -> bool {
2534        true
2535    }
2536
2537    fn exact_newton_joint_hessian_workspace(
2538        &self,
2539        block_states: &[ParameterBlockState],
2540        specs: &[ParameterBlockSpec],
2541    ) -> Result<Option<Arc<dyn ExactNewtonJointHessianWorkspace>>, String> {
2542        let Some((xmu, x_ls)) = self.exact_joint_dense_block_designs(Some(specs))? else {
2543            return Ok(None);
2544        };
2545        let workspace = GaussianLocationScaleWiggleHessianWorkspace::new(
2546            self.clone(),
2547            block_states.to_vec(),
2548            xmu.into_owned(),
2549            x_ls.into_owned(),
2550        )?;
2551        Ok(Some(Arc::new(workspace)))
2552    }
2553
2554    /// Outer-aware joint-Hessian workspace with optional row subsample.
2555    ///
2556    /// When `options.outer_score_subsample` is `None`, this is byte-identical
2557    /// to `exact_newton_joint_hessian_workspace`. When `Some`, the precomputed
2558    /// per-row coefficient arrays in `pieces` (`coeff_mm`, `coeff_ml`,
2559    /// `coeff_ll`, `coeff_mw_b`, `coeff_mw_d`, `coeff_lw_b`, `coeff_ww`) —
2560    /// which every downstream assembly (`hessian_dense`, `hessian_matvec`,
2561    /// `hessian_diagonal`) consumes row-linearly via `Xᵀ diag(W) Y` — are
2562    /// replaced by a Horvitz–Thompson mask: each sampled row's coefficient
2563    /// is multiplied by `WeightedOuterRow.weight` (the inverse-inclusion
2564    /// factor 1/π_i; uniform or stratified sampling both supported), and
2565    /// non-sampled rows are zeroed. The `basis`/`basis_d1` matrices are
2566    /// row-weight-independent and remain unchanged. Note that the Gaussian
2567    /// wiggle has one fewer cross-coefficient than the binomial wiggle
2568    /// (no `coeff_lw_d`) because the wiggle enters the Gaussian likelihood
2569    /// only through `q = η_μ + η_w` (no σ-chain). The resulting joint Hessian
2570    /// is an unbiased estimator of the full-data joint Hessian. Inner PIRLS
2571    /// never installs the option, so the inner solve continues to consume
2572    /// the exact full-data Hessian.
2573    fn exact_newton_joint_hessian_workspace_with_options(
2574        &self,
2575        block_states: &[ParameterBlockState],
2576        specs: &[ParameterBlockSpec],
2577        options: &BlockwiseFitOptions,
2578    ) -> Result<Option<Arc<dyn ExactNewtonJointHessianWorkspace>>, String> {
2579        let Some((xmu, x_ls)) = self.exact_joint_dense_block_designs(Some(specs))? else {
2580            return Ok(None);
2581        };
2582        let mut workspace = GaussianLocationScaleWiggleHessianWorkspace::new(
2583            self.clone(),
2584            block_states.to_vec(),
2585            xmu.into_owned(),
2586            x_ls.into_owned(),
2587        )?;
2588        if let Some(subsample) = options.outer_score_subsample.as_ref() {
2589            workspace.apply_outer_subsample(subsample.rows.as_ref());
2590        }
2591        Ok(Some(Arc::new(workspace)))
2592    }
2593
2594    /// Outer-derivative policy: declare HT-subsample capability.
2595    ///
2596    /// GaussianLocationScaleWiggleFamily overrides
2597    /// `log_likelihood_only_with_options` and
2598    /// `exact_newton_joint_hessian_workspace_with_options` to consume
2599    /// `options.outer_score_subsample` with per-row Horvitz–Thompson weights
2600    /// (each sampled row's contribution is multiplied by
2601    /// `WeightedOuterRow.weight = 1/π_i`; non-sampled rows are zeroed),
2602    /// yielding unbiased estimators of the full-data log-likelihood and
2603    /// joint Hessian. The ψ-workspace path is also subsample-aware via
2604    /// `exact_newton_joint_psi_workspace_with_options`, which threads the
2605    /// subsample down to per-row weight masking inside the joint-ψ second-
2606    /// order and directional-derivative reductions. Inner-PIRLS and final-
2607    /// covariance paths never install the option, so they continue to
2608    /// consume the exact full-data quantities.
2609    fn outer_derivative_subsample_capable(&self) -> bool {
2610        true
2611    }
2612
2613    fn inner_coefficient_hessian_hvp_available(&self, specs: &[ParameterBlockSpec]) -> bool {
2614        // Same gating as the workspace impl above: matrix-free fires when
2615        // `exact_joint_dense_block_designs` is satisfiable, which requires
2616        // both location and scale block designs to be present.  The wiggle
2617        // block is folded into the operator via the per-row pieces — its
2618        // presence is implied by reaching the wiggle family in the first
2619        // place — so the predicate matches the non-wiggle case.
2620        self.exact_joint_supported()
2621            && matches!(
2622                self.exact_joint_dense_block_designs(Some(specs)),
2623                Ok(Some(_))
2624            )
2625    }
2626}
2627
2628/// Matrix-free joint-Hessian operator for the 3-block Gaussian
2629/// location-scale wiggle family. See `GaussianLocationScaleWiggleHessianRowPieces`
2630/// for the per-row weight structure. The matvec applies
2631///
2632///   r_μ  = D_mm u_μ + D_ml u_ls + D_mw_b (B v_w) + D_mw_d (B' v_w),
2633///   r_ls = D_ml u_μ + D_ll u_ls + D_lw_b (B v_w),
2634///   r_b  = D_mw_b u_μ + D_lw_b u_ls + D_ww (B v_w),
2635///   r_d  = D_mw_d u_μ,
2636///
2637/// then forms `out_w = B^T r_b + (B')^T r_d`. The ls-wiggle cross block has
2638/// no B' contribution because the wiggle enters the Gaussian likelihood only
2639/// through `q = η_μ + η_w` (no σ-chain), so the Gaussian wiggle has one
2640/// fewer cross-coefficient than the binomial wiggle.
2641pub(crate) struct GaussianLocationScaleWiggleHessianWorkspace {
2642    pub(crate) family: GaussianLocationScaleWiggleFamily,
2643    pub(crate) block_states: Vec<ParameterBlockState>,
2644    pub(crate) xmu: Arc<Array2<f64>>,
2645    pub(crate) x_ls: Arc<Array2<f64>>,
2646    pub(crate) pieces: GaussianLocationScaleWiggleHessianRowPieces,
2647}
2648
2649impl GaussianLocationScaleWiggleHessianWorkspace {
2650    pub(crate) fn new(
2651        family: GaussianLocationScaleWiggleFamily,
2652        block_states: Vec<ParameterBlockState>,
2653        xmu: Array2<f64>,
2654        x_ls: Array2<f64>,
2655    ) -> Result<Self, String> {
2656        let pieces = family.wiggle_hessian_row_pieces(&block_states)?;
2657        Ok(Self {
2658            family,
2659            block_states,
2660            xmu: Arc::new(xmu),
2661            x_ls: Arc::new(x_ls),
2662            pieces,
2663        })
2664    }
2665
2666    /// Apply a Horvitz–Thompson outer-row subsample mask to the precomputed
2667    /// per-row coefficient arrays in place.
2668    ///
2669    /// Each sampled row's `coeff_*[i]` is multiplied by its
2670    /// `WeightedOuterRow.weight` (the HT inverse-inclusion factor 1/π_i —
2671    /// uniform or stratified sampling both supported). All non-sampled rows
2672    /// are zeroed. Because every downstream assembly (`hessian_dense`,
2673    /// `hessian_matvec`, `hessian_diagonal`) is row-linear in these arrays
2674    /// via `Xᵀ diag(W) Y`, the resulting joint-Hessian is an unbiased
2675    /// estimator of the full-data joint Hessian. The `basis`/`basis_d1`
2676    /// matrices are independent of the per-row weights and remain unchanged.
2677    /// The Gaussian wiggle has 7 coefficient arrays (no `coeff_lw_d`, unlike
2678    /// the binomial wiggle's 8) because the wiggle enters the Gaussian
2679    /// likelihood only through `q = η_μ + η_w` (no σ-chain).
2680    pub(crate) fn apply_outer_subsample(
2681        &mut self,
2682        rows: &[crate::outer_subsample::WeightedOuterRow],
2683    ) {
2684        let n = self.pieces.coeff_mm.len();
2685        let mut mask_mm = Array1::<f64>::zeros(n);
2686        let mut mask_ml = Array1::<f64>::zeros(n);
2687        let mut mask_ll = Array1::<f64>::zeros(n);
2688        let mut mask_mw_b = Array1::<f64>::zeros(n);
2689        let mut mask_mw_d = Array1::<f64>::zeros(n);
2690        let mut mask_lw_b = Array1::<f64>::zeros(n);
2691        let mut maskww = Array1::<f64>::zeros(n);
2692        for r in rows {
2693            let i = r.index;
2694            let w = r.weight;
2695            mask_mm[i] = self.pieces.coeff_mm[i] * w;
2696            mask_ml[i] = self.pieces.coeff_ml[i] * w;
2697            mask_ll[i] = self.pieces.coeff_ll[i] * w;
2698            mask_mw_b[i] = self.pieces.coeff_mw_b[i] * w;
2699            mask_mw_d[i] = self.pieces.coeff_mw_d[i] * w;
2700            mask_lw_b[i] = self.pieces.coeff_lw_b[i] * w;
2701            maskww[i] = self.pieces.coeff_ww[i] * w;
2702        }
2703        self.pieces.coeff_mm = mask_mm;
2704        self.pieces.coeff_ml = mask_ml;
2705        self.pieces.coeff_ll = mask_ll;
2706        self.pieces.coeff_mw_b = mask_mw_b;
2707        self.pieces.coeff_mw_d = mask_mw_d;
2708        self.pieces.coeff_lw_b = mask_lw_b;
2709        self.pieces.coeff_ww = maskww;
2710    }
2711}
2712
2713impl ExactNewtonJointHessianWorkspace for GaussianLocationScaleWiggleHessianWorkspace {
2714    fn hessian_dense(&self) -> Result<Option<Array2<f64>>, String> {
2715        // Same Hv structure as `hessian_matvec`, but routed through the
2716        // already-existing `assemble_dense` row-pieces helper (six GEMMs:
2717        // h_mm, h_ml, h_mw_b, h_mw_d, h_lw, h_ww). Avoids `total` canonical-
2718        // basis HVPs in `MatrixFreeSpdOperator::materialize_dense_operator`,
2719        // which at large scale (n≈320k, p_total≈82) costs ~568s per κ-iter
2720        // versus ~1s for the dense build.
2721        let dense = self
2722            .pieces
2723            .assemble_dense(self.xmu.as_ref(), self.x_ls.as_ref())?;
2724        Ok(Some(dense))
2725    }
2726
2727    fn hessian_matvec_available(&self) -> bool {
2728        true
2729    }
2730
2731    fn hessian_matvec(&self, v: &Array1<f64>) -> Result<Option<Array1<f64>>, String> {
2732        let pmu = self.xmu.ncols();
2733        let p_ls = self.x_ls.ncols();
2734        let pw = self.pieces.basis.ncols();
2735        let total = pmu + p_ls + pw;
2736        if v.len() != total {
2737            return Err(GamlssError::DimensionMismatch {
2738                reason: format!(
2739                    "GaussianLocationScaleWiggle matvec dimension mismatch: got {}, expected {}",
2740                    v.len(),
2741                    total
2742                ),
2743            }
2744            .into());
2745        }
2746        let v_mu = v.slice(s![0..pmu]);
2747        let v_ls = v.slice(s![pmu..pmu + p_ls]);
2748        let v_w = v.slice(s![pmu + p_ls..total]);
2749
2750        let u_mu = fast_av(self.xmu.as_ref(), &v_mu);
2751        let u_ls = fast_av(self.x_ls.as_ref(), &v_ls);
2752        let u_b = fast_av(&self.pieces.basis, &v_w);
2753        let u_d = fast_av(&self.pieces.basis_d1, &v_w);
2754
2755        let r_mu = &self.pieces.coeff_mm * &u_mu
2756            + &self.pieces.coeff_ml * &u_ls
2757            + &self.pieces.coeff_mw_b * &u_b
2758            + &self.pieces.coeff_mw_d * &u_d;
2759        let r_ls = &self.pieces.coeff_ml * &u_mu
2760            + &self.pieces.coeff_ll * &u_ls
2761            + &self.pieces.coeff_lw_b * &u_b;
2762        let r_b = &self.pieces.coeff_mw_b * &u_mu
2763            + &self.pieces.coeff_lw_b * &u_ls
2764            + &self.pieces.coeff_ww * &u_b;
2765        let r_d = &self.pieces.coeff_mw_d * &u_mu;
2766
2767        let out_mu = fast_atv(self.xmu.as_ref(), &r_mu);
2768        let out_ls = fast_atv(self.x_ls.as_ref(), &r_ls);
2769        let out_w = fast_atv(&self.pieces.basis, &r_b) + &fast_atv(&self.pieces.basis_d1, &r_d);
2770
2771        let mut out = Array1::<f64>::zeros(total);
2772        out.slice_mut(s![0..pmu]).assign(&out_mu);
2773        out.slice_mut(s![pmu..pmu + p_ls]).assign(&out_ls);
2774        out.slice_mut(s![pmu + p_ls..total]).assign(&out_w);
2775        Ok(Some(out))
2776    }
2777
2778    fn hessian_diagonal(&self) -> Result<Option<Array1<f64>>, String> {
2779        let pmu = self.xmu.ncols();
2780        let p_ls = self.x_ls.ncols();
2781        let pw = self.pieces.basis.ncols();
2782        let total = pmu + p_ls + pw;
2783        // Diagonals are independent column-wise reductions: parallelize.
2784        use rayon::iter::{IntoParallelIterator, ParallelIterator};
2785        let diag_mu: Vec<f64> = (0..pmu)
2786            .into_par_iter()
2787            .map(|j| {
2788                let col = self.xmu.column(j);
2789                col.iter()
2790                    .zip(self.pieces.coeff_mm.iter())
2791                    .map(|(&v, &c)| c * v * v)
2792                    .sum()
2793            })
2794            .collect();
2795        let diag_ls: Vec<f64> = (0..p_ls)
2796            .into_par_iter()
2797            .map(|j| {
2798                let col = self.x_ls.column(j);
2799                col.iter()
2800                    .zip(self.pieces.coeff_ll.iter())
2801                    .map(|(&v, &c)| c * v * v)
2802                    .sum()
2803            })
2804            .collect();
2805        let diag_w: Vec<f64> = (0..pw)
2806            .into_par_iter()
2807            .map(|j| {
2808                let col = self.pieces.basis.column(j);
2809                col.iter()
2810                    .zip(self.pieces.coeff_ww.iter())
2811                    .map(|(&v, &c)| c * v * v)
2812                    .sum()
2813            })
2814            .collect();
2815        let mut diag = Array1::<f64>::zeros(total);
2816        for (j, v) in diag_mu.into_iter().enumerate() {
2817            diag[j] = v;
2818        }
2819        for (j, v) in diag_ls.into_iter().enumerate() {
2820            diag[pmu + j] = v;
2821        }
2822        for (j, v) in diag_w.into_iter().enumerate() {
2823            diag[pmu + p_ls + j] = v;
2824        }
2825        Ok(Some(diag))
2826    }
2827
2828    fn directional_derivative(
2829        &self,
2830        d_beta_flat: &Array1<f64>,
2831    ) -> Result<Option<Array2<f64>>, String> {
2832        self.family
2833            .exact_newton_joint_hessian_directional_derivative_from_designs(
2834                &self.block_states,
2835                self.xmu.as_ref(),
2836                self.x_ls.as_ref(),
2837                d_beta_flat,
2838            )
2839    }
2840
2841    fn directional_derivative_operator(
2842        &self,
2843        d_beta_flat: &Array1<f64>,
2844    ) -> Result<Option<Arc<dyn gam_problem::HyperOperator>>, String> {
2845        self.family.gls_wiggle_directional_operator(
2846            &self.block_states,
2847            self.xmu.clone(),
2848            self.x_ls.clone(),
2849            d_beta_flat,
2850        )
2851    }
2852
2853    fn second_directional_derivative(
2854        &self,
2855        d_beta_u_flat: &Array1<f64>,
2856        d_beta_v_flat: &Array1<f64>,
2857    ) -> Result<Option<Array2<f64>>, String> {
2858        self.family
2859            .exact_newton_joint_hessiansecond_directional_derivative_from_designs(
2860                &self.block_states,
2861                self.xmu.as_ref(),
2862                self.x_ls.as_ref(),
2863                d_beta_u_flat,
2864                d_beta_v_flat,
2865            )
2866    }
2867
2868    fn second_directional_derivative_operator(
2869        &self,
2870        d_beta_u: &Array1<f64>,
2871        d_beta_v: &Array1<f64>,
2872    ) -> Result<Option<Arc<dyn gam_problem::HyperOperator>>, String> {
2873        self.family.gls_wiggle_second_directional_operator(
2874            &self.block_states,
2875            self.xmu.clone(),
2876            self.x_ls.clone(),
2877            d_beta_u,
2878            d_beta_v,
2879        )
2880    }
2881}
2882
2883impl CustomFamilyGenerative for GaussianLocationScaleWiggleFamily {
2884    fn generativespec(
2885        &self,
2886        block_states: &[ParameterBlockState],
2887    ) -> Result<GenerativeSpec, String> {
2888        validate_block_count::<GamlssError>(
2889            "GaussianLocationScaleWiggleFamily",
2890            3,
2891            block_states.len(),
2892        )?;
2893        let eta_mu = &block_states[Self::BLOCK_MU].eta;
2894        let eta_wiggle = &block_states[Self::BLOCK_WIGGLE].eta;
2895        let eta_log_sigma = &block_states[Self::BLOCK_LOG_SIGMA].eta;
2896        let n = eta_mu.len();
2897        let mean = gamlss_rowwise_map(n, |i| eta_mu[i] + eta_wiggle[i]);
2898        let sigma = gamlss_rowwise_map(n, |i| logb_sigma_from_eta_scalar(eta_log_sigma[i]));
2899        Ok(GenerativeSpec {
2900            mean,
2901            noise: NoiseModel::Gaussian { sigma },
2902        })
2903    }
2904}
2905
2906pub(crate) fn expect_single_block<'a>(
2907    block_states: &'a [ParameterBlockState],
2908    family_name: &str,
2909) -> Result<&'a ParameterBlockState, String> {
2910    validate_block_count::<GamlssError>(family_name, 1, block_states.len())?;
2911    Ok(&block_states[0])
2912}