Skip to main content

gam_models/gamlss/gaussian/
binomial_mean_wiggle.rs

1// Real concern-organized submodule of the gamlss family stack.
2// Cross-module items are re-exported flat through the parent (`gamlss.rs`),
3// so `use super::*;` makes the sibling-concern symbols this module references
4// resolve through the parent namespace.
5use super::*;
6
7#[derive(Clone)]
8pub struct BinomialMeanWiggleFamily {
9    pub y: Array1<f64>,
10    pub weights: Array1<f64>,
11    pub link_kind: InverseLink,
12    pub wiggle_knots: Array1<f64>,
13    pub wiggle_degree: usize,
14    /// Resource policy threaded into PsiDesignMap construction during
15    /// exact-Newton joint psi evaluation. Defaults to
16    /// `ResourcePolicy::default_library()` when the family is built without
17    /// an explicit policy.
18    pub policy: gam_runtime::resource::ResourcePolicy,
19    /// The **frozen, identifiable** warp design `B̃ = B(η̂)·Z` used for the
20    /// duration of one inner joint-Newton solve (#1596). `None` preserves the
21    /// original dynamic-basis behaviour; `Some(B̃)` switches the family into the
22    /// frozen-basis Gauss-Newton mode.
23    ///
24    /// **Why frozen.** The fully-coupled `q = η + B(η)·β_w` model regenerates the
25    /// monotone I-spline basis at the *moving* `η` every cycle. The trust-region
26    /// quadratic model freezes `B` at the cycle-start `η`, but the line search
27    /// re-evaluates the objective with `B` rebuilt at the trial `η`; for any step
28    /// that moves `η` the actual reduction diverges from the model, the trust
29    /// radius collapses, and the constrained KKT certificate refuses every
30    /// iterate (`active_set_incomplete`) even when the optimal warp is flat.
31    /// Freezing `B(η̂)` makes `q = η + B̃·β_w` linear in `(β_η, β_w)` with
32    /// `∂q/∂η = 1` and no `∂B/∂η` chain term — a well-conditioned two-block GLM
33    /// that certifies. The caller re-freezes at the refit `η̂` across a few outer
34    /// Gauss-Newton iterations (`fit_binomial_mean_wiggle`) until `η̂` stops
35    /// moving.
36    ///
37    /// **Why `Z`-reduced (identifiable).** A monotone I-spline of the linear
38    /// predictor `η` can represent `η` itself, so the raw `B(η̂)` columns alias
39    /// the eta block's design `X`; the canonical-gauge RRQR then drops a column
40    /// from the (interpretable) mean block, leaving it rank-deficient and the
41    /// warp unable to reach the baseline. `Z` is an orthonormal basis for
42    /// `null(Xᵀ B(η̂))`, so `B̃ = B(η̂)·Z` is exactly the part of the warp
43    /// **orthogonal to the mean's column space** — the link *curvature* the base
44    /// link cannot represent. `[X | B̃]` is full rank: the mean block stays full,
45    /// no gauge drop, and the warp nests the base link (`β_w = 0`). The fitted
46    /// `γ` maps back to the standard I-spline coefficients as `β_w = Z·γ` for the
47    /// predict-time warp reconstruction `B(η_new)·β_w`.
48    pub frozen_warp_design: Option<Arc<Array2<f64>>>,
49}
50
51pub(crate) struct BinomialMeanWiggleGeometry {
52    pub(crate) basis: Array2<f64>,
53    pub(crate) basis_d1: Array2<f64>,
54    pub(crate) basis_d2: Array2<f64>,
55    pub(crate) basis_d3: Array2<f64>,
56    pub(crate) dq_dq0: Array1<f64>,
57    pub(crate) d2q_dq02: Array1<f64>,
58    pub(crate) d3q_dq03: Array1<f64>,
59    pub(crate) d4q_dq04: Array1<f64>,
60}
61
62pub(crate) struct BinomialMeanWiggleJointPsiDirection {
63    pub(crate) x_eta_psi: Option<Array2<f64>>,
64    pub(crate) z_eta_psi: Array1<f64>,
65}
66
67impl BinomialMeanWiggleFamily {
68    pub const BLOCK_ETA: usize = 0;
69    pub const BLOCK_WIGGLE: usize = 1;
70
71    pub(crate) fn wiggle_basiswith_options(
72        &self,
73        q0: ArrayView1<'_, f64>,
74        options: BasisOptions,
75    ) -> Result<Array2<f64>, String> {
76        monotone_wiggle_basis_with_derivative_order(
77            q0,
78            &self.wiggle_knots,
79            self.wiggle_degree,
80            options.derivative_order,
81        )
82    }
83
84    pub(crate) fn wiggle_design(&self, q0: ArrayView1<'_, f64>) -> Result<Array2<f64>, String> {
85        self.wiggle_basiswith_options(q0, BasisOptions::value())
86    }
87
88    pub(crate) fn wiggle_dq_dq0(
89        &self,
90        q0: ArrayView1<'_, f64>,
91        beta_link_wiggle: ArrayView1<'_, f64>,
92    ) -> Result<Array1<f64>, String> {
93        let d_constrained = self.wiggle_basiswith_options(q0, BasisOptions::first_derivative())?;
94        if d_constrained.ncols() != beta_link_wiggle.len() {
95            return Err(GamlssError::DimensionMismatch { reason: format!(
96                "wiggle derivative/beta mismatch: basis has {} columns but beta_link_wiggle has {} coefficients",
97                d_constrained.ncols(),
98                beta_link_wiggle.len()
99            ) }.into());
100        }
101        Ok(d_constrained.dot(&beta_link_wiggle) + 1.0)
102    }
103
104    pub(crate) fn wiggle_d2q_dq02(
105        &self,
106        q0: ArrayView1<'_, f64>,
107        beta_link_wiggle: ArrayView1<'_, f64>,
108    ) -> Result<Array1<f64>, String> {
109        let d2 = self.wiggle_basiswith_options(q0, BasisOptions::second_derivative())?;
110        if d2.ncols() != beta_link_wiggle.len() {
111            return Err(GamlssError::DimensionMismatch { reason: format!(
112                "wiggle second-derivative/beta mismatch: basis has {} columns but beta_link_wiggle has {} coefficients",
113                d2.ncols(),
114                beta_link_wiggle.len()
115            ) }.into());
116        }
117        Ok(d2.dot(&beta_link_wiggle))
118    }
119
120    pub(crate) fn wiggle_d3basis_constrained(
121        &self,
122        q0: ArrayView1<'_, f64>,
123    ) -> Result<Array2<f64>, String> {
124        monotone_wiggle_basis_with_derivative_order(q0, &self.wiggle_knots, self.wiggle_degree, 3)
125    }
126
127    pub(crate) fn wiggle_d3q_dq03(
128        &self,
129        q0: ArrayView1<'_, f64>,
130        beta_link_wiggle: ArrayView1<'_, f64>,
131    ) -> Result<Array1<f64>, String> {
132        let d3 = self.wiggle_d3basis_constrained(q0)?;
133        if d3.ncols() != beta_link_wiggle.len() {
134            return Err(GamlssError::DimensionMismatch { reason: format!(
135                "wiggle third-derivative/beta mismatch: basis has {} columns but beta_link_wiggle has {} coefficients",
136                d3.ncols(),
137                beta_link_wiggle.len()
138            ) }.into());
139        }
140        Ok(d3.dot(&beta_link_wiggle))
141    }
142
143    pub(crate) fn wiggle_d4q_dq04(
144        &self,
145        q0: ArrayView1<'_, f64>,
146        beta_link_wiggle: ArrayView1<'_, f64>,
147    ) -> Result<Array1<f64>, String> {
148        let d4 = monotone_wiggle_basis_with_derivative_order(
149            q0,
150            &self.wiggle_knots,
151            self.wiggle_degree,
152            4,
153        )?;
154        if d4.ncols() != beta_link_wiggle.len() {
155            return Err(GamlssError::DimensionMismatch { reason: format!(
156                "wiggle fourth-derivative/beta mismatch: basis has {} columns but beta_link_wiggle has {} coefficients",
157                d4.ncols(),
158                beta_link_wiggle.len()
159            ) }.into());
160        }
161        Ok(d4.dot(&beta_link_wiggle))
162    }
163
164    pub(crate) fn wiggle_geometry(
165        &self,
166        q0: ArrayView1<'_, f64>,
167        beta_link_wiggle: ArrayView1<'_, f64>,
168    ) -> Result<BinomialMeanWiggleGeometry, String> {
169        // Frozen-basis (#1596): the warp offset `s = B̃·β_w` uses the pinned,
170        // identifiable design `B̃ = B(η̂)·Z`, so it is a per-row constant w.r.t.
171        // the *live* linear predictor `η`. Then `q = η + s` gives `∂q/∂η = 1`
172        // exactly and every higher derivative of `q` in `η` vanishes, and the
173        // `∂B/∂η` chain bases drop out (the warp basis does not move with `η`).
174        // The value basis `B̃` — the column block carrying `∂q/∂β_w` — is the
175        // only surviving geometry term.
176        if let Some(frozen) = self.frozen_warp_design.as_ref() {
177            let n = frozen.nrows();
178            let pw = frozen.ncols();
179            return Ok(BinomialMeanWiggleGeometry {
180                basis: frozen.as_ref().clone(),
181                basis_d1: Array2::zeros((n, pw)),
182                basis_d2: Array2::zeros((n, pw)),
183                basis_d3: Array2::zeros((n, pw)),
184                dq_dq0: Array1::ones(n),
185                d2q_dq02: Array1::zeros(n),
186                d3q_dq03: Array1::zeros(n),
187                d4q_dq04: Array1::zeros(n),
188            });
189        }
190        let basis = self.wiggle_design(q0)?;
191        let basis_d1 = self.wiggle_basiswith_options(q0, BasisOptions::first_derivative())?;
192        let basis_d2 = self.wiggle_basiswith_options(q0, BasisOptions::second_derivative())?;
193        let basis_d3 = self.wiggle_d3basis_constrained(q0)?;
194        let dq_dq0 = self.wiggle_dq_dq0(q0, beta_link_wiggle)?;
195        let d2q_dq02 = self.wiggle_d2q_dq02(q0, beta_link_wiggle)?;
196        let d3q_dq03 = self.wiggle_d3q_dq03(q0, beta_link_wiggle)?;
197        let d4q_dq04 = self.wiggle_d4q_dq04(q0, beta_link_wiggle)?;
198        Ok(BinomialMeanWiggleGeometry {
199            basis,
200            basis_d1,
201            basis_d2,
202            basis_d3,
203            dq_dq0,
204            d2q_dq02,
205            d3q_dq03,
206            d4q_dq04,
207        })
208    }
209
210    pub(crate) fn neglog_q_derivatives(
211        &self,
212        y: f64,
213        weight: f64,
214        q: f64,
215    ) -> Result<(f64, f64, f64), String> {
216        let jet = inverse_link_jet_for_inverse_link(&self.link_kind, q)
217            .map_err(|e| format!("fixed-link wiggle inverse-link evaluation failed: {e}"))?;
218        // Pass μ RAW: the dispatch returns the exact q-derivatives of the
219        // evaluated loss for every representable μ in (0,1) and handles the
220        // saturated boundary itself. See binomial_location_scalerow (#948).
221        Ok(binomial_neglog_q_derivatives_dispatch(
222            y,
223            weight,
224            q,
225            jet.mu,
226            jet.d1,
227            jet.d2,
228            jet.d3,
229            &self.link_kind,
230        ))
231    }
232
233    pub(crate) fn neglog_q_fourth_derivative(
234        &self,
235        y: f64,
236        weight: f64,
237        q: f64,
238    ) -> Result<f64, String> {
239        let jet = inverse_link_jet_for_inverse_link(&self.link_kind, q)
240            .map_err(|e| format!("fixed-link wiggle inverse-link evaluation failed: {e}"))?;
241        // Pass μ RAW — see neglog_q_derivatives above (#948).
242        binomial_neglog_q_fourth_derivative_dispatch(
243            y,
244            weight,
245            q,
246            jet.mu,
247            jet.d1,
248            jet.d2,
249            jet.d3,
250            &self.link_kind,
251        )
252    }
253
254    pub(crate) fn dense_eta_design_fromspecs<'a>(
255        &self,
256        specs: &'a [ParameterBlockSpec],
257    ) -> Result<Cow<'a, Array2<f64>>, String> {
258        if specs.len() != 2 {
259            return Err(GamlssError::DimensionMismatch {
260                reason: format!(
261                    "BinomialMeanWiggleFamily expects 2 specs, got {}",
262                    specs.len()
263                ),
264            }
265            .into());
266        }
267        Ok(match specs[Self::BLOCK_ETA].design.as_dense_ref() {
268            Some(d) => Cow::Borrowed(d),
269            None => Cow::Owned(
270                specs[Self::BLOCK_ETA]
271                    .design
272                    .try_to_dense_with_policy(
273                        &self.policy.material_policy(),
274                        "BinomialMeanWiggle dense_eta_design_fromspecs eta",
275                    )
276                    .map_err(|e| e.to_string())?
277                    .as_ref()
278                    .clone(),
279            ),
280        })
281    }
282
283    pub(crate) fn exact_newton_joint_psi_direction(
284        &self,
285        block_states: &[ParameterBlockState],
286        derivative_blocks: &[Vec<CustomFamilyBlockPsiDerivative>],
287        psi_index: usize,
288        x_eta: &Array2<f64>,
289    ) -> Result<Option<BinomialMeanWiggleJointPsiDirection>, String> {
290        validate_block_count::<GamlssError>("BinomialMeanWiggleFamily", 2, block_states.len())?;
291        if derivative_blocks.len() != 2 {
292            return Err(GamlssError::DimensionMismatch { reason: format!(
293                "BinomialMeanWiggleFamily joint psi direction expects 2 derivative block lists, got {}",
294                derivative_blocks.len()
295            ) }.into());
296        }
297        let n = self.y.len();
298        let p_eta = x_eta.ncols();
299        let beta_eta = &block_states[Self::BLOCK_ETA].beta;
300        let mut global = 0usize;
301        for (block_idx, block_derivs) in derivative_blocks.iter().enumerate() {
302            for deriv in block_derivs {
303                if global == psi_index {
304                    if block_idx != Self::BLOCK_ETA {
305                        return Ok(None);
306                    }
307                    let x_eta_psi_map = resolve_custom_family_x_psi_map(
308                        deriv,
309                        n,
310                        p_eta,
311                        0..n,
312                        "BinomialMeanWiggleFamily eta",
313                        &self.policy,
314                    )?;
315                    let x_eta_psi = x_eta_psi_map.row_chunk(0..n)?;
316                    let z_eta_psi = x_eta_psi.dot(beta_eta);
317                    return Ok(Some(BinomialMeanWiggleJointPsiDirection {
318                        x_eta_psi: Some(x_eta_psi),
319                        z_eta_psi,
320                    }));
321                }
322                global += 1;
323            }
324        }
325        Ok(None)
326    }
327
328    pub(crate) fn exact_newton_joint_psi_action(
329        &self,
330        block_states: &[ParameterBlockState],
331        derivative_blocks: &[Vec<CustomFamilyBlockPsiDerivative>],
332        psi_index: usize,
333        p_eta: usize,
334    ) -> Result<Option<(CustomFamilyPsiDesignAction, Array1<f64>)>, String> {
335        validate_block_count::<GamlssError>("BinomialMeanWiggleFamily", 2, block_states.len())?;
336        if derivative_blocks.len() != 2 {
337            return Err(GamlssError::DimensionMismatch { reason: format!(
338                "BinomialMeanWiggleFamily joint psi action expects 2 derivative block lists, got {}",
339                derivative_blocks.len()
340            ) }.into());
341        }
342        let n = self.y.len();
343        let beta_eta = &block_states[Self::BLOCK_ETA].beta;
344        let mut global = 0usize;
345        for (block_idx, block_derivs) in derivative_blocks.iter().enumerate() {
346            for deriv in block_derivs {
347                if global == psi_index {
348                    if block_idx != Self::BLOCK_ETA {
349                        return Ok(None);
350                    }
351                    let action = match CustomFamilyPsiDesignAction::from_first_derivative(
352                        deriv,
353                        n,
354                        p_eta,
355                        0..n,
356                        "BinomialMeanWiggleFamily eta",
357                    ) {
358                        Ok(action) => action,
359                        Err(_) => return Ok(None),
360                    };
361                    let z_eta_psi = action.forward_mul(beta_eta.view());
362                    return Ok(Some((action, z_eta_psi)));
363                }
364                global += 1;
365            }
366        }
367        Ok(None)
368    }
369
370    pub(crate) fn bmw_static_hessian_operator(
371        &self,
372        block_states: &[ParameterBlockState],
373        x_eta_arc: Arc<Array2<f64>>,
374    ) -> Result<Arc<RowCoeffOperator>, String> {
375        validate_block_count::<GamlssError>("BinomialMeanWiggleFamily", 2, block_states.len())?;
376        let eta = &block_states[Self::BLOCK_ETA].eta;
377        let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
378        let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
379        let n = self.y.len();
380        if eta.len() != n || etaw.len() != n || self.weights.len() != n {
381            return Err(GamlssError::DimensionMismatch {
382                reason: "BinomialMeanWiggleFamily input size mismatch".to_string(),
383            }
384            .into());
385        }
386        let geom = self.wiggle_geometry(eta.view(), betaw.view())?;
387        let p_eta = x_eta_arc.ncols();
388        let pw = geom.basis.ncols();
389        let mut coeff_eta = Array1::<f64>::zeros(n);
390        let mut coeff_etaw_b = Array1::<f64>::zeros(n);
391        let mut coeff_etaw_d1 = Array1::<f64>::zeros(n);
392        let mut coeff_ww = Array1::<f64>::zeros(n);
393        for row in 0..n {
394            let q = eta[row] + etaw[row];
395            let (m1, m2, _) = self.neglog_q_derivatives(self.y[row], self.weights[row], q)?;
396            let a = geom.dq_dq0[row];
397            let b = geom.d2q_dq02[row];
398            coeff_eta[row] = hessian_coeff_fromobjective_q_terms(m1, m2, a, a, b);
399            coeff_etaw_b[row] = m2 * a;
400            coeff_etaw_d1[row] = m1;
401            coeff_ww[row] = m2;
402        }
403        Ok(Arc::new(RowCoeffOperator::from_directions(
404            vec![p_eta, pw],
405            vec![
406                (0, x_eta_arc),
407                (1, Arc::new(geom.basis)),
408                (1, Arc::new(geom.basis_d1)),
409            ],
410            vec![
411                (0, 0, coeff_eta),
412                (0, 1, coeff_etaw_b),
413                (0, 2, coeff_etaw_d1),
414                (1, 1, coeff_ww),
415            ],
416            n,
417        )))
418    }
419
420    pub(crate) fn bmw_directional_operator(
421        &self,
422        block_states: &[ParameterBlockState],
423        x_eta_arc: Arc<Array2<f64>>,
424        d_beta_flat: &Array1<f64>,
425    ) -> Result<Option<Arc<dyn gam_problem::HyperOperator>>, String> {
426        validate_block_count::<GamlssError>("BinomialMeanWiggleFamily", 2, block_states.len())?;
427        let eta = &block_states[Self::BLOCK_ETA].eta;
428        let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
429        let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
430        let n = self.y.len();
431        if eta.len() != n || etaw.len() != n || self.weights.len() != n {
432            return Err(GamlssError::DimensionMismatch {
433                reason: "BinomialMeanWiggleFamily input size mismatch".to_string(),
434            }
435            .into());
436        }
437        let geom = self.wiggle_geometry(eta.view(), betaw.view())?;
438        let p_eta = x_eta_arc.ncols();
439        let pw = geom.basis.ncols();
440        let total = p_eta + pw;
441        if d_beta_flat.len() != total {
442            return Err(GamlssError::DimensionMismatch {
443                reason: format!(
444                    "BinomialMeanWiggleFamily joint d_beta length mismatch: got {}, expected {}",
445                    d_beta_flat.len(),
446                    total
447                ),
448            }
449            .into());
450        }
451        let u_eta = d_beta_flat.slice(s![0..p_eta]).to_owned();
452        let uw = d_beta_flat.slice(s![p_eta..total]).to_owned();
453        let xi = fast_av(x_eta_arc.as_ref(), &u_eta);
454        let phi = fast_av(&geom.basis, &uw);
455        let basis1_u = fast_av(&geom.basis_d1, &uw);
456        let basis2_u = fast_av(&geom.basis_d2, &uw);
457
458        let mut coeff_eta = Array1::<f64>::zeros(n);
459        let mut coeff_etaw_b = Array1::<f64>::zeros(n);
460        let mut coeff_etaw_d1 = Array1::<f64>::zeros(n);
461        let mut coeff_etaw_d2 = Array1::<f64>::zeros(n);
462        let mut coeff_ww_bb = Array1::<f64>::zeros(n);
463        let mut coeff_ww_db = Array1::<f64>::zeros(n);
464        for row in 0..n {
465            let q = eta[row] + etaw[row];
466            let (m1, m2, m3) = self.neglog_q_derivatives(self.y[row], self.weights[row], q)?;
467            let a = geom.dq_dq0[row];
468            let b = geom.d2q_dq02[row];
469            let c = geom.d3q_dq03[row];
470            let q_u = a * xi[row] + phi[row];
471            let a_u = b * xi[row] + basis1_u[row];
472            let b_u = c * xi[row] + basis2_u[row];
473            coeff_eta[row] = directionalhessian_coeff_fromobjective_q_terms(
474                m1, m2, m3, q_u, a, a, b, a_u, a_u, b_u,
475            );
476            coeff_etaw_b[row] = m3 * q_u * a + m2 * a_u;
477            coeff_etaw_d1[row] = m2 * (a * xi[row] + q_u);
478            coeff_etaw_d2[row] = m1 * xi[row];
479            coeff_ww_bb[row] = m3 * q_u;
480            coeff_ww_db[row] = m2 * xi[row];
481        }
482        Ok(Some(Arc::new(RowCoeffOperator::from_directions(
483            vec![p_eta, pw],
484            vec![
485                (0, x_eta_arc),
486                (1, Arc::new(geom.basis)),
487                (1, Arc::new(geom.basis_d1)),
488                (1, Arc::new(geom.basis_d2)),
489            ],
490            vec![
491                (0, 0, coeff_eta),
492                (0, 1, coeff_etaw_b),
493                (0, 2, coeff_etaw_d1),
494                (0, 3, coeff_etaw_d2),
495                (1, 1, coeff_ww_bb),
496                (1, 2, coeff_ww_db),
497            ],
498            n,
499        ))))
500    }
501
502    pub(crate) fn bmw_second_directional_operator(
503        &self,
504        block_states: &[ParameterBlockState],
505        x_eta_arc: Arc<Array2<f64>>,
506        d_beta_u_flat: &Array1<f64>,
507        d_beta_v_flat: &Array1<f64>,
508    ) -> Result<Option<Arc<dyn gam_problem::HyperOperator>>, String> {
509        validate_block_count::<GamlssError>("BinomialMeanWiggleFamily", 2, block_states.len())?;
510        let eta = &block_states[Self::BLOCK_ETA].eta;
511        let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
512        let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
513        let n = self.y.len();
514        if eta.len() != n || etaw.len() != n || self.weights.len() != n {
515            return Err(GamlssError::DimensionMismatch {
516                reason: "BinomialMeanWiggleFamily input size mismatch".to_string(),
517            }
518            .into());
519        }
520        let geom = self.wiggle_geometry(eta.view(), betaw.view())?;
521        let p_eta = x_eta_arc.ncols();
522        let pw = geom.basis.ncols();
523        let total = p_eta + pw;
524        if d_beta_u_flat.len() != total || d_beta_v_flat.len() != total {
525            return Err(GamlssError::DimensionMismatch { reason: format!(
526                "BinomialMeanWiggleFamily joint second d_beta length mismatch: got {} and {}, expected {}",
527                d_beta_u_flat.len(),
528                d_beta_v_flat.len(),
529                total
530            ) }.into());
531        }
532        let u_eta = d_beta_u_flat.slice(s![0..p_eta]).to_owned();
533        let v_eta = d_beta_v_flat.slice(s![0..p_eta]).to_owned();
534        let uw = d_beta_u_flat.slice(s![p_eta..total]).to_owned();
535        let vw = d_beta_v_flat.slice(s![p_eta..total]).to_owned();
536
537        let xi_u = fast_av(x_eta_arc.as_ref(), &u_eta);
538        let xi_v = fast_av(x_eta_arc.as_ref(), &v_eta);
539        let phi_u = fast_av(&geom.basis, &uw);
540        let phi_v = fast_av(&geom.basis, &vw);
541        let b1u = fast_av(&geom.basis_d1, &uw);
542        let b1v = fast_av(&geom.basis_d1, &vw);
543        let b2u = fast_av(&geom.basis_d2, &uw);
544        let b2v = fast_av(&geom.basis_d2, &vw);
545        let b3u = fast_av(&geom.basis_d3, &uw);
546        let b3v = fast_av(&geom.basis_d3, &vw);
547
548        let mut coeff_eta = Array1::<f64>::zeros(n);
549        let mut coeff_etaw_b = Array1::<f64>::zeros(n);
550        let mut coeff_etaw_d1 = Array1::<f64>::zeros(n);
551        let mut coeff_etaw_d2 = Array1::<f64>::zeros(n);
552        let mut coeff_etaw_d3 = Array1::<f64>::zeros(n);
553        let mut coeff_ww_bb = Array1::<f64>::zeros(n);
554        let mut coeff_ww_db = Array1::<f64>::zeros(n);
555        let mut coeff_ww_ddb = Array1::<f64>::zeros(n);
556        let mut coeff_ww_dd = Array1::<f64>::zeros(n);
557
558        for row in 0..n {
559            let q = eta[row] + etaw[row];
560            let (m1, m2, m3) = self.neglog_q_derivatives(self.y[row], self.weights[row], q)?;
561            let m4 = self.neglog_q_fourth_derivative(self.y[row], self.weights[row], q)?;
562            let a = geom.dq_dq0[row];
563            let b = geom.d2q_dq02[row];
564            let c = geom.d3q_dq03[row];
565            let d = geom.d4q_dq04[row];
566
567            let q_u = a * xi_u[row] + phi_u[row];
568            let a_u = b * xi_u[row] + b1u[row];
569            let b_u = c * xi_u[row] + b2u[row];
570            let q_v = a * xi_v[row] + phi_v[row];
571            let a_v = b * xi_v[row] + b1v[row];
572            let b_v = c * xi_v[row] + b2v[row];
573            let q_uv = b * xi_u[row] * xi_v[row] + b1u[row] * xi_v[row] + b1v[row] * xi_u[row];
574            let a_uv = c * xi_u[row] * xi_v[row] + b2u[row] * xi_v[row] + b2v[row] * xi_u[row];
575            let b_uv = d * xi_u[row] * xi_v[row] + b3u[row] * xi_v[row] + b3v[row] * xi_u[row];
576
577            coeff_eta[row] = second_directionalhessian_coeff_fromobjective_q_terms(
578                m1, m2, m3, m4, q_u, q_v, q_uv, a, a, b, a_u, a_v, a_u, a_v, a_uv, a_uv, b_u, b_v,
579                b_uv,
580            );
581            let d2_c_b = m4 * q_u * q_v * a + m3 * (q_uv * a + q_u * a_v + q_v * a_u) + m2 * a_uv;
582            let dc_b_u = m3 * q_u * a + m2 * a_u;
583            let dc_b_v = m3 * q_v * a + m2 * a_v;
584            let c_b_static = m2 * a;
585            let d2_c_b1 = m3 * q_u * q_v + m2 * q_uv;
586            let dc_b1_u = m2 * q_u;
587            let dc_b1_v = m2 * q_v;
588
589            coeff_etaw_b[row] = d2_c_b;
590            coeff_etaw_d1[row] = dc_b_u * xi_v[row] + dc_b_v * xi_u[row] + d2_c_b1;
591            coeff_etaw_d2[row] =
592                c_b_static * xi_u[row] * xi_v[row] + dc_b1_u * xi_v[row] + dc_b1_v * xi_u[row];
593            coeff_etaw_d3[row] = m1 * xi_u[row] * xi_v[row];
594
595            let dw = m2;
596            let dw_u = m3 * q_u;
597            let dw_v = m3 * q_v;
598            let dw_uv = m4 * q_u * q_v + m3 * q_uv;
599            let xixj = xi_u[row] * xi_v[row];
600            coeff_ww_bb[row] = dw_uv;
601            coeff_ww_db[row] = dw_v * xi_u[row] + dw_u * xi_v[row];
602            coeff_ww_ddb[row] = dw * xixj;
603            coeff_ww_dd[row] = 2.0 * dw * xixj;
604        }
605
606        Ok(Some(Arc::new(RowCoeffOperator::from_directions(
607            vec![p_eta, pw],
608            vec![
609                (0, x_eta_arc),
610                (1, Arc::new(geom.basis)),
611                (1, Arc::new(geom.basis_d1)),
612                (1, Arc::new(geom.basis_d2)),
613                (1, Arc::new(geom.basis_d3)),
614            ],
615            vec![
616                (0, 0, coeff_eta),
617                (0, 1, coeff_etaw_b),
618                (0, 2, coeff_etaw_d1),
619                (0, 3, coeff_etaw_d2),
620                (0, 4, coeff_etaw_d3),
621                (1, 1, coeff_ww_bb),
622                (1, 2, coeff_ww_db),
623                (1, 3, coeff_ww_ddb),
624                (2, 2, coeff_ww_dd),
625            ],
626            n,
627        ))))
628    }
629
630    /// Build the [`BlockEffectiveJacobian`] for block `block_idx`.
631    ///
632    /// `BinomialMeanWiggle` has a single location output (n_outputs = 1):
633    /// - block 0 (eta):    output 0 = design rows
634    /// - block 1 (wiggle): all zeros (nonlinear link modulation)
635    pub fn block_effective_jacobian(
636        specs: &[ParameterBlockSpec],
637        block_idx: usize,
638    ) -> Result<Box<dyn BlockEffectiveJacobian>, String> {
639        crate::block_layout::block_jacobian::AdditiveWiggleBlockLayout {
640            family: "BinomialMeanWiggleFamily",
641            n_outputs: 1,
642            additive_blocks: &[Self::BLOCK_ETA],
643            wiggle_block: Some(Self::BLOCK_WIGGLE),
644        }
645        .block_effective_jacobian(specs, block_idx)
646    }
647}
648
649impl CustomFamily for BinomialMeanWiggleFamily {
650    fn exact_newton_joint_hessian_beta_dependent(&self) -> bool {
651        true
652    }
653
654    /// The binomial mean link-wiggle refit must NOT carry the full-span
655    /// Jeffreys/Firth augmentation, for the same structural reason
656    /// `GaussianLocationScaleWiggleFamily` opts out (#684–#688) — and the
657    /// binomial wiggle hits it harder. This is a *second-stage* refit: the
658    /// pilot binomial mean fit has already converged through the ordinary
659    /// PIRLS path (which is itself un-Firthed unless the user opts in — the
660    /// standard binomial fit logs `firth=false` / `jeffreys_logdet=none`), so
661    /// the wiggle refit only adds a *penalized*, *monotone-constrained*
662    /// I-spline link-shape correction `q = η + B(η)·β_w` around an
663    /// already-finite mode. Two failure modes follow from leaving the term on
664    /// (default `true`):
665    ///
666    /// 1. **Phantom stationarity residual.** When `H_pen` is full-rank and
667    ///    well-conditioned (the normal case — e.g. `cond ≈ 5.5e2` on the #872
668    ///    pure-probit repro) the Jeffreys gate smooth-steps the curvature
669    ///    `H_Φ → 0`, but the matching score `∇Φ` does not vanish in lock-step,
670    ///    so it leaks a nonzero `|∇L − Sβ + ∇Φ|` into the inner joint-Newton
671    ///    KKT residual. The certificate then refuses every iterate and the
672    ///    outer REML rejects all seeds (exactly the #684–#688 abort signature).
673    /// 2. **Saturation barrier / divergence.** `−Φ = −½log|I_J|` is folded into
674    ///    the objective and `∇Φ ∝ I_J⁻¹` into the gradient. The I-spline warp
675    ///    can drive the binomial linear predictor toward saturation, where the
676    ///    reduced Fisher information `I_J` goes singular: `−Φ → +∞` and
677    ///    `∇Φ → ∞`. The augmented objective grows a barrier that the joint
678    ///    Newton diverges into — the #872 repro runs the full 1200-cycle budget
679    ///    with the augmented objective pinned at ~4.6e9 and the augmented
680    ///    residual at ~5.8e9 while the plain data gradient is only ~2.3e2,
681    ///    aborting the documented `link(type=flexible(...)) + linkwiggle(...)`
682    ///    fit.
683    ///
684    /// Separation robustness is not lost: the wiggle block carries both a
685    /// difference penalty (λ selected by REML) and a hard non-negativity
686    /// constraint, and the underlying mean is fit by the pilot; a penalized,
687    /// constrained refit around a finite pilot mode does not run away to
688    /// `β → ∞` the way an unpenalized MLE can. Turning the term off here makes
689    /// the wiggle refit consistent with the un-Firthed pilot and removes the
690    /// phantom residual that blocked convergence.
691    fn joint_jeffreys_term_required(&self) -> bool {
692        false
693    }
694
695    fn coefficient_hessian_cost(&self, specs: &[ParameterBlockSpec]) -> u64 {
696        // The mean-wiggle Hessian is exposed as a row-coefficient operator,
697        // so the hot representation cost is one Θ(n · (p_eta + p_w)) HVP
698        // rather than dense Θ(n · (p_eta + p_w)^2) assembly.
699        let p_total = specs
700            .iter()
701            .map(|s| s.design.ncols() as u64)
702            .fold(0u64, |acc, p| acc.saturating_add(p));
703        (self.y.len() as u64).saturating_mul(p_total.max(1))
704    }
705
706    fn block_linear_constraints(
707        &self,
708        _: &[ParameterBlockState],
709        block_idx: usize,
710        spec: &ParameterBlockSpec,
711    ) -> Result<Option<LinearInequalityConstraints>, String> {
712        if block_idx != Self::BLOCK_WIGGLE {
713            return Ok(None);
714        }
715        // Frozen-basis (#1596): the warp is fit in the reduced, identifiable
716        // coordinate `γ` (`β_w = Z·γ`), where the per-coefficient nonnegativity
717        // `β_w ≥ 0` becomes the coupled inequality `Z·γ ≥ 0`. The frozen problem
718        // is convex (binomial deviance + quadratic penalty, linear predictor),
719        // so the penalized optimum is unique; monotonicity is enforced by a
720        // post-fit projection in `fit_binomial_mean_wiggle` rather than an inner
721        // active set (which the coupled inequality would re-introduce).
722        if self.frozen_warp_design.is_some() {
723            return Ok(None);
724        }
725        Ok(monotone_wiggle_nonnegative_constraints(spec.design.ncols()))
726    }
727
728    fn post_update_block_beta(
729        &self,
730        _: &[ParameterBlockState],
731        block_idx: usize,
732        block_spec: &ParameterBlockSpec,
733        beta: Array1<f64>,
734    ) -> Result<Array1<f64>, String> {
735        assert!(!block_spec.name.is_empty());
736        if block_idx != Self::BLOCK_WIGGLE {
737            return Ok(beta);
738        }
739        // Frozen-basis: γ is unconstrained (see `block_linear_constraints`).
740        if self.frozen_warp_design.is_some() {
741            return Ok(beta);
742        }
743        let beta = project_monotone_wiggle_beta_nonnegative(beta);
744        validate_monotone_wiggle_beta_nonnegative(&beta, "BinomialMeanWiggleFamily post-update")?;
745        Ok(beta)
746    }
747
748    fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
749        validate_block_count::<GamlssError>("BinomialMeanWiggleFamily", 2, block_states.len())?;
750        let eta = &block_states[Self::BLOCK_ETA].eta;
751        let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
752        let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
753        let n = self.y.len();
754        if eta.len() != n || etaw.len() != n || self.weights.len() != n {
755            return Err(GamlssError::DimensionMismatch {
756                reason: "BinomialMeanWiggleFamily input size mismatch".to_string(),
757            }
758            .into());
759        }
760        // Frozen-basis (#1596): with `B̃` pinned, `q = η + B̃·β_w` so
761        // `∂q/∂η = 1` exactly (the warp offset is constant in the live `η`).
762        // Otherwise `∂q/∂η = 1 + B'(η)·β_w`, the dynamic warp slope.
763        let dq_dq0 = if self.frozen_warp_design.is_some() {
764            Array1::<f64>::ones(n)
765        } else {
766            self.wiggle_dq_dq0(eta.view(), betaw.view())?
767        };
768        if dq_dq0.len() != n {
769            return Err(GamlssError::DimensionMismatch {
770                reason: format!(
771                    "BinomialMeanWiggleFamily dq/dq0 length mismatch: got {}, expected {}",
772                    dq_dq0.len(),
773                    n
774                ),
775            }
776            .into());
777        }
778
779        let mut ll = 0.0;
780        let mut z_eta = Array1::<f64>::zeros(n);
781        let mut w_eta = Array1::<f64>::zeros(n);
782        let mut z_wiggle = Array1::<f64>::zeros(n);
783        let mut w_wiggle = Array1::<f64>::zeros(n);
784        for i in 0..n {
785            let q = eta[i] + etaw[i];
786            let (mu_q, d1_q) = inverse_link_mu_d1_for_inverse_link(&self.link_kind, q)
787                .map_err(|e| format!("fixed-link wiggle inverse-link evaluation failed: {e}"))?;
788            let yi = self.y[i];
789            let wi = self.weights[i];
790            ll += binomial_location_scale_log_likelihood(yi, wi, q, &self.link_kind, mu_q)?;
791
792            let mu = mu_q.clamp(1e-12, 1.0 - 1e-12);
793            let var = (mu * (1.0 - mu)).max(MIN_PROB);
794            let dmu_deta = d1_q * dq_dq0[i];
795            let dmu_dw = d1_q;
796            if wi == 0.0 || !var.is_finite() {
797                z_eta[i] = eta[i];
798                z_wiggle[i] = etaw[i];
799                continue;
800            }
801
802            if dmu_deta.is_finite() {
803                w_eta[i] = floor_positiveweight(wi * (dmu_deta * dmu_deta / var), MIN_WEIGHT);
804                z_eta[i] = eta[i] + (yi - mu) / signedwith_floor(dmu_deta, MIN_DERIV);
805            } else {
806                z_eta[i] = eta[i];
807            }
808
809            if dmu_dw.is_finite() {
810                w_wiggle[i] = floor_positiveweight(wi * (dmu_dw * dmu_dw / var), MIN_WEIGHT);
811                z_wiggle[i] = etaw[i] + (yi - mu) / signedwith_floor(dmu_dw, MIN_DERIV);
812            } else {
813                z_wiggle[i] = etaw[i];
814            }
815        }
816
817        Ok(FamilyEvaluation {
818            log_likelihood: ll,
819            blockworking_sets: vec![
820                BlockWorkingSet::diagonal_checked(z_eta, w_eta)?,
821                BlockWorkingSet::diagonal_checked(z_wiggle, w_wiggle)?,
822            ],
823        })
824    }
825
826    fn block_geometry(
827        &self,
828        block_states: &[ParameterBlockState],
829        spec: &ParameterBlockSpec,
830    ) -> Result<(DesignMatrix, Array1<f64>), String> {
831        if spec.name != "wiggle" {
832            return Ok((spec.design.clone(), spec.offset.clone()));
833        }
834        if block_states.is_empty() {
835            return Err(GamlssError::UnsupportedConfiguration {
836                reason: "wiggle geometry requires eta block".to_string(),
837            }
838            .into());
839        }
840        let eta = &block_states[Self::BLOCK_ETA].eta;
841        if eta.len() != self.y.len() {
842            return Err(GamlssError::DimensionMismatch {
843                reason: "BinomialMeanWiggleFamily eta size mismatch".to_string(),
844            }
845            .into());
846        }
847        // Frozen-basis (#1596): return the pinned, identifiable warp design
848        // `B̃ = B(η̂)·Z` rather than the live `B(η)`. `B̃` is constant across
849        // inner cycles, so the engine rebuilds the *same* matrix every cycle —
850        // the death-spiral source (a basis that moves under the line search) is
851        // gone, while the dynamic-geometry plumbing is preserved unchanged.
852        let x = match self.frozen_warp_design.as_ref() {
853            Some(frozen) => frozen.as_ref().clone(),
854            None => self.wiggle_design(eta.view())?,
855        };
856        if x.ncols() != spec.design.ncols() {
857            return Err(GamlssError::DimensionMismatch {
858                reason: format!(
859                    "dynamic wiggle design col mismatch: got {}, expected {}",
860                    x.ncols(),
861                    spec.design.ncols()
862                ),
863            }
864            .into());
865        }
866        let nrows = x.nrows();
867        Ok((
868            DesignMatrix::Dense(gam_linalg::matrix::DenseDesignMatrix::from(x)),
869            Array1::zeros(nrows),
870        ))
871    }
872
873    fn block_geometry_is_dynamic(&self) -> bool {
874        true
875    }
876
877    fn exact_newton_joint_hessian_workspace(
878        &self,
879        block_states: &[ParameterBlockState],
880        specs: &[ParameterBlockSpec],
881    ) -> Result<Option<Arc<dyn ExactNewtonJointHessianWorkspace>>, String> {
882        let x_eta = self.dense_eta_design_fromspecs(specs)?.into_owned();
883        let workspace =
884            BinomialMeanWiggleHessianWorkspace::new(self.clone(), block_states.to_vec(), x_eta)?;
885        Ok(Some(Arc::new(workspace)))
886    }
887
888    fn inner_coefficient_hessian_hvp_available(&self, specs: &[ParameterBlockSpec]) -> bool {
889        self.dense_eta_design_fromspecs(specs).is_ok()
890    }
891
892    fn exact_newton_joint_hessian_with_specs(
893        &self,
894        block_states: &[ParameterBlockState],
895        specs: &[ParameterBlockSpec],
896    ) -> Result<Option<Array2<f64>>, String> {
897        validate_block_count::<GamlssError>("BinomialMeanWiggleFamily", 2, block_states.len())?;
898        let x_eta = self.dense_eta_design_fromspecs(specs)?;
899        let eta = &block_states[Self::BLOCK_ETA].eta;
900        let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
901        let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
902        let n = self.y.len();
903        if eta.len() != n || etaw.len() != n || self.weights.len() != n {
904            return Err(GamlssError::DimensionMismatch {
905                reason: "BinomialMeanWiggleFamily input size mismatch".to_string(),
906            }
907            .into());
908        }
909        let geom = self.wiggle_geometry(eta.view(), betaw.view())?;
910        let p_eta = x_eta.ncols();
911        let pw = geom.basis.ncols();
912        let mut coeff_eta = Array1::<f64>::zeros(n);
913        let mut coeff_etaw_b = Array1::<f64>::zeros(n);
914        let mut coeff_etaw_d1 = Array1::<f64>::zeros(n);
915        let mut coeff_ww = Array1::<f64>::zeros(n);
916        for row in 0..n {
917            let q = eta[row] + etaw[row];
918            let (m1, m2, _) = self.neglog_q_derivatives(self.y[row], self.weights[row], q)?;
919            let a = geom.dq_dq0[row];
920            let b = geom.d2q_dq02[row];
921            coeff_eta[row] = hessian_coeff_fromobjective_q_terms(m1, m2, a, a, b);
922            coeff_etaw_b[row] = m2 * a;
923            coeff_etaw_d1[row] = m1;
924            coeff_ww[row] = m2;
925        }
926        let h_eta_eta = xt_diag_x_dense(&x_eta, &coeff_eta)?;
927        let h_eta_w = xt_diag_y_dense(&x_eta, &coeff_etaw_b, &geom.basis)?
928            + &xt_diag_y_dense(&x_eta, &coeff_etaw_d1, &geom.basis_d1)?;
929        let h_ww = xt_diag_x_dense(&geom.basis, &coeff_ww)?;
930        assert_eq!(h_eta_eta.nrows(), p_eta);
931        assert_eq!(h_ww.nrows(), pw);
932        Ok(Some(binomial_pack_mean_wiggle_joint_symmetrichessian(
933            &h_eta_eta, &h_eta_w, &h_ww,
934        )))
935    }
936
937    fn exact_newton_joint_hessian_directional_derivative_with_specs(
938        &self,
939        block_states: &[ParameterBlockState],
940        specs: &[ParameterBlockSpec],
941        d_beta_flat: &Array1<f64>,
942    ) -> Result<Option<Array2<f64>>, String> {
943        validate_block_count::<GamlssError>("BinomialMeanWiggleFamily", 2, block_states.len())?;
944        let x_eta = self.dense_eta_design_fromspecs(specs)?;
945        let eta = &block_states[Self::BLOCK_ETA].eta;
946        let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
947        let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
948        let n = self.y.len();
949        if eta.len() != n || etaw.len() != n || self.weights.len() != n {
950            return Err(GamlssError::DimensionMismatch {
951                reason: "BinomialMeanWiggleFamily input size mismatch".to_string(),
952            }
953            .into());
954        }
955        let geom = self.wiggle_geometry(eta.view(), betaw.view())?;
956        let p_eta = x_eta.ncols();
957        let pw = geom.basis.ncols();
958        if d_beta_flat.len() != p_eta + pw {
959            return Err(GamlssError::DimensionMismatch {
960                reason: format!(
961                    "BinomialMeanWiggleFamily joint d_beta length mismatch: got {}, expected {}",
962                    d_beta_flat.len(),
963                    p_eta + pw
964                ),
965            }
966            .into());
967        }
968        let u_eta = d_beta_flat.slice(s![0..p_eta]).to_owned();
969        let uw = d_beta_flat.slice(s![p_eta..p_eta + pw]).to_owned();
970        let xi = x_eta.dot(&u_eta);
971        let phi = geom.basis.dot(&uw);
972        let basis1_u = geom.basis_d1.dot(&uw);
973        let basis2_u = geom.basis_d2.dot(&uw);
974
975        let mut coeff_eta = Array1::<f64>::zeros(n);
976        let mut coeff_etaw_b = Array1::<f64>::zeros(n);
977        let mut coeff_etaw_d1 = Array1::<f64>::zeros(n);
978        let mut coeff_etaw_d2 = Array1::<f64>::zeros(n);
979        let mut coeff_ww_bb = Array1::<f64>::zeros(n);
980        let mut coeff_ww_db = Array1::<f64>::zeros(n);
981        for row in 0..n {
982            let q = eta[row] + etaw[row];
983            let (m1, m2, m3) = self.neglog_q_derivatives(self.y[row], self.weights[row], q)?;
984            let a = geom.dq_dq0[row];
985            let b = geom.d2q_dq02[row];
986            let c = geom.d3q_dq03[row];
987            let q_u = a * xi[row] + phi[row];
988            let a_u = b * xi[row] + basis1_u[row];
989            let b_u = c * xi[row] + basis2_u[row];
990            coeff_eta[row] = directionalhessian_coeff_fromobjective_q_terms(
991                m1, m2, m3, q_u, a, a, b, a_u, a_u, b_u,
992            );
993            coeff_etaw_b[row] = m3 * q_u * a + m2 * a_u;
994            coeff_etaw_d1[row] = m2 * (a * xi[row] + q_u);
995            coeff_etaw_d2[row] = m1 * xi[row];
996            coeff_ww_bb[row] = m3 * q_u;
997            coeff_ww_db[row] = m2 * xi[row];
998        }
999
1000        let d_h_eta_eta = xt_diag_x_dense(&x_eta, &coeff_eta)?;
1001        let d_h_eta_w = xt_diag_y_dense(&x_eta, &coeff_etaw_b, &geom.basis)?
1002            + &xt_diag_y_dense(&x_eta, &coeff_etaw_d1, &geom.basis_d1)?
1003            + &xt_diag_y_dense(&x_eta, &coeff_etaw_d2, &geom.basis_d2)?;
1004        let a_ww = xt_diag_y_dense(&geom.basis_d1, &coeff_ww_db, &geom.basis)?;
1005        let d_h_ww = xt_diag_x_dense(&geom.basis, &coeff_ww_bb)? + &a_ww + a_ww.t();
1006        Ok(Some(binomial_pack_mean_wiggle_joint_symmetrichessian(
1007            &d_h_eta_eta,
1008            &d_h_eta_w,
1009            &d_h_ww,
1010        )))
1011    }
1012
1013    /// Exact second-order directional derivative D²H[u,v] of the joint Hessian
1014    /// for the BinomialMeanWiggle two-block model (eta, wiggle).
1015    ///
1016    /// # Mathematical derivation
1017    ///
1018    /// The negative log-likelihood Hessian element for indices (a, b) in the
1019    /// joint coefficient vector is:
1020    ///
1021    ///   H_ab = m2 * q_a * q_b + m1 * q_ab
1022    ///
1023    /// where m_k = d^k F / dq^k (k-th derivative of the negative log-likelihood
1024    /// w.r.t. the effective predictor q), q_a = dq/d(beta_a), and q_ab =
1025    /// d²q/(d(beta_a) d(beta_b)).
1026    ///
1027    /// The effective predictor is q = q0 + w(q0) where q0 = X_eta * beta_eta
1028    /// and w(q0) = B(q0) * beta_w is the link wiggle.  Write:
1029    ///   a = dq/dq0 = 1 + B'·beta_w       (geometry first derivative)
1030    ///   b = d²q/dq0² = B''·beta_w         (geometry second derivative)
1031    ///   c = d³q/dq0³ = B'''·beta_w        (geometry third derivative)
1032    ///   d = d⁴q/dq0⁴ = B''''·beta_w       (geometry fourth derivative)
1033    ///
1034    /// For a perturbation direction u = (u_eta, u_w), the chain-rule
1035    /// perturbations are:
1036    ///   q_u   = a·xi_u + phi_u             (first-order predictor perturbation)
1037    ///   a_u   = b·xi_u + basis1_u          (perturbation of geometry factor a)
1038    ///   b_u   = c·xi_u + basis2_u          (perturbation of geometry factor b)
1039    ///   c_u   = d·xi_u + basis3_u          (perturbation of geometry factor c)
1040    ///
1041    /// where xi_u = X_eta·u_eta, phi_u = B·u_w, basis_k_u = B^(k)·u_w.
1042    ///
1043    /// Mixed second-order perturbations (u,v) are:
1044    ///   q_uv  = b·xi_u·xi_v + basis1_u·xi_v + basis1_v·xi_u
1045    ///   a_uv  = c·xi_u·xi_v + basis2_u·xi_v + basis2_v·xi_u
1046    ///   b_uv  = d·xi_u·xi_v + basis3_u·xi_v + basis3_v·xi_u
1047    ///
1048    /// ## Block decomposition
1049    ///
1050    /// **eta-eta block** (X_eta' diag(coeff) X_eta):
1051    ///   The Hessian element for eta indices (i,j) factors as
1052    ///     H(eta_i, eta_j) = [m2·a² + m1·b] · x_eta(i)·x_eta(j)
1053    ///   so D²H_eta_eta[u,v] = X_eta' diag(coeff_eta) X_eta
1054    ///   where coeff_eta uses `second_directionalhessian_coeff_fromobjective_q_terms`
1055    ///   with q_a=a, q_b=a, q_ab=b and their chain-rule perturbations.
1056    ///
1057    /// **eta-w block** (X_eta' diag(...) [B, B', B'', B''']):
1058    ///   The static Hessian is:
1059    ///     H(eta_i, w_j) = (m2·a)·x_eta(i)·B_j + m1·x_eta(i)·B'_j
1060    ///   Taking D²[u,v] requires differentiating both the scalar coefficients
1061    ///   (m2·a, m1) and the basis matrices (B, B' depend on q0 via the chain
1062    ///   rule dB_j/du = B'_j·xi_u).  The full product rule gives four basis-matrix
1063    ///   tiers: B, B', B'', B'''.
1064    ///
1065    /// **w-w block** (B' diag(...) B, etc.):
1066    ///   The static Hessian is H(w_i, w_j) = m2·B_i·B_j.
1067    ///   D²[u,v] expands via the product rule on m2, B_i, B_j, each of which
1068    ///   depends on beta through q and q0.  This gives terms involving
1069    ///   B·B, B'·B, B'·B', and B''·B (all symmetrised).
1070    fn exact_newton_joint_hessian_second_directional_derivative_with_specs(
1071        &self,
1072        block_states: &[ParameterBlockState],
1073        specs: &[ParameterBlockSpec],
1074        d_beta_u_flat: &Array1<f64>,
1075        d_beta_v_flat: &Array1<f64>,
1076    ) -> Result<Option<Array2<f64>>, String> {
1077        validate_block_count::<GamlssError>("BinomialMeanWiggleFamily", 2, block_states.len())?;
1078        let x_eta = self.dense_eta_design_fromspecs(specs)?;
1079        let eta = &block_states[Self::BLOCK_ETA].eta;
1080        let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
1081        let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
1082        let n = self.y.len();
1083        if eta.len() != n || etaw.len() != n || self.weights.len() != n {
1084            return Err(GamlssError::DimensionMismatch {
1085                reason: "BinomialMeanWiggleFamily input size mismatch".to_string(),
1086            }
1087            .into());
1088        }
1089        let geom = self.wiggle_geometry(eta.view(), betaw.view())?;
1090        let p_eta = x_eta.ncols();
1091        let pw = geom.basis.ncols();
1092        let total = p_eta + pw;
1093        if d_beta_u_flat.len() != total || d_beta_v_flat.len() != total {
1094            return Err(GamlssError::DimensionMismatch { reason: format!(
1095                "BinomialMeanWiggleFamily joint second d_beta length mismatch: got {} and {}, expected {}",
1096                d_beta_u_flat.len(),
1097                d_beta_v_flat.len(),
1098                total
1099            ) }.into());
1100        }
1101
1102        // Split directions into eta and wiggle components.
1103        let u_eta = d_beta_u_flat.slice(s![0..p_eta]).to_owned();
1104        let v_eta = d_beta_v_flat.slice(s![0..p_eta]).to_owned();
1105        let uw = d_beta_u_flat.slice(s![p_eta..total]).to_owned();
1106        let vw = d_beta_v_flat.slice(s![p_eta..total]).to_owned();
1107
1108        // Per-row linear-predictor perturbations from each direction.
1109        let xi_u = x_eta.dot(&u_eta); // eta perturbation in direction u
1110        let xi_v = x_eta.dot(&v_eta); // eta perturbation in direction v
1111        let phi_u = geom.basis.dot(&uw); // direct wiggle basis, direction u
1112        let phi_v = geom.basis.dot(&vw); // direct wiggle basis, direction v
1113        let b1u = geom.basis_d1.dot(&uw); // first-derivative basis, direction u
1114        let b1v = geom.basis_d1.dot(&vw);
1115        let b2u = geom.basis_d2.dot(&uw); // second-derivative basis, direction u
1116        let b2v = geom.basis_d2.dot(&vw);
1117        let b3u = geom.basis_d3.dot(&uw); // third-derivative basis, direction u
1118        let b3v = geom.basis_d3.dot(&vw);
1119
1120        // Per-row chain-rule perturbations of q, a = dq/dq0, b = d²q/dq0²:
1121        //   q_u = a·xi_u + phi_u
1122        //   a_u = b·xi_u + basis1_u
1123        //   b_u = c·xi_u + basis2_u
1124        //   c_u = d·xi_u + basis3_u
1125        // Mixed second-order perturbations:
1126        //   q_uv = b·xi_u·xi_v + basis1_u·xi_v + basis1_v·xi_u
1127        //   a_uv = c·xi_u·xi_v + basis2_u·xi_v + basis2_v·xi_u
1128        //   b_uv = d·xi_u·xi_v + basis3_u·xi_v + basis3_v·xi_u
1129
1130        // Scaled basis matrices for the cross-product terms in the w-w and eta-w
1131        // blocks (same pattern as GaussianLocationScaleWiggleFamily).
1132        let basis_u = scale_matrix_rows(&geom.basis_d1, &xi_u)?; // dB/du = B'·xi_u
1133        let basis_v = scale_matrix_rows(&geom.basis_d1, &xi_v)?; // dB/dv = B'·xi_v
1134        let basis_uv = scale_matrix_rows(&geom.basis_d2, &(&xi_u * &xi_v))?; // d²B/dudv = B''·xi_u·xi_v
1135        // Per-row coefficient arrays for assembling the block-matrix products.
1136        let mut coeff_eta = Array1::<f64>::zeros(n);
1137
1138        // Coefficients for the eta-w block: X_eta' diag(c_*) M where M ∈ {B, B', B'', B'''}
1139        //
1140        // The static cross-Hessian is:
1141        //   H(eta_i, w_j) = (m2·a)·x_i·B_j + m1·x_i·B'_j
1142        // where B_j and B'_j are row evaluations of basis column j.
1143        //
1144        // Write C_B = m2·a (scalar coefficient multiplying B in the cross block)
1145        // and   C_B1 = m1  (scalar coefficient multiplying B' in the cross block).
1146        //
1147        // Product rule on C_B·B:
1148        //   d(C_B·B)/du = (dC_B/du)·B + C_B·B'·xi_u
1149        //   d²(C_B·B)/dudv = (d²C_B/dudv)·B + (dC_B/du)·B'·xi_v
1150        //                   + (dC_B/dv)·B'·xi_u + C_B·B''·xi_u·xi_v
1151        //
1152        // Product rule on C_B1·B':
1153        //   d²(C_B1·B')/dudv = (d²C_B1/dudv)·B' + (dC_B1/du)·B''·xi_v
1154        //                     + (dC_B1/dv)·B''·xi_u + C_B1·B'''·xi_u·xi_v
1155        //
1156        // Derivatives of the scalar coefficients:
1157        //   C_B  = m2·a
1158        //   dC_B/du  = m3·q_u·a + m2·a_u
1159        //   dC_B/dv  = m3·q_v·a + m2·a_v
1160        //   d²C_B/dudv = m4·q_u·q_v·a + m3·(q_uv·a + q_u·a_v + q_v·a_u) + m2·a_uv
1161        //
1162        //   C_B1 = m1
1163        //   dC_B1/du = m2·q_u
1164        //   dC_B1/dv = m2·q_v
1165        //   d²C_B1/dudv = m3·q_u·q_v + m2·q_uv
1166        //
1167        // Grouping by basis-matrix tier:
1168        //   B:   d²C_B/dudv
1169        //   B':  (dC_B/du)·xi_v + (dC_B/dv)·xi_u + d²C_B1/dudv
1170        //   B'': C_B·xi_u·xi_v + (dC_B1/du)·xi_v + (dC_B1/dv)·xi_u
1171        //   B''': C_B1·xi_u·xi_v
1172        let mut coeff_etaw_b = Array1::<f64>::zeros(n);
1173        let mut coeff_etaw_d1 = Array1::<f64>::zeros(n);
1174        let mut coeff_etaw_d2 = Array1::<f64>::zeros(n);
1175        let mut coeff_etaw_d3 = Array1::<f64>::zeros(n);
1176
1177        // Coefficients for the w-w block.
1178        //
1179        // The static w-w Hessian is:
1180        //   H(w_i, w_j) = m2·B_i·B_j
1181        //
1182        // Note: there is no m1·q_ij term because d²q/(d(beta_w_i) d(beta_w_j)) = 0
1183        // (the basis vectors B_i enter q linearly in beta_w).
1184        //
1185        // Product rule on m2·B_i·B_j, treating each factor as depending on beta:
1186        //   d²(m2·B_i·B_j)/dudv
1187        //     = (d²m2/dudv)·B_i·B_j                        → B'diag B  (symmetrised)
1188        //     + (dm2/du)·(B'_i·xi_v·B_j + B_i·B'_j·xi_v)  → dw_u terms
1189        //     + (dm2/dv)·(B'_i·xi_u·B_j + B_i·B'_j·xi_u)  → dw_v terms
1190        //     + m2·(B''_i·xi_u·xi_v·B_j + B'_i·xi_u·B'_j·xi_v
1191        //          + B'_i·xi_v·B'_j·xi_u + B_i·B''_j·xi_u·xi_v)
1192        //
1193        // where dm2/du = m3·q_u, dm2/dv = m3·q_v, d²m2/dudv = m4·q_u·q_v + m3·q_uv.
1194        //
1195        // Following the Gaussian LS wiggle pattern, we express this via:
1196        //   xt_diag_x_dense(B, dw_uv)                    — coeff: d²m2
1197        //   xt_diag_y_dense(basis_u, dw_v, B) + transpose — dB/du weighted by dm2/dv
1198        //   xt_diag_y_dense(basis_v, dw_u, B) + transpose — dB/dv weighted by dm2/du
1199        //   xt_diag_y_dense(basis_uv, w, B) + transpose   — d²B/dudv weighted by m2
1200        //   xt_diag_y_dense(basis_u, w, basis_v) + transpose — dB/du·dB/dv weighted by m2
1201        let mut dw = Array1::<f64>::zeros(n);
1202        let mut dw_u = Array1::<f64>::zeros(n);
1203        let mut dw_v = Array1::<f64>::zeros(n);
1204        let mut dw_uv = Array1::<f64>::zeros(n);
1205
1206        for row in 0..n {
1207            let q = eta[row] + etaw[row];
1208            let (m1, m2, m3) = self.neglog_q_derivatives(self.y[row], self.weights[row], q)?;
1209            let m4 = self.neglog_q_fourth_derivative(self.y[row], self.weights[row], q)?;
1210            let a = geom.dq_dq0[row];
1211            let b = geom.d2q_dq02[row];
1212            let c = geom.d3q_dq03[row];
1213            let d = geom.d4q_dq04[row];
1214
1215            // Chain-rule perturbations in direction u.
1216            let q_u = a * xi_u[row] + phi_u[row];
1217            let a_u = b * xi_u[row] + b1u[row];
1218            let b_u = c * xi_u[row] + b2u[row];
1219
1220            // Chain-rule perturbations in direction v.
1221            let q_v = a * xi_v[row] + phi_v[row];
1222            let a_v = b * xi_v[row] + b1v[row];
1223            let b_v = c * xi_v[row] + b2v[row];
1224
1225            // Mixed second-order perturbations.
1226            let q_uv = b * xi_u[row] * xi_v[row] + b1u[row] * xi_v[row] + b1v[row] * xi_u[row];
1227            let a_uv = c * xi_u[row] * xi_v[row] + b2u[row] * xi_v[row] + b2v[row] * xi_u[row];
1228            let b_uv = d * xi_u[row] * xi_v[row] + b3u[row] * xi_v[row] + b3v[row] * xi_u[row];
1229
1230            // ── eta-eta block ──
1231            // H(eta_i, eta_j) uses q_a = a, q_b = a, q_ab = b (absorbing x_eta
1232            // into the matrix product).  The perturbations of these geometric
1233            // quantities are: dq_a/du = a_u, dq_b/du = a_u (since q_a = q_b = a),
1234            // dq_ab/du = b_u (since q_ab = b), and analogously for v.
1235            coeff_eta[row] = second_directionalhessian_coeff_fromobjective_q_terms(
1236                m1, m2, m3, m4, q_u, q_v, q_uv, a, a, b, // q_a, q_b, q_ab
1237                a_u, a_v, // dq_a_u, dq_a_v
1238                a_u, a_v, // dq_b_u, dq_b_v  (q_b = a so same perturbation)
1239                a_uv, a_uv, // d2q_a_uv, d2q_b_uv
1240                b_u, b_v,  // dq_ab_u, dq_ab_v  (q_ab = b)
1241                b_uv, // d2q_ab_uv
1242            );
1243
1244            // ── eta-w block coefficients ──
1245            // See the derivation in the docstring above.  We group by which basis
1246            // matrix tier (B, B', B'', B''') the coefficient multiplies.
1247
1248            // d²(m2·a)/dudv
1249            let d2_c_b = m4 * q_u * q_v * a + m3 * (q_uv * a + q_u * a_v + q_v * a_u) + m2 * a_uv;
1250            // d(m2·a)/du and d(m2·a)/dv
1251            let dc_b_u = m3 * q_u * a + m2 * a_u;
1252            let dc_b_v = m3 * q_v * a + m2 * a_v;
1253            // m2·a (static coefficient for B in the cross block)
1254            let c_b_static = m2 * a;
1255            // d²(m1)/dudv
1256            let d2_c_b1 = m3 * q_u * q_v + m2 * q_uv;
1257            // d(m1)/du and d(m1)/dv
1258            let dc_b1_u = m2 * q_u;
1259            let dc_b1_v = m2 * q_v;
1260
1261            coeff_etaw_b[row] = d2_c_b;
1262            coeff_etaw_d1[row] = dc_b_u * xi_v[row] + dc_b_v * xi_u[row] + d2_c_b1;
1263            coeff_etaw_d2[row] =
1264                c_b_static * xi_u[row] * xi_v[row] + dc_b1_u * xi_v[row] + dc_b1_v * xi_u[row];
1265            coeff_etaw_d3[row] = m1 * xi_u[row] * xi_v[row];
1266
1267            // ── w-w block coefficients ──
1268            // The w-w static Hessian coefficient is m2 (for B'diag B).
1269            dw[row] = m2;
1270            dw_u[row] = m3 * q_u;
1271            dw_v[row] = m3 * q_v;
1272            dw_uv[row] = m4 * q_u * q_v + m3 * q_uv;
1273        }
1274
1275        // ── Assemble eta-eta block ──
1276        let d2_h_eta_eta = xt_diag_x_dense(&x_eta, &coeff_eta)?;
1277
1278        // ── Assemble eta-w block ──
1279        // The second-order directional derivative of the cross block H_eta_w is:
1280        //   d²H_eta_w[u,v] = X_eta' diag(coeff_etaw_b)  B
1281        //                   + X_eta' diag(coeff_etaw_d1) B'
1282        //                   + X_eta' diag(coeff_etaw_d2) B''
1283        //                   + X_eta' diag(coeff_etaw_d3) B'''
1284        let d2_h_eta_w = xt_diag_y_dense(&x_eta, &coeff_etaw_b, &geom.basis)?
1285            + &xt_diag_y_dense(&x_eta, &coeff_etaw_d1, &geom.basis_d1)?
1286            + &xt_diag_y_dense(&x_eta, &coeff_etaw_d2, &geom.basis_d2)?
1287            + &xt_diag_y_dense(&x_eta, &coeff_etaw_d3, &geom.basis_d3)?;
1288
1289        // ── Assemble w-w block ──
1290        // Following the Gaussian LS wiggle pattern (lines 6351-6363), the w-w
1291        // second directional derivative is assembled from scaled basis products:
1292        //
1293        //   d²(m2·B_i·B_j)/dudv decomposition:
1294        //     (d²m2)     · B_i·B_j        → xt_diag_x(B, dw_uv)
1295        //     (dm2/du)   · dB_j/dv · B_i  → xt_diag_y(basis_v, dw_u, B) + transpose
1296        //     (dm2/dv)   · dB_j/du · B_i  → xt_diag_y(basis_u, dw_v, B) + transpose
1297        //     m2 · d²B_j/dudv · B_i       → xt_diag_y(basis_uv, dw, B) + transpose
1298        //     m2 · dB_i/du · dB_j/dv      → xt_diag_y(basis_u, dw, basis_v) + transpose
1299        let a_ab = xt_diag_y_dense(&basis_uv, &dw, &geom.basis)?;
1300        let a_ij = xt_diag_y_dense(&basis_u, &dw, &basis_v)?;
1301        let a_iwj = xt_diag_y_dense(&basis_u, &dw_v, &geom.basis)?;
1302        let a_jwi = xt_diag_y_dense(&basis_v, &dw_u, &geom.basis)?;
1303        let d2_h_ww = &a_ab
1304            + &a_ab.t()
1305            + &a_ij
1306            + a_ij.t()
1307            + &a_iwj
1308            + a_iwj.t()
1309            + &a_jwi
1310            + a_jwi.t()
1311            + &xt_diag_x_dense(&geom.basis, &dw_uv)?;
1312
1313        Ok(Some(binomial_pack_mean_wiggle_joint_symmetrichessian(
1314            &d2_h_eta_eta,
1315            &d2_h_eta_w,
1316            &d2_h_ww,
1317        )))
1318    }
1319
1320    fn exact_newton_joint_psi_terms(
1321        &self,
1322        block_states: &[ParameterBlockState],
1323        specs: &[ParameterBlockSpec],
1324        derivative_blocks: &[Vec<CustomFamilyBlockPsiDerivative>],
1325        psi_index: usize,
1326    ) -> Result<Option<crate::custom_family::ExactNewtonJointPsiTerms>, String> {
1327        validate_block_count::<GamlssError>("BinomialMeanWiggleFamily", 2, block_states.len())?;
1328        if derivative_blocks.len() != 2 {
1329            return Err(GamlssError::DimensionMismatch { reason: format!(
1330                "BinomialMeanWiggleFamily joint psi terms expect 2 derivative block lists, got {}",
1331                derivative_blocks.len()
1332            ) }.into());
1333        }
1334        let x_eta = self.dense_eta_design_fromspecs(specs)?;
1335        let eta = &block_states[Self::BLOCK_ETA].eta;
1336        let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
1337        let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
1338        let n = self.y.len();
1339        if eta.len() != n || etaw.len() != n || self.weights.len() != n {
1340            return Err(GamlssError::DimensionMismatch {
1341                reason: "BinomialMeanWiggleFamily input size mismatch".to_string(),
1342            }
1343            .into());
1344        }
1345        let geom = self.wiggle_geometry(eta.view(), betaw.view())?;
1346        let p_eta = x_eta.ncols();
1347        let pw = geom.basis.ncols();
1348        let implicit_dir =
1349            self.exact_newton_joint_psi_action(block_states, derivative_blocks, psi_index, p_eta)?;
1350        let dense_dir = if implicit_dir.is_none() {
1351            self.exact_newton_joint_psi_direction(
1352                block_states,
1353                derivative_blocks,
1354                psi_index,
1355                &x_eta,
1356            )?
1357        } else {
1358            None
1359        };
1360        let z_eta_psi = if let Some((_, ref z_eta_psi)) = implicit_dir {
1361            z_eta_psi
1362        } else if let Some(ref dir_a) = dense_dir {
1363            &dir_a.z_eta_psi
1364        } else {
1365            return Ok(None);
1366        };
1367
1368        let mut objective_psi = 0.0;
1369        let mut score_eta_xa = Array1::<f64>::zeros(n);
1370        let mut score_eta_x = Array1::<f64>::zeros(n);
1371        let mut score_w_b = Array1::<f64>::zeros(n);
1372        let mut score_w_d1 = Array1::<f64>::zeros(n);
1373
1374        let mut coeff_eta_eta_xx = Array1::<f64>::zeros(n);
1375        let mut coeff_eta_eta_xa_x = Array1::<f64>::zeros(n);
1376        let mut coeff_eta_w_xa_b = Array1::<f64>::zeros(n);
1377        let mut coeff_eta_w_x_b = Array1::<f64>::zeros(n);
1378        let mut coeff_eta_w_x_d1 = Array1::<f64>::zeros(n);
1379        let mut coeff_eta_w_xa_d1 = Array1::<f64>::zeros(n);
1380        let mut coeff_eta_w_x_d2 = Array1::<f64>::zeros(n);
1381        let mut coeff_ww_bb = Array1::<f64>::zeros(n);
1382        let mut coeff_ww_db = Array1::<f64>::zeros(n);
1383
1384        for row in 0..n {
1385            let q = eta[row] + etaw[row];
1386            let (m1, m2, m3) = self.neglog_q_derivatives(self.y[row], self.weights[row], q)?;
1387            let z_a = z_eta_psi[row];
1388            let a = geom.dq_dq0[row];
1389            let b = geom.d2q_dq02[row];
1390            let c = geom.d3q_dq03[row];
1391            let q_a = a * z_a;
1392
1393            objective_psi += m1 * q_a;
1394
1395            score_eta_xa[row] = m1 * a;
1396            score_eta_x[row] = m2 * q_a * a + m1 * b * z_a;
1397            score_w_b[row] = m2 * q_a;
1398            score_w_d1[row] = m1 * z_a;
1399
1400            coeff_eta_eta_xx[row] =
1401                m3 * q_a * a * a + m2 * (2.0 * a * b * z_a + q_a * b) + m1 * c * z_a;
1402            coeff_eta_eta_xa_x[row] = m2 * a * a + m1 * b;
1403            coeff_eta_w_xa_b[row] = m2 * a;
1404            coeff_eta_w_x_b[row] = m3 * q_a * a + m2 * b * z_a;
1405            coeff_eta_w_x_d1[row] = m2 * (a * z_a + q_a);
1406            coeff_eta_w_xa_d1[row] = m1;
1407            coeff_eta_w_x_d2[row] = m1 * z_a;
1408            coeff_ww_bb[row] = m3 * q_a;
1409            coeff_ww_db[row] = m2 * z_a;
1410        }
1411
1412        let score_w = gam_linalg::faer_ndarray::fast_atv(&geom.basis, &score_w_b)
1413            + gam_linalg::faer_ndarray::fast_atv(&geom.basis_d1, &score_w_d1);
1414
1415        if let Some((action, _)) = implicit_dir {
1416            let score_eta = action.transpose_mul(score_eta_xa.view())
1417                + gam_linalg::faer_ndarray::fast_atv(x_eta.as_ref(), &score_eta_x);
1418            let score_psi = binomial_pack_mean_wiggle_joint_score(&score_eta, &score_w);
1419            let x_eta_arc = shared_dense_arc(x_eta.as_ref());
1420            let basis_arc = Arc::new(geom.basis.clone());
1421            let basis_d1_arc = Arc::new(geom.basis_d1.clone());
1422            let basis_d2_arc = Arc::new(geom.basis_d2.clone());
1423            let zeros = Array1::<f64>::zeros(n);
1424            let operator = CustomFamilyJointPsiOperator::new(
1425                p_eta + pw,
1426                vec![
1427                    CustomFamilyJointDesignChannel::new(
1428                        0..p_eta,
1429                        Arc::clone(&x_eta_arc),
1430                        Some(action),
1431                    ),
1432                    CustomFamilyJointDesignChannel::new(
1433                        p_eta..p_eta + pw,
1434                        Arc::clone(&basis_arc),
1435                        None,
1436                    ),
1437                    CustomFamilyJointDesignChannel::new(
1438                        p_eta..p_eta + pw,
1439                        Arc::clone(&basis_d1_arc),
1440                        None,
1441                    ),
1442                    CustomFamilyJointDesignChannel::new(
1443                        p_eta..p_eta + pw,
1444                        Arc::clone(&basis_d2_arc),
1445                        None,
1446                    ),
1447                ],
1448                vec![
1449                    CustomFamilyJointDesignPairContribution::new(
1450                        0,
1451                        0,
1452                        coeff_eta_eta_xa_x.clone(),
1453                        coeff_eta_eta_xx.clone(),
1454                    ),
1455                    CustomFamilyJointDesignPairContribution::new(
1456                        0,
1457                        1,
1458                        coeff_eta_w_xa_b.clone(),
1459                        coeff_eta_w_x_b.clone(),
1460                    ),
1461                    CustomFamilyJointDesignPairContribution::new(
1462                        1,
1463                        0,
1464                        coeff_eta_w_xa_b.clone(),
1465                        coeff_eta_w_x_b.clone(),
1466                    ),
1467                    CustomFamilyJointDesignPairContribution::new(
1468                        0,
1469                        2,
1470                        coeff_eta_w_xa_d1.clone(),
1471                        coeff_eta_w_x_d1.clone(),
1472                    ),
1473                    CustomFamilyJointDesignPairContribution::new(
1474                        2,
1475                        0,
1476                        coeff_eta_w_xa_d1.clone(),
1477                        coeff_eta_w_x_d1.clone(),
1478                    ),
1479                    CustomFamilyJointDesignPairContribution::new(
1480                        0,
1481                        3,
1482                        zeros.clone(),
1483                        coeff_eta_w_x_d2.clone(),
1484                    ),
1485                    CustomFamilyJointDesignPairContribution::new(
1486                        3,
1487                        0,
1488                        zeros.clone(),
1489                        coeff_eta_w_x_d2.clone(),
1490                    ),
1491                    CustomFamilyJointDesignPairContribution::new(
1492                        1,
1493                        1,
1494                        zeros.clone(),
1495                        coeff_ww_bb.clone(),
1496                    ),
1497                    CustomFamilyJointDesignPairContribution::new(
1498                        2,
1499                        1,
1500                        zeros.clone(),
1501                        coeff_ww_db.clone(),
1502                    ),
1503                    CustomFamilyJointDesignPairContribution::new(1, 2, zeros, coeff_ww_db.clone()),
1504                ],
1505            );
1506            return Ok(Some(crate::custom_family::ExactNewtonJointPsiTerms {
1507                objective_psi,
1508                score_psi,
1509                hessian_psi: Array2::zeros((0, 0)),
1510                hessian_psi_operator: Some(std::sync::Arc::new(operator)),
1511            }));
1512        }
1513
1514        let dir_a =
1515            dense_dir.expect("dense psi direction should exist when implicit direction is absent");
1516        let x_eta_psi = dir_a
1517            .x_eta_psi
1518            .as_ref()
1519            .expect("dense eta psi design should exist when implicit direction is absent");
1520        let score_psi = binomial_pack_mean_wiggle_joint_score(
1521            &(gam_linalg::faer_ndarray::fast_atv(x_eta_psi, &score_eta_xa)
1522                + gam_linalg::faer_ndarray::fast_atv(x_eta.as_ref(), &score_eta_x)),
1523            &score_w,
1524        );
1525        let a_eta_eta = xt_diag_y_dense(x_eta_psi, &coeff_eta_eta_xa_x, &x_eta)?;
1526        let h_eta_eta = &a_eta_eta + &a_eta_eta.t() + &xt_diag_x_dense(&x_eta, &coeff_eta_eta_xx)?;
1527        let h_eta_w = xt_diag_y_dense(x_eta_psi, &coeff_eta_w_xa_b, &geom.basis)?
1528            + &xt_diag_y_dense(&x_eta, &coeff_eta_w_x_b, &geom.basis)?
1529            + &xt_diag_y_dense(&x_eta, &coeff_eta_w_x_d1, &geom.basis_d1)?
1530            + &xt_diag_y_dense(x_eta_psi, &coeff_eta_w_xa_d1, &geom.basis_d1)?
1531            + &xt_diag_y_dense(&x_eta, &coeff_eta_w_x_d2, &geom.basis_d2)?;
1532        let a_ww = xt_diag_y_dense(&geom.basis_d1, &coeff_ww_db, &geom.basis)?;
1533        let h_ww = xt_diag_x_dense(&geom.basis, &coeff_ww_bb)? + &a_ww + a_ww.t();
1534
1535        Ok(Some(crate::custom_family::ExactNewtonJointPsiTerms {
1536            objective_psi,
1537            score_psi,
1538            hessian_psi: binomial_pack_mean_wiggle_joint_symmetrichessian(
1539                &h_eta_eta, &h_eta_w, &h_ww,
1540            ),
1541            hessian_psi_operator: None,
1542        }))
1543    }
1544}
1545
1546pub(crate) struct BinomialMeanWiggleHessianWorkspace {
1547    pub(crate) family: BinomialMeanWiggleFamily,
1548    pub(crate) block_states: Vec<ParameterBlockState>,
1549    pub(crate) x_eta: Arc<Array2<f64>>,
1550    pub(crate) hessian_operator: Arc<RowCoeffOperator>,
1551}
1552
1553impl BinomialMeanWiggleHessianWorkspace {
1554    pub(crate) fn new(
1555        family: BinomialMeanWiggleFamily,
1556        block_states: Vec<ParameterBlockState>,
1557        x_eta: Array2<f64>,
1558    ) -> Result<Self, String> {
1559        let x_eta = Arc::new(x_eta);
1560        let hessian_operator = family.bmw_static_hessian_operator(&block_states, x_eta.clone())?;
1561        Ok(Self {
1562            family,
1563            block_states,
1564            x_eta,
1565            hessian_operator,
1566        })
1567    }
1568}
1569
1570impl ExactNewtonJointHessianWorkspace for BinomialMeanWiggleHessianWorkspace {
1571    fn hessian_matvec_available(&self) -> bool {
1572        true
1573    }
1574
1575    fn hessian_matvec(&self, v: &Array1<f64>) -> Result<Option<Array1<f64>>, String> {
1576        Ok(Some(gam_problem::HyperOperator::mul_vec(
1577            self.hessian_operator.as_ref(),
1578            v,
1579        )))
1580    }
1581
1582    fn hessian_diagonal(&self) -> Result<Option<Array1<f64>>, String> {
1583        Ok(None)
1584    }
1585
1586    fn directional_derivative(
1587        &self,
1588        d_beta_flat: &Array1<f64>,
1589    ) -> Result<Option<Array2<f64>>, String> {
1590        Ok(self
1591            .directional_derivative_operator(d_beta_flat)?
1592            .map(|operator| operator.to_dense()))
1593    }
1594
1595    fn directional_derivative_operator(
1596        &self,
1597        d_beta_flat: &Array1<f64>,
1598    ) -> Result<Option<Arc<dyn gam_problem::HyperOperator>>, String> {
1599        self.family
1600            .bmw_directional_operator(&self.block_states, self.x_eta.clone(), d_beta_flat)
1601    }
1602
1603    fn second_directional_derivative(
1604        &self,
1605        d_beta_u_flat: &Array1<f64>,
1606        d_beta_v_flat: &Array1<f64>,
1607    ) -> Result<Option<Array2<f64>>, String> {
1608        Ok(self
1609            .second_directional_derivative_operator(d_beta_u_flat, d_beta_v_flat)?
1610            .map(|operator| operator.to_dense()))
1611    }
1612
1613    fn second_directional_derivative_operator(
1614        &self,
1615        d_beta_u: &Array1<f64>,
1616        d_beta_v: &Array1<f64>,
1617    ) -> Result<Option<Arc<dyn gam_problem::HyperOperator>>, String> {
1618        self.family.bmw_second_directional_operator(
1619            &self.block_states,
1620            self.x_eta.clone(),
1621            d_beta_u,
1622            d_beta_v,
1623        )
1624    }
1625}
1626
1627impl CustomFamilyGenerative for BinomialMeanWiggleFamily {
1628    fn generativespec(
1629        &self,
1630        block_states: &[ParameterBlockState],
1631    ) -> Result<GenerativeSpec, String> {
1632        validate_block_count::<GamlssError>("BinomialMeanWiggleFamily", 2, block_states.len())?;
1633        let eta = &block_states[Self::BLOCK_ETA].eta;
1634        let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
1635        if eta.len() != self.y.len() || etaw.len() != self.y.len() {
1636            return Err(GamlssError::DimensionMismatch {
1637                reason: "BinomialMeanWiggleFamily generative size mismatch".to_string(),
1638            }
1639            .into());
1640        }
1641        let mean = gamlss_rowwise_map_result(self.y.len(), |i| {
1642            let jet = inverse_link_jet_for_inverse_link(&self.link_kind, eta[i] + etaw[i])
1643                .map_err(|e| format!("fixed-link wiggle inverse-link evaluation failed: {e}"))?;
1644            Ok(jet.mu)
1645        })?;
1646        Ok(GenerativeSpec {
1647            mean,
1648            noise: NoiseModel::Bernoulli,
1649        })
1650    }
1651}