gam-models 0.3.127

Model families (GAMLSS, survival location-scale, BMS) for the gam penalized-likelihood engine
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
//! Pure scalar objective-derivative coefficient helpers for the binomial
//! location-scale family.
//!
//! Self-contained seam extracted from the gamlss monolith (issue #780): the
//! three closed-form arithmetic kernels that assemble the Hessian coefficient,
//! its first directional derivative, and its mixed second directional
//! derivative from the per-row objective derivative magnitudes
//! `m_k = F^(k)(q)` and the scalar `q`-map derivative terms.
//!
//! ## #932: the outer chain rule is single-sourced
//!
//! Each `coeff` is one entry of the composite `F(q(θ))` where `F` is the scalar
//! per-row objective (its derivative stack `m1..m4 = F^(k)(q)` arrives already
//! single-sourced from the `binomial_loglik_q_tower` Faà-di-Bruno path) and
//! `q(θ)` is the latent-position map carrying its own partials in up to four
//! distinct differentiation directions (`a`, `b`, `u`, `v`).
//!
//! * `hessian_coeff` (`H_ab`, order 2) is the **hot inner-Newton** path and is
//!   kept as the hand-tuned closed form `m2·q_a q_b + m1·q_ab` — the cheapest
//!   possible spelling, evaluated once per row per inner iterate. It is pinned
//!   bit-for-bit against the `Tower2` composition in the oracle_tests test below.
//! * `directionalhessian_coeff` (`D_u H_ab`, order 3) and
//!   `second_directionalhessian_coeff` (`D²_{uv} H_ab`, order 4) are the
//!   **outer-loop cross-block** chains — the exact #736/#947/#948 bug genus
//!   (a dropped or double-counted product-rule term that is invisible until a
//!   new consumer touches the full mixed block). They are now derived
//!   MECHANICALLY: a `Tower4` jet seeded with the `q`-map partials in the four
//!   distinct seed directions, composed with the `F`-derivative stack, and the
//!   single mixed channel `t3[a,b,u]` / `t4[a,b,u,v]` read out. The
//!   Leibniz/Faà-di-Bruno coefficients are produced by the shared partition
//!   walker, not by hand, so no cross term can be silently dropped.
//!
//! The pre-migration hand formulas are retained verbatim as the bit-identity
//! oracle (`mod oracle_tests`) the new tower path is pinned against (the
//! `verify_kernel_channels` discipline — the hand calculus is the witness, the
//! tower is the single source of truth).

use crate::fast_channel::{faa_top2, faa_top3, faa_top4};

#[inline]
pub(crate) fn hessian_coeff_fromobjective_q_terms(
    m1: f64,
    m2: f64,
    q_a: f64,
    q_b: f64,
    q_ab: f64,
) -> f64 {
    // #932 unified source: `H_ab = ∂²(F∘q)/∂a∂b` is the fully-mixed order-2 top
    // channel of the composition, so it IS `fast_channel::faa_top2` — the
    // universal Faà di Bruno partition sum `m₂·q_a q_b + m₁·q_ab`. No
    // per-family chain rule; the primitive is pinned to the runtime partition
    // walker in `fast_channel`, and compiles to the same straight line as the
    // closed form (a packed order-2 dual matched hand asm).
    faa_top2([m1, m2], &[0.0, q_a, q_b, q_ab])
}

#[inline]
pub(crate) fn directionalhessian_coeff_fromobjective_q_terms(
    m1: f64,
    m2: f64,
    m3: f64,
    dq: f64,
    q_a: f64,
    q_b: f64,
    q_ab: f64,
    dq_a: f64,
    dq_b: f64,
    dq_ab: f64,
) -> f64 {
    // #932 unified source: `D_u H_ab = ∂³(F∘q)/∂a∂b∂u` is the fully-mixed
    // order-3 top channel, so it IS `fast_channel::faa_top3` — the universal
    // partition sum over the three distinct directions {a, b, u}. Pack the
    // q-map block partials into the bitmask array (a=1, b=2, u=4) and read the
    // top channel. This is the SAME jet truth as `Tower4<3>::compose.t3[a][b][u]`
    // (pinned in `oracle_tests`) but computes ONLY the read channel as a
    // compile-time-unrolled sum — measured at ~hand instruction count, vs ~19×
    // for the dense tower that materializes the whole 3⁴ tensor.
    let q = [
        0.0, q_a, q_b, q_ab, // _, a, b, ab
        dq, dq_a, dq_b, dq_ab, // u, au, bu, abu
    ];
    faa_top3([m1, m2, m3], &q)
}

#[inline]
pub(crate) fn second_directionalhessian_coeff_fromobjective_q_terms(
    m1: f64,
    m2: f64,
    m3: f64,
    m4: f64,
    dq_u: f64,
    dqv: f64,
    d2q_uv: f64,
    q_a: f64,
    q_b: f64,
    q_ab: f64,
    dq_a_u: f64,
    dq_av: f64,
    dq_b_u: f64,
    dq_bv: f64,
    d2q_a_uv: f64,
    d2q_b_uv: f64,
    dq_ab_u: f64,
    dq_abv: f64,
    d2q_ab_uv: f64,
) -> f64 {
    // #932 unified source: `D²_{uv} H_ab = ∂⁴(F∘q)/∂a∂b∂u∂v` is the fully-mixed
    // order-4 top channel, so it IS `fast_channel::faa_top4` — the universal
    // partition sum over the four distinct directions {a, b, u, v}. Pack the
    // q-map block partials into the bitmask array (a=1, b=2, u=4, v=8) and read
    // the top channel. Same jet truth as `Tower4<4>::compose.t4[a][b][u][v]`
    // (pinned in `oracle_tests`) but computes ONLY that channel as a
    // compile-time-unrolled 15-term sum — measured at ≤ hand instruction count,
    // vs ~19× for the dense tower. The single `dq_u·dqv·q_ab` term an even-older
    // hand path once DOUBLE-counted is one partition (`{u}{v}` over the `q_ab`
    // block) the universal rule emits exactly once.
    let q = [
        0.0, q_a, q_b, q_ab, // _, a, b, ab
        dq_u, dq_a_u, dq_b_u, dq_ab_u, // u, au, bu, abu
        dqv, dq_av, dq_bv, dq_abv, // v, av, bv, abv
        d2q_uv, d2q_a_uv, d2q_b_uv, d2q_ab_uv, // uv, auv, buv, abuv
    ];
    faa_top4([m1, m2, m3, m4], &q)
}

#[cfg(test)]
mod oracle_tests {
    //! #932 single-source oracles: the production coefficients carry the
    //! hand-factored straight-line form (fast — the dense tower is ~19× the
    //! instruction count to read one channel), and these tests pin them
    //! BIT-FOR-BIT against the mechanical `Tower` composition that IS the
    //! single source of truth. The jet is the truth; the hand spelling is the
    //! compiled form. If the hand factorization ever drifts from the jet these
    //! channels disagree.
    use super::*;
    use gam_math::jet_tower::{Tower2, Tower4};

    /// Distinct seed-direction indices for the `q`-map jet: the two
    /// Hessian-block axes `a`/`b` and the two directional axes `u`/`v`.
    const A: usize = 0;
    const B: usize = 1;
    const U: usize = 2;
    const V: usize = 3;

    /// The hand `Tower2`-equivalent for the order-2 Hessian coefficient.
    fn hessian_via_tower(m1: f64, m2: f64, q_a: f64, q_b: f64, q_ab: f64) -> f64 {
        let mut q = Tower2::<2>::zero();
        q.g[0] = q_a;
        q.g[1] = q_b;
        q.h[0][1] = q_ab;
        q.h[1][0] = q_ab;
        q.compose_unary([0.0, m1, m2]).h[0][1]
    }

    /// The SINGLE SOURCE for `D_u H_ab` (order 3): seed a `Tower4<3>` jet for
    /// `q` over the sub-blocks of the three distinct directions {a, b, u},
    /// compose with the F-stack `[·, m1, m2, m3]`, and read the mixed channel
    /// `t3[a][b][u]`. The partition walker supplies every Leibniz coefficient
    /// mechanically — this is what the production hand factorization must equal.
    fn tower_order3(
        m1: f64,
        m2: f64,
        m3: f64,
        dq: f64,
        q_a: f64,
        q_b: f64,
        q_ab: f64,
        dq_a: f64,
        dq_b: f64,
        dq_ab: f64,
    ) -> f64 {
        let mut q = Tower4::<3>::zero();
        q.g[A] = q_a;
        q.g[B] = q_b;
        q.g[U] = dq;
        q.h[A][B] = q_ab;
        q.h[A][U] = dq_a;
        q.h[B][U] = dq_b;
        q.t3[A][B][U] = dq_ab;
        q.compose_unary([0.0, m1, m2, m3, 0.0]).t3[A][B][U]
    }

    /// The SINGLE SOURCE for `D²_{uv} H_ab` (order 4): seed a `Tower4<4>` jet
    /// for `q` over the 15 mixed sub-blocks of the four distinct directions
    /// {a, b, u, v}, compose with `[·, m1, m2, m3, m4]`, and read `t4[a][b][u][v]`.
    fn tower_order4(
        m1: f64,
        m2: f64,
        m3: f64,
        m4: f64,
        dq_u: f64,
        dqv: f64,
        d2q_uv: f64,
        q_a: f64,
        q_b: f64,
        q_ab: f64,
        dq_a_u: f64,
        dq_av: f64,
        dq_b_u: f64,
        dq_bv: f64,
        d2q_a_uv: f64,
        d2q_b_uv: f64,
        dq_ab_u: f64,
        dq_abv: f64,
        d2q_ab_uv: f64,
    ) -> f64 {
        let mut q = Tower4::<4>::zero();
        q.g[A] = q_a;
        q.g[B] = q_b;
        q.g[U] = dq_u;
        q.g[V] = dqv;
        q.h[A][B] = q_ab;
        q.h[A][U] = dq_a_u;
        q.h[A][V] = dq_av;
        q.h[B][U] = dq_b_u;
        q.h[B][V] = dq_bv;
        q.h[U][V] = d2q_uv;
        q.t3[A][B][U] = dq_ab_u;
        q.t3[A][B][V] = dq_abv;
        q.t3[A][U][V] = d2q_a_uv;
        q.t3[B][U][V] = d2q_b_uv;
        q.t4[A][B][U][V] = d2q_ab_uv;
        q.compose_unary([0.0, m1, m2, m3, m4]).t4[A][B][U][V]
    }

    /// A deterministic pseudo-random stream so every channel participates with a
    /// distinct nonzero value (no accidental cancellation hides a dropped term).
    fn stream(seed: u64) -> impl FnMut() -> f64 {
        let mut s = seed;
        move || {
            s = s
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407);
            ((s >> 11) as f64 / (1u64 << 53) as f64) * 2.0 - 1.0
        }
    }

    fn close(label: &str, got: f64, want: f64) {
        let tol = 1e-12 * want.abs().max(1.0);
        assert!(
            (got - want).abs() <= tol,
            "{label}: tower {got:+.17e} vs hand {want:+.17e} (|Δ|={:.3e})",
            (got - want).abs()
        );
    }

    #[test]
    fn hessian_matches_tower() {
        let mut next = stream(0xC0FFEE);
        for _ in 0..200 {
            let (m1, m2) = (next(), next());
            let (q_a, q_b, q_ab) = (next(), next(), next());
            let prod = hessian_coeff_fromobjective_q_terms(m1, m2, q_a, q_b, q_ab);
            let tower = hessian_via_tower(m1, m2, q_a, q_b, q_ab);
            close("H_ab", prod, tower);
        }
    }

    #[test]
    fn directional_matches_jet_single_source() {
        let mut next = stream(0xBEEF);
        for _ in 0..400 {
            let m = [next(), next(), next()];
            let (dq, q_a, q_b, q_ab, dq_a, dq_b, dq_ab) =
                (next(), next(), next(), next(), next(), next(), next());
            // production (hand-factored, fast) vs the Tower4<3> single source.
            let prod = directionalhessian_coeff_fromobjective_q_terms(
                m[0], m[1], m[2], dq, q_a, q_b, q_ab, dq_a, dq_b, dq_ab,
            );
            let tower = tower_order3(m[0], m[1], m[2], dq, q_a, q_b, q_ab, dq_a, dq_b, dq_ab);
            close("D_u H_ab", prod, tower);
        }
    }

    #[test]
    fn second_directional_matches_jet_single_source() {
        let mut next = stream(0xD00D);
        for _ in 0..400 {
            let m = [next(), next(), next(), next()];
            let dq_u = next();
            let dqv = next();
            let d2q_uv = next();
            let q_a = next();
            let q_b = next();
            let q_ab = next();
            let dq_a_u = next();
            let dq_av = next();
            let dq_b_u = next();
            let dq_bv = next();
            let d2q_a_uv = next();
            let d2q_b_uv = next();
            let dq_ab_u = next();
            let dq_abv = next();
            let d2q_ab_uv = next();
            // production (hand-factored, fast) vs the Tower4<4> single source.
            let prod = second_directionalhessian_coeff_fromobjective_q_terms(
                m[0], m[1], m[2], m[3], dq_u, dqv, d2q_uv, q_a, q_b, q_ab, dq_a_u, dq_av, dq_b_u,
                dq_bv, d2q_a_uv, d2q_b_uv, dq_ab_u, dq_abv, d2q_ab_uv,
            );
            let tower = tower_order4(
                m[0], m[1], m[2], m[3], dq_u, dqv, d2q_uv, q_a, q_b, q_ab, dq_a_u, dq_av, dq_b_u,
                dq_bv, d2q_a_uv, d2q_b_uv, dq_ab_u, dq_abv, d2q_ab_uv,
            );
            close("D2_uv H_ab", prod, tower);
        }
    }

    /// #932 βw cross-channel guard. The link-wiggle kernel
    /// (`binomial/wiggle.rs`) does NOT call
    /// `second_directionalhessian_coeff_fromobjective_q_terms` per (row, coeff)
    /// — that would be `O(n · p_w)` order-4 tower evaluations. Instead it
    /// hand-expands the coefficient as a polynomial in each wiggle basis slot
    /// `(b, b′, b″, b‴)` and factors out the per-row scalar coefficients
    /// `alpha_xw_{b,d,dd,d3}` (the `X_t/X_ls ↔ wiggle` cross blocks) and
    /// `c_ww_{bb,bd,bdd,dd_pair}` (the wiggle↔wiggle block), so the operator
    /// applies them as rank structure. Those hand expansions are the exact
    /// #736/#947 bug genus (a dropped or mis-weighted mixed term).
    ///
    /// This test re-derives both expansions from the documented substitutions
    /// and asserts they reproduce the SINGLE-SOURCE coefficient bit-for-bit, so
    /// any future drift in the wiggle expansion that diverges from the jet
    /// truth is caught here rather than in a silently wrong outer Hessian.
    #[test]
    fn betaw_cross_channel_expansions_match_single_source() {
        let mut next = stream(0x5151);
        let mut max_xw = 0.0_f64;
        let mut max_ww = 0.0_f64;
        for _ in 0..2000 {
            let (m1, m2, m3, m4) = (next(), next(), next(), next());
            let (dq_u, dqv, d2q_uv) = (next(), next(), next());
            // full-q x-channel partials (x = t).
            let (q_t, dq_t_u, dq_tv, d2q_t_uv) = (next(), next(), next(), next());
            // base-q0 partials.
            let (q0_t, dq0_u, dq0v, d2q0_uv) = (next(), next(), next(), next());
            let (dq0_t_u, dq0_tv, d2q0_t_uv) = (next(), next(), next());
            // one wiggle basis slot j and a second slot k.
            let (br, dr, ddr, d3r) = (next(), next(), next(), next());

            // X_t ↔ wiggle cross block: a = t, b = w. Substitutions mirror the
            // `binomial/wiggle.rs` comment (qw = b, dqw_u = b′·dq0u, …).
            let qw = br;
            let dqw_u = dr * dq0_u;
            let dqwv = dr * dq0v;
            let d2qw_uv = ddr * dq0_u * dq0v + dr * d2q0_uv;
            let q_tw = dr * q0_t;
            let dq_tw_u = ddr * dq0_u * q0_t + dr * dq0_t_u;
            let dq_twv = ddr * dq0v * q0_t + dr * dq0_tv;
            let d2q_tw_uv = d3r * dq0_u * dq0v * q0_t
                + ddr * (d2q0_uv * q0_t + dq0_u * dq0_tv + dq0v * dq0_t_u)
                + dr * d2q0_t_uv;
            let coeff_tw = second_directionalhessian_coeff_fromobjective_q_terms(
                m1, m2, m3, m4, dq_u, dqv, d2q_uv, q_t, qw, q_tw, dq_t_u, dq_tv, dqw_u, dqwv,
                d2q_t_uv, d2qw_uv, dq_tw_u, dq_twv, d2q_tw_uv,
            );
            let alpha_b = m4 * dq_u * dqv * q_t
                + m3 * (d2q_uv * q_t + dq_u * dq_tv + dqv * dq_t_u)
                + m2 * d2q_t_uv;
            let alpha_d = m3 * (dq_u * q_t * dq0v + dqv * q_t * dq0_u + dq_u * dqv * q0_t)
                + m2 * (dq_t_u * dq0v
                    + dq_tv * dq0_u
                    + q_t * d2q0_uv
                    + d2q_uv * q0_t
                    + dq_u * dq0_tv
                    + dqv * dq0_t_u)
                + m1 * d2q0_t_uv;
            let alpha_dd = m2 * (q_t * dq0_u * dq0v + dq_u * dq0v * q0_t + dqv * dq0_u * q0_t)
                + m1 * (d2q0_uv * q0_t + dq0_u * dq0_tv + dq0v * dq0_t_u);
            let alpha_d3 = m1 * dq0_u * dq0v * q0_t;
            let recon_xw = alpha_b * br + alpha_d * dr + alpha_dd * ddr + alpha_d3 * d3r;
            max_xw = max_xw.max((coeff_tw - recon_xw).abs() / coeff_tw.abs().max(1.0));

            // wiggle ↔ wiggle block: a = w_j, b = w_k (q linear in each βw, so
            // q_ab = 0 and there is no third/fourth own-channel term).
            let (brk, drk, ddrk) = (next(), next(), next());
            let coeff_ww = second_directionalhessian_coeff_fromobjective_q_terms(
                m1,
                m2,
                m3,
                m4,
                dq_u,
                dqv,
                d2q_uv,
                br,
                brk,
                0.0,
                dr * dq0_u,
                dr * dq0v,
                drk * dq0_u,
                drk * dq0v,
                ddr * dq0_u * dq0v + dr * d2q0_uv,
                ddrk * dq0_u * dq0v + drk * d2q0_uv,
                0.0,
                0.0,
                0.0,
            );
            let c_ww_bb = m4 * dq_u * dqv + m3 * d2q_uv;
            let c_ww_bd = m3 * (dq_u * dq0v + dqv * dq0_u) + m2 * d2q0_uv;
            let c_ww_bdd = m2 * dq0_u * dq0v;
            let c_ww_dd_pair = 2.0 * m2 * dq0_u * dq0v;
            let recon_ww = c_ww_bb * br * brk
                + c_ww_bd * (br * drk + dr * brk)
                + c_ww_bdd * (br * ddrk + ddr * brk)
                + c_ww_dd_pair * dr * drk;
            max_ww = max_ww.max((coeff_ww - recon_ww).abs() / coeff_ww.abs().max(1.0));
        }
        assert!(
            max_xw < 1e-12,
            "alpha_xw drifted from single source: {max_xw:.3e}"
        );
        assert!(
            max_ww < 1e-12,
            "c_ww drifted from single source: {max_ww:.3e}"
        );
    }
}