gam-models 0.3.132

Model families (GAMLSS, survival location-scale, BMS) for the gam penalized-likelihood engine
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
//! Empirical KKT-refusal report for the survival marginal-slope joint
//! penalized Hessian (gam#979).
//!
//! Background. The residual #979 pathology is an ill-posed survival
//! marginal-slope shape whose joint penalized Hessian
//! `M = Jᵀ H J + S` carries a quadratically-flat near-null direction
//! (tiny `λ_min`, huge condition number, `nullity@tol = 0`). When that
//! direction is left unreduced and handed to the constrained joint Newton,
//! the inner solve cannot certify stationarity, seed screening escalates,
//! and the outer wall-clock deadline becomes load-bearing rather than a
//! pure backstop.
//!
//! The temptation is to simply shrink or delete every near-null direction.
//! That is reward-hacking: a near-null direction can also be a *genuine*
//! non-stationarity (a near-separating direction the model legitimately
//! wants to push), and silently projecting it would hide a real failure to
//! converge. A naive nullspace-shrinkage fix of exactly this kind already
//! broke the `n ≥ 1000` spatial path.
//!
//! This module produces an **empirical, measured** report instead. It
//! assembles `M` and the joint score `g` at the pilot operating point,
//! eigendecomposes `M`, and for every near-null direction `v` measures the
//! gradient residual `r = |vᵀ g|`. A direction is classified as a
//! projectable PHANTOM only when its residual stays at or below the gate
//! `λ · step` (equivalently, its unconstrained Newton step `r/λ` stays
//! within the trust radius `step`): the optimizer would converge that
//! direction in under one step, so projecting it cannot change the optimum
//! and cannot hide non-stationarity. A direction whose residual EXCEEDS the
//! gate is flagged as real non-stationarity and is deliberately left alone.
//!
//! The joint Hessian here is small (`p_total ≲ 30`); the eigendecomposition
//! and gating are deterministic and cheap, so the whole report is a
//! crate-level diagnostic that does not require running the full outer fit.

use faer::Side;
use gam_linalg::faer_ndarray::FaerEigh;
use gam_problem::diagnostics::KktRefusalDiagnosis;
use ndarray::{Array1, Array2, Array3};

use super::identifiability::SurvivalRowHessian;

const K_SURVIVAL: usize = 4;

/// Default coefficient-space trust radius for the phantom gate. A near-null
/// direction is a projectable phantom only when its unconstrained Newton
/// step `r/λ` stays within this radius. It is the convergence step scale of
/// the inner solver: a direction already stationary to within this radius is
/// "converged", so projecting it is identity on the optimum. Deliberately
/// small so the gate is conservative — projecting fewer directions risks no
/// reward-hacking, only a missed cleanup that the deadline still backstops.
pub(crate) const KKT_PHANTOM_TRUST_RADIUS: f64 = 1.0e-3;

/// Relative tolerance applied to `λ_max` when counting eigenvalues as
/// structurally zero (`nullity@tol`) and when flooring the gate so a
/// genuinely zero-curvature direction is still allowed its numerical slop.
pub(crate) const KKT_RANK_REL_TOL: f64 = 1.0e-10;

/// Relative threshold (against `λ_max`) below which an eigenvalue is
/// considered "near-null" and is examined by the gate. Captures the
/// "tiny `λ_min`, huge condition number" regime up to condition `~1e8`.
pub(crate) const KKT_NEAR_NULL_REL_TOL: f64 = 1.0e-8;

/// One near-null eigen-direction of the joint penalized Hessian, with the
/// measured quantities the phantom gate decides on.
#[derive(Clone, Debug)]
pub(crate) struct NearNullDirection {
    /// Eigenvalue `λ` (curvature along this direction).
    pub(crate) eigenvalue: f64,
    /// Measured gradient residual `r = |vᵀ g|` along the eigenvector.
    pub(crate) gradient_residual: f64,
    /// Unconstrained Newton step magnitude `r / max(λ, floor)`.
    pub(crate) newton_step: f64,
    /// Gate threshold `max(λ, rank_floor·λ_max) · step`.
    pub(crate) gate_threshold: f64,
    /// `true` iff `gradient_residual ≤ gate_threshold` — the measured,
    /// non-reward-hacking signal that this direction is a phantom that may be
    /// projected away.
    pub(crate) phantom_projectable: bool,
    /// The eigenvector in full joint-coefficient coordinates.
    pub(crate) eigenvector: Array1<f64>,
}

/// Empirical refusal report on the joint penalized Hessian `M = Jᵀ H J + S`
/// and joint score `g` at a fixed operating point.
#[derive(Clone, Debug)]
pub(crate) struct SurvivalKktRefusalReport {
    /// Eigenvalues of `M`, ascending.
    pub(crate) eigenvalues_ascending: Vec<f64>,
    pub(crate) lambda_min: f64,
    pub(crate) lambda_max: f64,
    /// `λ_max / λ_min` (`+inf` when `λ_min == 0`).
    pub(crate) condition_number: f64,
    /// Count of eigenvalues `≤ KKT_RANK_REL_TOL · λ_max`.
    pub(crate) nullity_at_tol: usize,
    pub(crate) rank_tol: f64,
    /// The coefficient-space trust radius the gate used.
    pub(crate) step: f64,
    /// Every near-null direction, with its gate decision.
    pub(crate) near_null: Vec<NearNullDirection>,
    pub(crate) diagnosis: KktRefusalDiagnosis,
}

impl SurvivalKktRefusalReport {
    /// Number of near-null directions the gate classified as projectable
    /// phantoms (`r ≤ λ·step`).
    pub(crate) fn phantom_projectable_count(&self) -> usize {
        self.near_null
            .iter()
            .filter(|d| d.phantom_projectable)
            .count()
    }

    /// Number of near-null directions that FAILED the gate — measured real
    /// non-stationarity that must NOT be projected away.
    pub(crate) fn real_nonstationary_count(&self) -> usize {
        self.near_null
            .iter()
            .filter(|d| !d.phantom_projectable)
            .count()
    }

    /// `true` iff there is at least one near-null direction and EVERY
    /// near-null direction is a measured projectable phantom. This is the
    /// gate the design-reduction fallback consults before accepting a
    /// channel-collapsing reduction: the collapse is legitimate
    /// identifiability cleanup only when no collapsed direction carries real
    /// non-stationarity.
    pub(crate) fn all_near_null_are_phantom(&self) -> bool {
        !self.near_null.is_empty() && self.real_nonstationary_count() == 0
    }

    /// Compact one-line summary for structured logging. Reads every measured
    /// field so the empirical report is fully surfaced in the production log
    /// (eigenvalue spectrum extent, rank/gate scales, and the carrying
    /// near-null direction's residual / Newton step / eigenvector extent).
    pub(crate) fn summary(&self) -> String {
        let p = self.eigenvalues_ascending.len();
        let carrying = self.near_null.first();
        let (car_lambda, car_resid, car_newton, car_gate, car_vinf, car_phantom) = match carrying {
            Some(d) => (
                d.eigenvalue,
                d.gradient_residual,
                d.newton_step,
                d.gate_threshold,
                d.eigenvector.iter().fold(0.0_f64, |m, &v| m.max(v.abs())),
                d.phantom_projectable,
            ),
            None => (f64::NAN, f64::NAN, f64::NAN, f64::NAN, f64::NAN, false),
        };
        format!(
            "p={p} lambda_min={:.4e} lambda_max={:.4e} cond={:.4e} \
             nullity@tol={} rank_tol={:.4e} step={:.4e} near_null={} \
             phantom={} real={} diagnosis={} | carrying: lambda={:.4e} \
             resid={:.4e} newton_step={:.4e} gate={:.4e} v_inf={:.4e} phantom={}",
            self.lambda_min,
            self.lambda_max,
            self.condition_number,
            self.nullity_at_tol,
            self.rank_tol,
            self.step,
            self.near_null.len(),
            self.phantom_projectable_count(),
            self.real_nonstationary_count(),
            self.diagnosis.as_str(),
            car_lambda,
            car_resid,
            car_newton,
            car_gate,
            car_vinf,
            car_phantom,
        )
    }
}

/// The six effective per-row designs that chain the stacked block layouts
/// into the survival primary state `(q0, q1, qd1, g)`: the time columns
/// (`dq0`/`dq1`/`dqd1` driving `q0`/`q1`/`qd1`), the marginal columns (`m_dq`
/// driving `q0` and `q1`, `m_dqd1` driving `qd1`), and the logslope columns
/// (`g_dg` driving `g`). Grouped because every assembly threads the same six
/// matrices together as one effective metric.
#[derive(Clone, Copy)]
pub(crate) struct SurvivalEffectiveDesigns<'a> {
    pub(crate) dq0: &'a Array2<f64>,
    pub(crate) dq1: &'a Array2<f64>,
    pub(crate) dqd1: &'a Array2<f64>,
    pub(crate) m_dq: &'a Array2<f64>,
    pub(crate) m_dqd1: &'a Array2<f64>,
    pub(crate) g_dg: &'a Array2<f64>,
}

/// Per-row pilot operating point: the linearisation primary state
/// `(q0, q1, qd1, g)` paired with the observation columns `(z, weights,
/// event)` the survival row kernel needs to evaluate the score there.
#[derive(Clone, Copy)]
pub(crate) struct SurvivalPilotRows<'a> {
    pub(crate) q0: &'a Array1<f64>,
    pub(crate) q1: &'a Array1<f64>,
    pub(crate) qd1: &'a Array1<f64>,
    pub(crate) g: &'a Array1<f64>,
    pub(crate) z: &'a Array1<f64>,
    pub(crate) weights: &'a Array1<f64>,
    pub(crate) event: &'a Array1<f64>,
}

/// Link-kernel scalars shared by the survival row primary evaluation.
#[derive(Clone, Copy)]
pub(crate) struct SurvivalLinkParams {
    pub(crate) derivative_guard: f64,
    pub(crate) probit_scale: f64,
}

/// Assemble the joint penalized Hessian `M = Σ_i J_iᵀ H_i J_i + S` and the
/// joint score `g = Σ_i J_iᵀ grad_i` at a fixed operating point.
///
/// The per-row effective primary Jacobian `J_i` (`4 × p_total`) chains the
/// stacked block designs into the survival primary state `(q0, q1, qd1, g)`:
/// time columns drive `(q0, q1, qd1)`; marginal columns drive `(q0, q1)`
/// equally and `qd1` via the marginal derivative design; logslope columns
/// drive `g`. This is the same effective metric the full 4×4 row-Hessian
/// identifiability compiler residualises in.
///
/// `row_hess` is the PSD per-row `(n × 4 × 4)` Hessian and `row_grad` the
/// per-row `(n × 4)` gradient, both in `(q0, q1, qd1, g)` order. `s_total`
/// is the unit-weight block-diagonal penalty (its NULLSPACE is what makes a
/// confounded direction unidentifiable for ALL smoothing parameters, so unit
/// weights suffice to expose the phantom).
pub(crate) fn assemble_joint_penalized_hessian_and_score(
    designs: SurvivalEffectiveDesigns<'_>,
    row_hess: &Array3<f64>,
    row_grad: &Array2<f64>,
    s_total: &Array2<f64>,
) -> Result<(Array2<f64>, Array1<f64>), String> {
    let SurvivalEffectiveDesigns {
        dq0,
        dq1,
        dqd1,
        m_dq,
        m_dqd1,
        g_dg,
    } = designs;
    let n = dq0.nrows();
    let p_time = dq0.ncols();
    let p_marg = m_dq.ncols();
    let p_log = g_dg.ncols();
    let p_total = p_time + p_marg + p_log;
    for (name, rows, cols, want_rows, want_cols) in [
        ("dq1", dq1.nrows(), dq1.ncols(), n, p_time),
        ("dqd1", dqd1.nrows(), dqd1.ncols(), n, p_time),
        ("m_dqd1", m_dqd1.nrows(), m_dqd1.ncols(), n, p_marg),
    ] {
        if rows != want_rows || cols != want_cols {
            return Err(format!(
                "kkt_refusal assembly: {name} is {rows}x{cols}, expected {want_rows}x{want_cols}"
            ));
        }
    }
    if row_hess.shape() != [n, K_SURVIVAL, K_SURVIVAL] {
        return Err(format!(
            "kkt_refusal assembly: row_hess is {:?}, expected [{n}, {K_SURVIVAL}, {K_SURVIVAL}]",
            row_hess.shape()
        ));
    }
    if row_grad.shape() != [n, K_SURVIVAL] {
        return Err(format!(
            "kkt_refusal assembly: row_grad is {:?}, expected [{n}, {K_SURVIVAL}]",
            row_grad.shape()
        ));
    }
    if s_total.shape() != [p_total, p_total] {
        return Err(format!(
            "kkt_refusal assembly: s_total is {:?}, expected [{p_total}, {p_total}]",
            s_total.shape()
        ));
    }

    let mut m = s_total.clone();
    let mut g = Array1::<f64>::zeros(p_total);
    // Per-row 4×p_total effective Jacobian, reused across rows.
    let mut j_i = Array2::<f64>::zeros((K_SURVIVAL, p_total));
    let marg_off = p_time;
    let log_off = p_time + p_marg;
    for i in 0..n {
        j_i.fill(0.0);
        for c in 0..p_time {
            j_i[[0, c]] = dq0[[i, c]];
            j_i[[1, c]] = dq1[[i, c]];
            j_i[[2, c]] = dqd1[[i, c]];
        }
        for c in 0..p_marg {
            let gc = marg_off + c;
            j_i[[0, gc]] = m_dq[[i, c]];
            j_i[[1, gc]] = m_dq[[i, c]];
            j_i[[2, gc]] = m_dqd1[[i, c]];
        }
        for c in 0..p_log {
            j_i[[3, log_off + c]] = g_dg[[i, c]];
        }
        // H_i J_i  (4 × p_total)
        let mut hj = Array2::<f64>::zeros((K_SURVIVAL, p_total));
        for a in 0..K_SURVIVAL {
            for col in 0..p_total {
                let mut acc = 0.0;
                for b in 0..K_SURVIVAL {
                    acc += row_hess[[i, a, b]] * j_i[[b, col]];
                }
                hj[[a, col]] = acc;
            }
        }
        // M += J_iᵀ (H_i J_i);  g += J_iᵀ grad_i
        for col in 0..p_total {
            for row in 0..p_total {
                let mut acc = 0.0;
                for a in 0..K_SURVIVAL {
                    acc += j_i[[a, row]] * hj[[a, col]];
                }
                m[[row, col]] += acc;
            }
            let mut gacc = 0.0;
            for a in 0..K_SURVIVAL {
                gacc += j_i[[a, col]] * row_grad[[i, a]];
            }
            g[col] += gacc;
        }
    }
    // Symmetrize M defensively (the accumulation is symmetric up to rounding).
    for row in 0..p_total {
        for col in (row + 1)..p_total {
            let avg = 0.5 * (m[[row, col]] + m[[col, row]]);
            m[[row, col]] = avg;
            m[[col, row]] = avg;
        }
    }
    Ok((m, g))
}

/// Build the empirical refusal report from an already-assembled joint
/// penalized Hessian `M` and joint score `g`, gating each near-null
/// direction at trust radius `step`.
pub(crate) fn build_refusal_report_from_hessian(
    m: &Array2<f64>,
    g: &Array1<f64>,
    step: f64,
) -> Result<SurvivalKktRefusalReport, String> {
    let p = m.nrows();
    if m.ncols() != p {
        return Err(format!("kkt_refusal: M is {}x{}, must be square", p, m.ncols()));
    }
    if g.len() != p {
        return Err(format!("kkt_refusal: g len {} != M dim {p}", g.len()));
    }
    if !step.is_finite() || step <= 0.0 {
        return Err(format!("kkt_refusal: step must be finite and positive, got {step}"));
    }
    let (evals, evecs) = m
        .eigh(Side::Lower)
        .map_err(|e| format!("kkt_refusal: eigendecomposition of M failed: {e:?}"))?;
    // `eigh` yields ascending eigenvalues with eigenvectors in matching
    // columns, but do not rely on that ordering — sort an index permutation.
    let mut order: Vec<usize> = (0..p).collect();
    order.sort_by(|&a, &b| evals[a].partial_cmp(&evals[b]).unwrap_or(std::cmp::Ordering::Equal));
    let eigenvalues_ascending: Vec<f64> = order.iter().map(|&i| evals[i]).collect();
    let lambda_min = eigenvalues_ascending.first().copied().unwrap_or(0.0);
    let lambda_max = eigenvalues_ascending.last().copied().unwrap_or(0.0);
    let condition_number = if lambda_min > 0.0 {
        lambda_max / lambda_min
    } else {
        f64::INFINITY
    };
    let rank_tol = KKT_RANK_REL_TOL * lambda_max.max(0.0);
    let nullity_at_tol = eigenvalues_ascending
        .iter()
        .filter(|&&v| v <= rank_tol)
        .count();
    // Gate floor: a structurally-zero direction is still allowed the numerical
    // residual a rank-floor curvature would produce over one step.
    let gate_floor_curvature = rank_tol;
    let near_null_threshold = KKT_NEAR_NULL_REL_TOL * lambda_max.max(0.0);

    let mut near_null: Vec<NearNullDirection> = Vec::new();
    for &idx in &order {
        let eigenvalue = evals[idx];
        if eigenvalue > near_null_threshold {
            // Ascending order: once above the near-null band, stop.
            break;
        }
        let v = evecs.column(idx);
        let mut r = 0.0;
        for k in 0..p {
            r += v[k] * g[k];
        }
        let gradient_residual = r.abs();
        let gate_curvature = eigenvalue.max(gate_floor_curvature);
        let gate_threshold = gate_curvature * step;
        let newton_step = gradient_residual / gate_curvature;
        let phantom_projectable = gradient_residual <= gate_threshold;
        near_null.push(NearNullDirection {
            eigenvalue,
            gradient_residual,
            newton_step,
            gate_threshold,
            phantom_projectable,
            eigenvector: v.to_owned(),
        });
    }

    // Classify by the H spectrum, matching the shared diagnosis carrier.
    // The per-direction `phantom_projectable` flags carry the actual gate
    // decision; the diagnosis is the spectrum-level label.
    let diagnosis = if nullity_at_tol > 0 {
        KktRefusalDiagnosis::RankDeficientHPen
    } else {
        // nullity@tol == 0 but a near-null band exists with a huge condition
        // number: the #979 "phantom multiplier with well-conditioned H" case.
        KktRefusalDiagnosis::PhantomMultiplierWithWellConditionedH
    };

    Ok(SurvivalKktRefusalReport {
        eigenvalues_ascending,
        lambda_min,
        lambda_max,
        condition_number,
        nullity_at_tol,
        rank_tol,
        step,
        near_null,
        diagnosis,
    })
}

/// End-to-end report: assemble `M`/`g` from the effective designs + per-row
/// derivatives + unit penalty, then gate. Convenience wrapper used by the
/// fit-entry fallback and the unit tests.
pub(crate) fn survival_kkt_refusal_report_from_designs(
    designs: SurvivalEffectiveDesigns<'_>,
    row_hess: &Array3<f64>,
    row_grad: &Array2<f64>,
    s_total: &Array2<f64>,
    step: f64,
) -> Result<SurvivalKktRefusalReport, String> {
    let (m, g) =
        assemble_joint_penalized_hessian_and_score(designs, row_hess, row_grad, s_total)?;
    build_refusal_report_from_hessian(&m, &g, step)
}

/// Per-row gradient `(n × 4)` of the survival neg-log-likelihood at the pilot
/// primary state, in `(q0, q1, qd1, g)` order — the score companion to
/// [`SurvivalRowHessian::from_pilot_primary_state`].
pub(crate) fn survival_row_gradient_from_pilot_primary_state(
    rows: SurvivalPilotRows<'_>,
    link: SurvivalLinkParams,
) -> Result<Array2<f64>, String> {
    let SurvivalPilotRows {
        q0,
        q1,
        qd1,
        g,
        z,
        weights,
        event,
    } = rows;
    let SurvivalLinkParams {
        derivative_guard,
        probit_scale,
    } = link;
    let n = q0.len();
    for (name, len) in [
        ("q1", q1.len()),
        ("qd1", qd1.len()),
        ("g", g.len()),
        ("z", z.len()),
        ("weights", weights.len()),
        ("event", event.len()),
    ] {
        if len != n {
            return Err(format!(
                "survival_row_gradient: length mismatch q0={n}, {name}={len}"
            ));
        }
    }
    let mut out = Array2::<f64>::zeros((n, K_SURVIVAL));
    for i in 0..n {
        let (_, grad, _) = super::row_primary_for_compiler(
            q0[i],
            q1[i],
            qd1[i],
            g[i],
            z[i],
            weights[i],
            event[i],
            derivative_guard,
            probit_scale,
        )?;
        for a in 0..K_SURVIVAL {
            out[[i, a]] = grad[a];
        }
    }
    Ok(out)
}

/// Build the unit-weight block-diagonal total penalty `S` (dimension
/// `p_total = p_time + p_marg + p_log`) from per-block penalty matrices given
/// as full-block dense matrices. Each `*_block` matrix is square at its block
/// width and is embedded at its block offset. Penalties act block-locally, so
/// their nullspaces — the directions no smoothing parameter can ever bound —
/// are exactly the cross-block-confounded directions the report hunts.
pub(crate) fn assemble_unit_block_penalty(
    p_time: usize,
    p_marg: usize,
    p_log: usize,
    time_block: &Array2<f64>,
    marg_block: &Array2<f64>,
    log_block: &Array2<f64>,
) -> Result<Array2<f64>, String> {
    let p_total = p_time + p_marg + p_log;
    let mut s = Array2::<f64>::zeros((p_total, p_total));
    for (name, blk, off, width) in [
        ("time", time_block, 0usize, p_time),
        ("marginal", marg_block, p_time, p_marg),
        ("logslope", log_block, p_time + p_marg, p_log),
    ] {
        if blk.nrows() != width || blk.ncols() != width {
            return Err(format!(
                "assemble_unit_block_penalty: {name} block is {}x{}, expected {width}x{width}",
                blk.nrows(),
                blk.ncols(),
            ));
        }
        for r in 0..width {
            for c in 0..width {
                s[[off + r, off + c]] += blk[[r, c]];
            }
        }
    }
    Ok(s)
}

/// Sum a list of full-block dense penalty matrices (the `TimeBlockInput`
/// penalty representation, `Vec<Array2<f64>>`) into one `width × width`
/// block penalty. Each entry must already be `width × width`.
pub(crate) fn dense_block_penalty_from_dense_list(
    pens: &[Array2<f64>],
    width: usize,
) -> Result<Array2<f64>, String> {
    let mut s = Array2::<f64>::zeros((width, width));
    for (k, p) in pens.iter().enumerate() {
        if p.nrows() != width || p.ncols() != width {
            return Err(format!(
                "dense_block_penalty_from_dense_list: penalty {k} is {}x{}, expected {width}x{width}",
                p.nrows(),
                p.ncols(),
            ));
        }
        s += p;
    }
    Ok(s)
}

/// Embed a list of block-local [`super::BlockwisePenalty`] (the marginal /
/// logslope design penalty representation) into one `width × width` block
/// penalty by summing each `local` at its `col_range` offset.
pub(crate) fn dense_block_penalty_from_blockwise(
    pens: &[super::BlockwisePenalty],
    width: usize,
) -> Result<Array2<f64>, String> {
    let mut s = Array2::<f64>::zeros((width, width));
    for (k, p) in pens.iter().enumerate() {
        let r = p.col_range.clone();
        if r.end > width {
            return Err(format!(
                "dense_block_penalty_from_blockwise: penalty {k} col_range {}..{} exceeds width {width}",
                r.start, r.end,
            ));
        }
        let bl = r.len();
        if p.local.nrows() != bl || p.local.ncols() != bl {
            return Err(format!(
                "dense_block_penalty_from_blockwise: penalty {k} local is {}x{} but col_range width is {bl}",
                p.local.nrows(),
                p.local.ncols(),
            ));
        }
        for i in 0..bl {
            for j in 0..bl {
                s[[r.start + i, r.start + j]] += p.local[[i, j]];
            }
        }
    }
    Ok(s)
}

/// Build the empirical refusal report at a pilot operating point straight from
/// the survival design pieces, the PSD row Hessian, the pilot primary state,
/// and an already-assembled unit block penalty `s_total`. Computes the per-row
/// score internally (the gradient companion to the row Hessian) and gates each
/// near-null direction at trust radius `step`.
pub(crate) fn survival_kkt_refusal_report_at_pilot(
    designs: SurvivalEffectiveDesigns<'_>,
    row_hess: &SurvivalRowHessian,
    rows: SurvivalPilotRows<'_>,
    link: SurvivalLinkParams,
    s_total: &Array2<f64>,
    step: f64,
) -> Result<SurvivalKktRefusalReport, String> {
    use gam_identifiability::families::compiler::RowHessian;
    let row_grad = survival_row_gradient_from_pilot_primary_state(rows, link)?;
    let h_tensor = row_hess.evaluate_full();
    survival_kkt_refusal_report_from_designs(designs, &h_tensor, &row_grad, s_total, step)
}

#[cfg(test)]
mod tests {
    use super::*;
    use ndarray::Array3;

    /// Build a per-row Hessian tensor where every row is the same 4×4 PSD
    /// matrix `h`.
    fn constant_row_hess(n: usize, h: &Array2<f64>) -> Array3<f64> {
        let mut out = Array3::<f64>::zeros((n, K_SURVIVAL, K_SURVIVAL));
        for i in 0..n {
            for a in 0..K_SURVIVAL {
                for b in 0..K_SURVIVAL {
                    out[[i, a, b]] = h[[a, b]];
                }
            }
        }
        out
    }

    /// Rank-1-coupled primary Hessian on the (q1, g) sub-block:
    /// `[[1,1],[1,1]]`. The marginal channel drives q1 and the logslope
    /// channel drives g, so this q1↔g coupling is exactly what makes a
    /// SHARED marginal/logslope basis collinear in the effective metric —
    /// the real #979 confound. The (q1, g) block is PSD (eigenvalues 0, 2)
    /// with null direction `(q1 − g)`, so the joint (marginal − logslope)
    /// direction is flat. With independent bases the same coupling yields a
    /// well-conditioned Gram, so it doubles as the control geometry.
    fn coupled_q1_g_hessian() -> Array2<f64> {
        let mut h = Array2::<f64>::zeros((K_SURVIVAL, K_SURVIVAL));
        h[[1, 1]] = 1.0; // q1
        h[[3, 3]] = 1.0; // g
        h[[1, 3]] = 1.0; // q1↔g coupling
        h[[3, 1]] = 1.0;
        h
    }

    /// A confounded design: the marginal column and the logslope column are
    /// the SAME basis evaluated on the same rows, so in the q1/g effective
    /// metric the combined direction `(marginal, -logslope)` is flat. With
    /// NO independent gradient along it, it is a phantom: the gate must mark
    /// it projectable.
    #[test]
    fn confounded_flat_undriven_direction_is_a_projectable_phantom() {
        let n = 64;
        let p_time = 0;
        let p_marg = 1;
        let p_log = 1;
        // Shared basis values across rows.
        let mut basis = Array2::<f64>::zeros((n, 1));
        for i in 0..n {
            basis[[i, 0]] = (i as f64 / n as f64) - 0.5;
        }
        let dq0 = Array2::<f64>::zeros((n, p_time));
        let dq1 = Array2::<f64>::zeros((n, p_time));
        let dqd1 = Array2::<f64>::zeros((n, p_time));
        let m_dq = basis.clone();
        let m_dqd1 = Array2::<f64>::zeros((n, p_marg));
        let g_dg = basis.clone();
        let row_hess = constant_row_hess(n, &coupled_q1_g_hessian());
        // Gradient driving BOTH channels equally and in the SAME sign as the
        // shared basis, so the score lies along (marginal + logslope) — the
        // RANGE direction — and is orthogonal to the flat (marginal - logslope)
        // null direction. The null direction therefore carries no residual.
        let mut row_grad = Array2::<f64>::zeros((n, K_SURVIVAL));
        for i in 0..n {
            row_grad[[i, 1]] = basis[[i, 0]]; // q1 score
            row_grad[[i, 3]] = basis[[i, 0]]; // g score
        }
        let s_total = Array2::<f64>::zeros((p_marg + p_log, p_marg + p_log));
        let report = survival_kkt_refusal_report_from_designs(
            SurvivalEffectiveDesigns {
                dq0: &dq0,
                dq1: &dq1,
                dqd1: &dqd1,
                m_dq: &m_dq,
                m_dqd1: &m_dqd1,
                g_dg: &g_dg,
            },
            &row_hess,
            &row_grad,
            &s_total,
            KKT_PHANTOM_TRUST_RADIUS,
        )
        .expect("report builds");

        // Confound ⇒ tiny λ_min, huge condition number.
        assert!(
            report.lambda_min <= 1e-9 * report.lambda_max.max(1.0),
            "expected a near-null λ_min, got {:.3e} (λ_max={:.3e})",
            report.lambda_min,
            report.lambda_max
        );
        assert!(
            report.condition_number >= 1e8,
            "expected huge condition number, got {:.3e}",
            report.condition_number
        );
        assert!(
            !report.near_null.is_empty(),
            "expected at least one near-null direction"
        );
        // The flat direction is undriven ⇒ measured phantom ⇒ projectable.
        assert!(
            report.all_near_null_are_phantom(),
            "flat + undriven confound must be a projectable phantom; report: {}",
            report.summary()
        );
    }

    /// Same flat geometry, but now inject a gradient ALONG the flat
    /// (marginal - logslope) direction: a near-separating direction the model
    /// genuinely wants to push. The gate MUST refuse to project it — doing so
    /// would hide real non-stationarity (reward-hacking).
    #[test]
    fn flat_but_driven_direction_is_real_nonstationarity_not_projected() {
        let n = 64;
        let p_marg = 1;
        let p_log = 1;
        let mut basis = Array2::<f64>::zeros((n, 1));
        for i in 0..n {
            basis[[i, 0]] = (i as f64 / n as f64) - 0.5;
        }
        let dq0 = Array2::<f64>::zeros((n, 0));
        let dq1 = Array2::<f64>::zeros((n, 0));
        let dqd1 = Array2::<f64>::zeros((n, 0));
        let m_dq = basis.clone();
        let m_dqd1 = Array2::<f64>::zeros((n, p_marg));
        let g_dg = basis.clone();
        let row_hess = constant_row_hess(n, &coupled_q1_g_hessian());
        // Score along the FLAT direction: marginal channel pushed up, logslope
        // channel pushed down (opposite signs) ⇒ projects onto (marginal -
        // logslope). Large magnitude ⇒ a separating pull.
        let mut row_grad = Array2::<f64>::zeros((n, K_SURVIVAL));
        for i in 0..n {
            row_grad[[i, 1]] = 50.0 * basis[[i, 0]]; // q1 score (marginal up)
            row_grad[[i, 3]] = -50.0 * basis[[i, 0]]; // g score (logslope down)
        }
        let s_total = Array2::<f64>::zeros((p_marg + p_log, p_marg + p_log));
        let report = survival_kkt_refusal_report_from_designs(
            SurvivalEffectiveDesigns {
                dq0: &dq0,
                dq1: &dq1,
                dqd1: &dqd1,
                m_dq: &m_dq,
                m_dqd1: &m_dqd1,
                g_dg: &g_dg,
            },
            &row_hess,
            &row_grad,
            &s_total,
            KKT_PHANTOM_TRUST_RADIUS,
        )
        .expect("report builds");

        assert!(
            !report.near_null.is_empty(),
            "expected a near-null direction"
        );
        // The flat direction is DRIVEN ⇒ NOT a phantom ⇒ must not be projected.
        assert!(
            report.real_nonstationary_count() >= 1,
            "driven flat direction must be flagged real non-stationarity; report: {}",
            report.summary()
        );
        assert!(
            !report.all_near_null_are_phantom(),
            "must refuse to project a driven flat direction; report: {}",
            report.summary()
        );
    }

    /// A well-conditioned, fully-identified problem: independent marginal and
    /// logslope bases, unit curvature. No near-null direction, so there is
    /// nothing to project and `all_near_null_are_phantom` is false (empty).
    #[test]
    fn well_conditioned_problem_has_no_near_null_direction() {
        let n = 64;
        let p_marg = 1;
        let p_log = 1;
        let mut marg = Array2::<f64>::zeros((n, 1));
        let mut logb = Array2::<f64>::zeros((n, 1));
        for i in 0..n {
            let t = i as f64 / n as f64;
            marg[[i, 0]] = t - 0.5;
            logb[[i, 0]] = (2.0 * std::f64::consts::PI * t).sin();
        }
        let dq0 = Array2::<f64>::zeros((n, 0));
        let dq1 = Array2::<f64>::zeros((n, 0));
        let dqd1 = Array2::<f64>::zeros((n, 0));
        let m_dqd1 = Array2::<f64>::zeros((n, p_marg));
        let row_hess = constant_row_hess(n, &coupled_q1_g_hessian());
        let row_grad = Array2::<f64>::zeros((n, K_SURVIVAL));
        let s_total = Array2::<f64>::zeros((p_marg + p_log, p_marg + p_log));
        let report = survival_kkt_refusal_report_from_designs(
            SurvivalEffectiveDesigns {
                dq0: &dq0,
                dq1: &dq1,
                dqd1: &dqd1,
                m_dq: &marg,
                m_dqd1: &m_dqd1,
                g_dg: &logb,
            },
            &row_hess,
            &row_grad,
            &s_total,
            KKT_PHANTOM_TRUST_RADIUS,
        )
        .expect("report builds");
        assert!(
            report.condition_number < 1e6,
            "well-conditioned design must have modest condition number, got {:.3e}",
            report.condition_number
        );
        assert!(
            report.near_null.is_empty(),
            "well-conditioned design must have no near-null direction; report: {}",
            report.summary()
        );
        assert!(!report.all_near_null_are_phantom());
    }

    /// A penalty-null but data-curved direction must NOT register as near-null:
    /// the data curvature identifies it even though the penalty does not. This
    /// guards against re-introducing the broken naive nullspace shrinkage.
    #[test]
    fn penalty_null_but_data_curved_direction_is_not_near_null() {
        let n = 64;
        let p_marg = 1;
        let p_log = 1;
        let mut marg = Array2::<f64>::zeros((n, 1));
        let mut logb = Array2::<f64>::zeros((n, 1));
        for i in 0..n {
            let t = i as f64 / n as f64;
            marg[[i, 0]] = t - 0.5;
            logb[[i, 0]] = (3.0 * t).cos();
        }
        let dq0 = Array2::<f64>::zeros((n, 0));
        let dq1 = Array2::<f64>::zeros((n, 0));
        let dqd1 = Array2::<f64>::zeros((n, 0));
        let m_dqd1 = Array2::<f64>::zeros((n, p_marg));
        let row_hess = constant_row_hess(n, &coupled_q1_g_hessian());
        let row_grad = Array2::<f64>::zeros((n, K_SURVIVAL));
        // Zero penalty everywhere: every direction is penalty-null, yet the
        // data curvature (unit q1/g Hessian on independent bases) identifies
        // both, so none is near-null.
        let s_total = Array2::<f64>::zeros((p_marg + p_log, p_marg + p_log));
        let report = survival_kkt_refusal_report_from_designs(
            SurvivalEffectiveDesigns {
                dq0: &dq0,
                dq1: &dq1,
                dqd1: &dqd1,
                m_dq: &marg,
                m_dqd1: &m_dqd1,
                g_dg: &logb,
            },
            &row_hess,
            &row_grad,
            &s_total,
            KKT_PHANTOM_TRUST_RADIUS,
        )
        .expect("report builds");
        assert!(
            report.near_null.is_empty(),
            "data-identified directions must not be near-null; report: {}",
            report.summary()
        );
    }
}