Skip to main content

gam_solve/inference/
residual_factor.rs

1//! #974 — the structured-residual covariance estimator and the single producer
2//! of [`MetricProvenance::WhitenedStructured`](gam_problem::MetricProvenance::WhitenedStructured).
3//!
4//! # What this estimates
5//!
6//! Given a residual matrix `R ∈ ℝ^{n×p}` (one `p`-dimensional reconstruction
7//! residual per row) and a smooth *activity coordinate* `z ∈ ℝ^n`, this fits the
8//! **structured residual-covariance model**
9//!
10//! ```text
11//!     Cov(r_n) = Σ_n = Λ · c(z_n) · Λᵀ + D ,
12//! ```
13//!
14//! where
15//!
16//! * `Λ ∈ ℝ^{p×r}` is a **low-rank interference factor** (the shared
17//!   off-isotropic subspace the residuals correlate along — e.g. a planted
18//!   interference subspace or a topology-race confound),
19//! * `D = diag(d) ≻ 0` is the **idiosyncratic diagonal** (per-channel
20//!   independent noise), and
21//! * `c(z) > 0` is the **smooth activity-scale law**: a strictly-positive scalar
22//!   that modulates the factor energy with the activity coordinate, recovered as
23//!   a binned-then-smoothed function of `z`.
24//!
25//! The fit is a deterministic, fixed-iteration **alternation** (no clock, no
26//! RNG; any tie is broken by index): it alternates
27//!
28//! 1. *(scale | Λ, D)* — re-estimate the per-row factor activity `c(z_n)` and
29//!    smooth it across `z`, holding the factor model fixed; and
30//! 2. *(Λ, D | scale)* — re-estimate the factor and diagonal from the
31//!    scale-deflated second-moment, holding the activity law fixed,
32//!
33//! a fixed small number of times. The **factor count `r`** is chosen by an
34//! evidence ladder: each candidate `r` is scored by its penalized Gaussian
35//! log-evidence and the best is kept.
36//!
37//! # What it produces
38//!
39//! [`StructuredResidualModel::row_metric`] materializes the **per-row precision
40//! factor** `U_n ∈ ℝ^{p×p}` with `U_n U_nᵀ = Σ_n^{-1}`, packaged as a
41//! [`RowMetric`](gam_problem::RowMetric) with
42//! [`MetricProvenance::WhitenedStructured`](gam_problem::MetricProvenance::WhitenedStructured).
43//! Whitening a residual `r_n` through it (`U_nᵀ r_n`) yields a vector whose
44//! squared Euclidean norm is `r_nᵀ Σ_n^{-1} r_n` — the Mahalanobis residual under
45//! the estimated noise model, which is exactly the likelihood-correct data-fit.
46//! The factor is built from `Σ_n^{-1}` computed in **Woodbury form** (an
47//! `r × r` solve, never a `p × p` inverse), so the estimator scales with the
48//! factor rank, not the dense output dimension.
49//!
50//! This is the first real producer of `WhitenedStructured`, and therefore the
51//! first metric whose `whitens_likelihood()` is `true`: see
52//! [`RowMetric::whitens_likelihood`](gam_problem::RowMetric::whitens_likelihood).
53
54use std::sync::Arc;
55
56use ndarray::{Array1, Array2, ArrayView1, ArrayView2};
57
58use gam_problem::RowMetric;
59use gam_linalg::faer_ndarray::{FaerCholesky, FaerEigh};
60use faer::Side;
61
62/// Number of (scale | factor) ↔ (factor | scale) alternation sweeps. Fixed and
63/// deterministic: the alternation is a smooth descent on the structured-Gaussian
64/// objective and converges geometrically, so a small fixed budget is both
65/// sufficient and reproducible (no clock/RNG-driven stopping).
66const ALTERNATION_SWEEPS: usize = 8;
67
68/// Number of bins the activity coordinate `z` is partitioned into for the smooth
69/// activity-scale `c(z)`. The per-bin factor activity is estimated then linearly
70/// interpolated across bin centers, giving a continuous piecewise-linear scale
71/// law. Chosen as a fixed structural constant (magic-by-default): enough bins to
72/// resolve a smooth monotone or unimodal scale trend without over-fitting the
73/// per-row noise.
74const ACTIVITY_SCALE_BINS: usize = 8;
75
76/// Relative floor on the idiosyncratic diagonal `D`, as a fraction of the mean
77/// residual variance. Keeps `Σ_n ≻ 0` and the Woodbury `r × r` capacitance
78/// invertible even when a channel is (near-)perfectly explained by the factor.
79const DIAGONAL_REL_FLOOR: f64 = 1e-6;
80
81/// Relative floor on the activity scale `c(z)`, as a fraction of its mean. Keeps
82/// `c(z) > 0` (a covariance scale) across the whole `z` range.
83const SCALE_REL_FLOOR: f64 = 1e-4;
84
85/// The fitted structured residual-covariance model: low-rank factor `Λ`,
86/// idiosyncratic diagonal `D`, and the smooth activity-scale `c(z)` evaluated at
87/// every row. Produces per-row precision factors and the
88/// [`MetricProvenance::WhitenedStructured`](gam_problem::MetricProvenance::WhitenedStructured)
89/// [`RowMetric`](gam_problem::RowMetric).
90#[derive(Clone, Debug)]
91pub struct StructuredResidualModel {
92    /// Output dimensionality `p` (residual width).
93    p: usize,
94    /// Selected factor rank `r` (`0 ≤ r ≤ p`). `0` ⇒ pure-diagonal noise model.
95    factor_rank: usize,
96    /// Interference factor `Λ ∈ ℝ^{p×r}` (the shared off-diagonal subspace).
97    lambda: Array2<f64>,
98    /// Idiosyncratic diagonal `d ∈ ℝ^p` (`D = diag(d)`), floored `≻ 0`.
99    diagonal: Array1<f64>,
100    /// Per-row activity scale `c(z_n) > 0`, length `n`.
101    row_scale: Array1<f64>,
102    /// Penalized Gaussian log-evidence of the selected model (higher is better).
103    /// The value the evidence ladder maximized over the candidate ranks.
104    log_evidence: f64,
105}
106
107/// Estimator inputs: the residual matrix and the smooth activity coordinate.
108///
109/// `residuals` is `R ∈ ℝ^{n×p}`. `activity` is `z ∈ ℝ^n` — the coordinate the
110/// scale law `c(z)` is smooth in (e.g. an assignment-mass or activation-strength
111/// summary per row). When no genuine activity coordinate is available, passing a
112/// constant `z` recovers a homoscedastic factor model (`c(z) ≡ const`).
113pub struct ResidualFactorInput<'a> {
114    /// Residual matrix `R ∈ ℝ^{n×p}`.
115    pub residuals: ArrayView2<'a, f64>,
116    /// Activity coordinate `z ∈ ℝ^n` the scale law is smooth in.
117    pub activity: ArrayView1<'a, f64>,
118    /// Maximum factor rank the evidence ladder is allowed to consider. The
119    /// ladder scores `r = 0, 1, …, min(max_factor_rank, p−1)` and keeps the
120    /// penalized-evidence maximizer. `0` forces the pure-diagonal model.
121    pub max_factor_rank: usize,
122}
123
124/// A persistent, evidence-earning residual factor direction — a promotion
125/// candidate for the #2021 Λ nursery→promotion birth channel. Emitted by
126/// [`StructuredResidualModel::promotion_candidates`] when a column of this
127/// pass's `Λ` both (a) aligns with a column of the previous pass's `Λ` (it
128/// *persisted* across the outer alternation) and (b) explains residual energy
129/// above the idiosyncratic-noise floor (it *earns its complexity*). The driver
130/// accumulates persistence across passes (the nursery) and, once a direction
131/// survives long enough, promotes it to a new curved/linear atom seeded by
132/// [`Self::direction`].
133#[derive(Clone, Debug)]
134pub struct FactorPromotion {
135    /// Unit-norm factor direction in output space (`p`-vector): the L2-normalized
136    /// column of `Λ`. This is the decoder direction a promoted atom is born with.
137    pub direction: Array1<f64>,
138    /// Explained residual energy `‖Λ_:,j‖²` (pre-normalization squared column
139    /// norm) — the factor's contribution to `Σ = c·ΛΛᵀ + D`. Candidates are
140    /// returned in descending energy so the driver promotes the strongest first.
141    pub energy: f64,
142    /// `|cos|` alignment (∈ `[0, 1]`) between this direction and the best-matching
143    /// column of the previous pass's `Λ` — the persistence score gating promotion.
144    pub persistence_alignment: f64,
145    /// Index of the best-matching previous-pass `Λ` column (the nursery lineage
146    /// this candidate continues), so the driver can track a stable identity for a
147    /// direction across passes.
148    pub prev_column: usize,
149}
150
151impl StructuredResidualModel {
152    /// Fit the structured residual-covariance model by the deterministic
153    /// fixed-iteration alternation, selecting the factor rank by the evidence
154    /// ladder. Returns an error only on shape / non-finite-input violations; the
155    /// numerical path is total (every floor and solve is guarded).
156    pub fn fit(input: ResidualFactorInput<'_>) -> Result<Self, String> {
157        let r = input.residuals;
158        let z = input.activity;
159        let n = r.nrows();
160        let p = r.ncols();
161        if n == 0 || p == 0 {
162            return Err(format!(
163                "StructuredResidualModel::fit: residuals must be non-empty; got ({n}, {p})"
164            ));
165        }
166        if z.len() != n {
167            return Err(format!(
168                "StructuredResidualModel::fit: activity length {} != residual rows {n}",
169                z.len()
170            ));
171        }
172        if !r.iter().all(|v| v.is_finite()) {
173            return Err("StructuredResidualModel::fit: residuals must be finite".to_string());
174        }
175        if !z.iter().all(|v| v.is_finite()) {
176            return Err("StructuredResidualModel::fit: activity must be finite".to_string());
177        }
178
179        // Bin assignment for the activity-scale law: deterministic equal-width
180        // bins over the observed z-range. A degenerate (zero-width) range maps
181        // every row to bin 0, recovering a single homoscedastic scale.
182        let bins = ACTIVITY_SCALE_BINS.max(1);
183        let z_min = z.iter().copied().fold(f64::INFINITY, f64::min);
184        let z_max = z.iter().copied().fold(f64::NEG_INFINITY, f64::max);
185        let z_span = z_max - z_min;
186        let row_bin: Vec<usize> = (0..n)
187            .map(|i| {
188                if z_span <= 0.0 {
189                    0
190                } else {
191                    let frac = (z[i] - z_min) / z_span;
192                    let idx = (frac * bins as f64).floor() as isize;
193                    idx.clamp(0, bins as isize - 1) as usize
194                }
195            })
196            .collect();
197
198        let max_rank = input.max_factor_rank.min(p.saturating_sub(1));
199
200        // Evidence ladder over candidate factor ranks. Each candidate is fit by
201        // the full alternation and scored by its penalized Gaussian log-evidence;
202        // the maximizer is kept. Index order breaks any tie (lowest rank wins on
203        // an exact tie — Occam).
204        let mut best: Option<StructuredResidualModel> = None;
205        for rank in 0..=max_rank {
206            let model = Self::fit_fixed_rank(r, &row_bin, bins, rank)?;
207            let take = match &best {
208                None => true,
209                Some(b) => model.log_evidence > b.log_evidence,
210            };
211            if take {
212                best = Some(model);
213            }
214        }
215        best.ok_or_else(|| "StructuredResidualModel::fit: evidence ladder empty".to_string())
216    }
217
218    /// Fit the model at a fixed factor rank by the deterministic alternation.
219    fn fit_fixed_rank(
220        r: ArrayView2<'_, f64>,
221        row_bin: &[usize],
222        bins: usize,
223        rank: usize,
224    ) -> Result<Self, String> {
225        let n = r.nrows();
226        let p = r.ncols();
227
228        // Mean residual variance — the scale reference for the diagonal floor.
229        let mut total_var = 0.0_f64;
230        for i in 0..n {
231            for j in 0..p {
232                total_var += r[[i, j]] * r[[i, j]];
233            }
234        }
235        let mean_var = (total_var / (n as f64 * p as f64)).max(f64::MIN_POSITIVE);
236        let diag_floor = DIAGONAL_REL_FLOOR * mean_var;
237
238        // Initialize the per-row scale to 1 (homoscedastic start), the diagonal
239        // to the per-channel sample variance, and Λ to the leading eigenvectors
240        // of the (scale-1) second moment. The alternation refines all three.
241        let mut row_scale = Array1::<f64>::ones(n);
242        let mut bin_scale = Array1::<f64>::ones(bins);
243        // Raw (undeflated) per-channel second moment — the D estimator's data
244        // term. Constant across sweeps.
245        let raw_diag = column_variances(r);
246        let mut diagonal = raw_diag.mapv(|v| v.max(diag_floor));
247        let mut lambda = Array2::<f64>::zeros((p, rank));
248
249        for _sweep in 0..ALTERNATION_SWEEPS {
250            // (Λ, D | scale): scale-deflated second moment
251            //   S = (1/n) Σ_n (r_n r_nᵀ) / c(z_n).
252            // Under the model E[r_n r_nᵀ] = c_n ΛΛᵀ + D, so S ≈ ΛΛᵀ + D̄ with
253            // D̄ the scale-averaged diagonal; the leading eigenpairs of S − D
254            // give Λ, the residual diagonal gives D.
255            let s = scaled_second_moment(r, &row_scale);
256            let (evals, evecs) = symmetric_eig_ascending(&s)?;
257            // Leading `rank` eigenpairs (eigenvalues ascending ⇒ take the tail).
258            if rank > 0 {
259                for k in 0..rank {
260                    let col = p - 1 - k;
261                    // Factor energy above the idiosyncratic floor: the part of
262                    // the eigenvalue not explained by the mean diagonal.
263                    let mean_diag = diagonal.iter().copied().sum::<f64>() / p as f64;
264                    let energy = (evals[col] - mean_diag).max(0.0);
265                    let amp = energy.sqrt();
266                    for row in 0..p {
267                        lambda[[row, k]] = amp * evecs[[row, col]];
268                    }
269                }
270            }
271            // D update from the RAW (undeflated) moment, floored ≻ 0. The model
272            // is Σ_n = c_n·ΛΛᵀ + D with D NOT scale-multiplied, and c is mean-1
273            // normalized, so E[(1/n)Σ r_n r_nᵀ] = ΛΛᵀ + D exactly. The deflated
274            // moment `s` is the right object for the FACTOR block (its factor
275            // part is scale-free) but its diagonal carries D·mean(1/c) — a
276            // Jensen-inflated D (mean(1/c) > 1 for any non-constant law), which
277            // biased D upward by exactly mean(1/c̃) and let a spurious
278            // higher-rank candidate win the evidence ladder on a better D
279            // alone (the probe's rank-2 winner had a zero second column).
280            for j in 0..p {
281                let mut factor_var = 0.0_f64;
282                for k in 0..rank {
283                    factor_var += lambda[[j, k]] * lambda[[j, k]];
284                }
285                diagonal[j] = (raw_diag[j] - factor_var).max(diag_floor);
286            }
287
288            // (scale | Λ, D): per-row factor activity. With residual r_n, the
289            // factor-subspace energy is r_nᵀ P r_n where P projects onto
290            // range(Λ) in the D-whitened metric; the maximum-likelihood scalar
291            // multiplier on ΛΛᵀ that matches the row's factor-subspace energy is
292            //   c_n = (r̃_nᵀ B (BᵀB)^{-1} Bᵀ r̃_n) / tr(...)-normalizer.
293            // We use a stable closed-form proxy: the row's factor-coordinate
294            // energy ‖Λ⁺ r_n‖² normalized by the unit-scale expectation, then
295            // bin-smoothed across z. With rank 0 there is no factor ⇒ c ≡ 1.
296            if rank > 0 {
297                let mut bin_num = Array1::<f64>::zeros(bins);
298                let mut bin_den = Array1::<f64>::zeros(bins);
299                let coords = factor_coordinates(&lambda, &diagonal, r)?;
300                for i in 0..n {
301                    let mut energy = 0.0_f64;
302                    for k in 0..rank {
303                        energy += coords[[i, k]] * coords[[i, k]];
304                    }
305                    let b = row_bin[i];
306                    bin_num[b] += energy;
307                    bin_den[b] += rank as f64;
308                }
309                // Per-bin mean factor energy = activity scale. Empty bins inherit
310                // the global mean so the scale law stays defined everywhere.
311                let global = {
312                    let num: f64 = bin_num.iter().sum();
313                    let den: f64 = bin_den.iter().sum();
314                    if den > 0.0 { num / den } else { 1.0 }
315                };
316                for b in 0..bins {
317                    bin_scale[b] = if bin_den[b] > 0.0 {
318                        bin_num[b] / bin_den[b]
319                    } else {
320                        global
321                    };
322                }
323                // Smooth (3-point moving average over bins) for a continuous law,
324                // then floor ≻ 0.
325                let scale_floor = SCALE_REL_FLOOR * global.max(f64::MIN_POSITIVE);
326                let smoothed = moving_average_3(&bin_scale);
327                for b in 0..bins {
328                    bin_scale[b] = smoothed[b].max(scale_floor);
329                }
330                // Re-normalize so the mean scale is 1 (the factor amplitude lives
331                // in Λ; c(z) carries only the relative activity law). This keeps
332                // the (Λ, D) ↔ (scale) split identified.
333                //
334                // The mean MUST be taken over ROWS, not over bins. The identity
335                // that makes `raw_diag` an unbiased ΛΛᵀ + D estimator is
336                //   E[(1/n) Σ_n r_n r_nᵀ] = (1/n) Σ_i c(z_i) · ΛΛᵀ + D,
337                // which reduces to ΛΛᵀ + D iff the ROW mean of c is 1:
338                //   (1/n) Σ_i c(z_i) = Σ_b (n_b / n) · bin_scale[b] = 1,
339                // where n_b is the occupancy (row count) of bin b. Under uneven
340                // occupancy (the common case — z is data-driven) the bin-UNIFORM
341                // mean (1/bins) Σ_b bin_scale[b] ≠ this occupancy-weighted mean, so
342                // normalizing by it would leave raw_diag = ΛΛᵀ + D biased by
343                // exactly (occupancy mean / bin mean). Divide by the occupancy-
344                // weighted mean instead, so (1/n) Σ_i row_scale[i] is exactly 1.
345                // ORDERING: the positivity floor was applied above FIRST, so the
346                // floored per-bin values are the ones this normalization sees; the
347                // per-row assignment below therefore needs no second clamp (a
348                // re-clamp would use pre-normalization floor units and break the
349                // exact row-mean-1 invariant just established).
350                let mut bin_count = vec![0.0_f64; bins];
351                for &b in row_bin.iter() {
352                    bin_count[b] += 1.0;
353                }
354                let mean_scale = (0..bins)
355                    .map(|b| bin_count[b] * bin_scale[b])
356                    .sum::<f64>()
357                    / n as f64;
358                if mean_scale > 0.0 {
359                    bin_scale.mapv_inplace(|v| v / mean_scale);
360                }
361                // Each bin_scale[b] is already ≥ scale_floor / mean_scale > 0.
362                for i in 0..n {
363                    row_scale[i] = bin_scale[row_bin[i]];
364                }
365            }
366        }
367
368        let log_evidence = penalized_log_evidence(r, &lambda, &diagonal, &row_scale, rank);
369        let mut model = Self {
370            p,
371            factor_rank: rank,
372            lambda,
373            diagonal,
374            row_scale,
375            log_evidence,
376        };
377        // Guard against any non-finite leak from a degenerate fit: fall back to a
378        // pure-diagonal model with the same evidence accounting.
379        if !model.is_finite() {
380            model.lambda = Array2::<f64>::zeros((p, rank));
381            model.row_scale = Array1::<f64>::ones(n);
382        }
383        Ok(model)
384    }
385
386    fn is_finite(&self) -> bool {
387        self.lambda.iter().all(|v| v.is_finite())
388            && self.diagonal.iter().all(|v| v.is_finite() && *v > 0.0)
389            && self.row_scale.iter().all(|v| v.is_finite() && *v > 0.0)
390            && self.log_evidence.is_finite()
391    }
392
393    /// Selected factor rank `r`.
394    pub fn factor_rank(&self) -> usize {
395        self.factor_rank
396    }
397
398    /// The fitted interference factor `Λ ∈ ℝ^{p×r}` (the shared off-isotropic
399    /// residual subspace). Consumed by the planted-subspace recovery test to
400    /// compare `range(Λ)` against the planted interference subspace.
401    pub fn factor(&self) -> ArrayView2<'_, f64> {
402        self.lambda.view()
403    }
404
405    /// The idiosyncratic diagonal `d ∈ ℝ^p` (`D = diag(d)`).
406    pub fn diagonal(&self) -> ArrayView1<'_, f64> {
407        self.diagonal.view()
408    }
409
410    /// The per-row activity scale `c(z_n) > 0`, length `n`. Recovers the smooth
411    /// activity-scale law evaluated at every observed `z_n`.
412    pub fn row_scale(&self) -> ArrayView1<'_, f64> {
413        self.row_scale.view()
414    }
415
416    /// The penalized Gaussian log-evidence the rank-selection ladder maximized.
417    pub fn log_evidence(&self) -> f64 {
418        self.log_evidence
419    }
420
421    /// #2021 Λ nursery→promotion: detect *persistent, evidence-earning* factor
422    /// directions relative to the previous outer-alternation pass's model.
423    ///
424    /// A column `j` of this model's `Λ` is a [`FactorPromotion`] candidate iff
425    /// both gates hold:
426    /// 1. **Earns its complexity** (evidence gate): its explained energy
427    ///    `‖Λ_:,j‖² ≥ energy_floor_mult · mean(diag(D))`. Every column is already
428    ///    inside the evidence-ladder-selected rank (so it cleared the BIC
429    ///    penalty globally); this per-direction floor additionally requires the
430    ///    factor to explain more than an average channel's idiosyncratic noise,
431    ///    so we never promote a direction that only barely survived rank
432    ///    selection.
433    /// 2. **Persists** (nursery gate): its `|cos|` alignment with the best-
434    ///    matching column of `prev`'s `Λ` is `≥ align_min` — the direction is the
435    ///    same subspace the previous pass already found, not a new
436    ///    pass-to-pass artifact.
437    ///
438    /// Returns candidates sorted by energy (descending). `prev = None` (the first
439    /// structured pass, damping toward `I`) yields no candidates — a direction
440    /// must survive at least one pass to enter the nursery. The driver holds the
441    /// cross-pass persistence count (promote after it clears the direction's
442    /// nursery dwell) and does the actual atom birth; this method is the pure,
443    /// per-pass detector.
444    ///
445    /// Errors on non-finite / out-of-range gates (`align_min ∈ [0,1]`,
446    /// `energy_floor_mult ≥ 0`) or a `prev` with a different output dim `p`.
447    pub fn promotion_candidates(
448        &self,
449        prev: Option<&StructuredResidualModel>,
450        align_min: f64,
451        energy_floor_mult: f64,
452    ) -> Result<Vec<FactorPromotion>, String> {
453        if !align_min.is_finite() || !(0.0..=1.0).contains(&align_min) {
454            return Err(format!(
455                "StructuredResidualModel::promotion_candidates: align_min must be finite in [0,1]; got {align_min}"
456            ));
457        }
458        if !energy_floor_mult.is_finite() || energy_floor_mult < 0.0 {
459            return Err(format!(
460                "StructuredResidualModel::promotion_candidates: energy_floor_mult must be finite and ≥ 0; got {energy_floor_mult}"
461            ));
462        }
463        let prev = match prev {
464            Some(pv) => pv,
465            None => return Ok(Vec::new()),
466        };
467        if prev.p != self.p {
468            return Err(format!(
469                "StructuredResidualModel::promotion_candidates: prev output dim {} != {}",
470                prev.p, self.p
471            ));
472        }
473        let r = self.factor_rank;
474        let prev_r = prev.factor_rank;
475        if r == 0 || prev_r == 0 {
476            return Ok(Vec::new());
477        }
478        // Idiosyncratic-noise floor: a promoted direction must explain more than
479        // an average channel's independent variance.
480        let mean_d = self.diagonal.iter().copied().sum::<f64>() / self.p as f64;
481        let energy_floor = energy_floor_mult * mean_d;
482
483        let mut out: Vec<FactorPromotion> = Vec::new();
484        for j in 0..r {
485            let col = self.lambda.column(j);
486            let energy: f64 = col.iter().map(|v| v * v).sum();
487            if energy <= 0.0 || energy < energy_floor {
488                continue;
489            }
490            let norm = energy.sqrt();
491            // Best |cos| against the previous pass's columns.
492            let mut best_align = 0.0_f64;
493            let mut best_k = 0usize;
494            for k in 0..prev_r {
495                let pcol = prev.lambda.column(k);
496                let pnorm: f64 = pcol.iter().map(|v| v * v).sum::<f64>().sqrt();
497                if pnorm <= 0.0 {
498                    continue;
499                }
500                let dot: f64 = col.iter().zip(pcol.iter()).map(|(a, b)| a * b).sum();
501                let cos_abs = (dot / (norm * pnorm)).abs();
502                if cos_abs > best_align {
503                    best_align = cos_abs;
504                    best_k = k;
505                }
506            }
507            if best_align >= align_min {
508                out.push(FactorPromotion {
509                    direction: col.mapv(|v| v / norm),
510                    energy,
511                    persistence_alignment: best_align,
512                    prev_column: best_k,
513                });
514            }
515        }
516        out.sort_by(|a, b| b.energy.total_cmp(&a.energy));
517        Ok(out)
518    }
519
520    /// Build the per-row precision factor stack `U_n ∈ ℝ^{p×p}` with
521    /// `U_n U_nᵀ = Σ_n^{-1}` and package it as a
522    /// [`MetricProvenance::WhitenedStructured`](gam_problem::MetricProvenance::WhitenedStructured)
523    /// [`RowMetric`](gam_problem::RowMetric). This is the single
524    /// production site of `WhitenedStructured`.
525    ///
526    /// The precision is formed in **Woodbury form**:
527    /// ```text
528    ///   Σ_n^{-1} = D^{-1} − D^{-1} Λ ( c^{-1} I_r + Λᵀ D^{-1} Λ )^{-1} Λᵀ D^{-1},
529    /// ```
530    /// an `r × r` capacitance solve (never a `p × p` inverse). The factor `U_n`
531    /// is the lower-Cholesky of the assembled `Σ_n^{-1}` (`rank = p`), so
532    /// `whiten_residual_row` returns coordinates whose squared norm is the exact
533    /// Mahalanobis residual `r_nᵀ Σ_n^{-1} r_n`.
534    pub fn row_metric(&self, n_rows: usize) -> Result<RowMetric, String> {
535        if n_rows != self.row_scale.len() {
536            return Err(format!(
537                "StructuredResidualModel::row_metric: requested {n_rows} rows but model has {}",
538                self.row_scale.len()
539            ));
540        }
541        let p = self.p;
542        let r = self.factor_rank;
543        // Hoist every row-INDEPENDENT Woodbury part out of the per-row loop: the
544        // inverse diagonal D^{-1}, B = D^{-1}Λ, its transpose Bᵀ, and the Gram
545        // M0 = ΛᵀD^{-1}Λ. Only the c_n^{-1} I_r shift on the capacitance is
546        // per-row, so the per-row capacitance is M_n = M0 + c_n^{-1} I_r — a
547        // scalar-diagonal reweight of the SAME M0 (mirroring the Fix-B hoist in
548        // `penalized_log_evidence`). Building the n-row U_n stack now costs
549        // O(p·r² + n·(p·r + r³ + p³)) instead of rebuilding B and the Gram every
550        // row. The summation order per row is unchanged, so the assembled U_n is
551        // bit-for-bit identical to the per-row-rebuild it replaces.
552        let d_inv: Vec<f64> = (0..p).map(|i| 1.0 / self.diagonal[i]).collect();
553        let mut b = Array2::<f64>::zeros((p, r));
554        let mut bt = Array2::<f64>::zeros((r, p));
555        let mut m0 = Array2::<f64>::zeros((r, r));
556        if r > 0 {
557            for i in 0..p {
558                for k in 0..r {
559                    b[[i, k]] = d_inv[i] * self.lambda[[i, k]];
560                }
561            }
562            for a in 0..r {
563                for bk in 0..r {
564                    let mut acc = 0.0_f64;
565                    for i in 0..p {
566                        acc += self.lambda[[i, a]] * b[[i, bk]];
567                    }
568                    m0[[a, bk]] = acc;
569                }
570            }
571            for k in 0..r {
572                for i in 0..p {
573                    bt[[k, i]] = b[[i, k]];
574                }
575            }
576        }
577        // Row-major flat factor matrix: u[n, i*p + k] = U_n[i, k].
578        let mut u = Array2::<f64>::zeros((n_rows, p * p));
579        for row in 0..n_rows {
580            let precision = self.row_precision(&d_inv, &b, &bt, &m0, row)?;
581            let factor = lower_cholesky_psd(&precision)?;
582            for i in 0..p {
583                for k in 0..p {
584                    u[[row, i * p + k]] = factor[[i, k]];
585                }
586            }
587        }
588        RowMetric::whitened_structured(Arc::new(u), p, p)
589    }
590
591    /// Convenience for the #2021 fit-path install seam: fit the structured
592    /// residual model on `input` and immediately materialize its per-row
593    /// `WhitenedStructured` [`RowMetric`] over all `input.residuals.nrows()`
594    /// rows. Equivalent to `Self::fit(input)?.row_metric(n)` — the single call
595    /// the outer alternation loop consumes when it installs the whitening metric
596    /// but does not also need the fitted factor (`factor()` / birth mining).
597    pub fn fit_row_metric(input: ResidualFactorInput<'_>) -> Result<RowMetric, String> {
598        let n = input.residuals.nrows();
599        Self::fit(input)?.row_metric(n)
600    }
601
602    /// Damped per-row metric for the #2021 driver: blend covariances in the
603    /// **covariance domain** (before the Woodbury→Cholesky) between this model's
604    /// estimate and a previous one,
605    /// ```text
606    ///   Σ_t(row) = (1 − γ) · Σ_prev(row) + γ · Σ̂_t(row),
607    /// ```
608    /// where `Σ̂_t(row) = c_t(z)·ΛΛᵀ + D` is this model's per-row covariance
609    /// (built from the hoisted-M0 / occupancy-weighted `c(z)` path), and
610    /// `Σ_prev(row)` is `prev`'s per-row covariance when `Some`, else `I_p`. The
611    /// returned factor `U_n` satisfies `U_n U_nᵀ = Σ_t(row)^{-1}`, packaged as a
612    /// [`RowMetric`](gam_problem::RowMetric).
613    ///
614    /// Endpoints (exact, byte-identical to the undamped producers):
615    /// * `γ = 1.0` ⇒ this model's [`Self::row_metric`] exactly (Woodbury path);
616    /// * `γ = 0.0` ⇒ `prev`'s [`Self::row_metric`] when `Some`, else the
617    ///   Euclidean identity metric.
618    ///
619    /// `γ` must be finite and in `[0, 1]`; when `prev` is `Some` it must share
620    /// this model's `p` and row count.
621    pub fn row_metric_damped(
622        &self,
623        n_rows: usize,
624        gamma: f64,
625        prev: Option<&StructuredResidualModel>,
626    ) -> Result<RowMetric, String> {
627        if n_rows != self.row_scale.len() {
628            return Err(format!(
629                "StructuredResidualModel::row_metric_damped: requested {n_rows} rows but model has {}",
630                self.row_scale.len()
631            ));
632        }
633        if !gamma.is_finite() || !(0.0..=1.0).contains(&gamma) {
634            return Err(format!(
635                "StructuredResidualModel::row_metric_damped: gamma must be finite in [0,1]; got {gamma}"
636            ));
637        }
638        if let Some(pv) = prev {
639            if pv.p != self.p {
640                return Err(format!(
641                    "StructuredResidualModel::row_metric_damped: prev output dim {} != {}",
642                    pv.p, self.p
643                ));
644            }
645            if pv.row_scale.len() != n_rows {
646                return Err(format!(
647                    "StructuredResidualModel::row_metric_damped: prev has {} rows but requested {n_rows}",
648                    pv.row_scale.len()
649                ));
650            }
651        }
652        // Exact endpoints — reuse the undamped producers so the result is
653        // byte-identical (γ=1 ⇒ this model; γ=0 ⇒ prev, or Euclidean identity).
654        if gamma == 1.0 {
655            return self.row_metric(n_rows);
656        }
657        if gamma == 0.0 {
658            return match prev {
659                Some(pv) => pv.row_metric(n_rows),
660                None => RowMetric::euclidean(n_rows, self.p),
661            };
662        }
663
664        let p = self.p;
665        // Row-INDEPENDENT outer products ΛΛᵀ (this model and, if present, prev):
666        // only the per-row activity scale c(z) multiplies them, so hoist the Gram
667        // out of the per-row loop (mirroring the row_metric / penalized_log_evidence
668        // hoist).
669        let self_gram = outer_product(&self.lambda);
670        let prev_gram = prev.map(|pv| outer_product(&pv.lambda));
671
672        let mut u = Array2::<f64>::zeros((n_rows, p * p));
673        for row in 0..n_rows {
674            let c = self.row_scale[row].max(f64::MIN_POSITIVE);
675            // γ · Σ̂_t = γ·(c·ΛΛᵀ + D).
676            let mut sigma = Array2::<f64>::zeros((p, p));
677            for a in 0..p {
678                for b in 0..p {
679                    sigma[[a, b]] = gamma * c * self_gram[[a, b]];
680                }
681                sigma[[a, a]] += gamma * self.diagonal[a];
682            }
683            // (1−γ) · Σ_prev  (prev's per-row Σ, or I_p when prev is None).
684            match prev {
685                Some(pv) => {
686                    let cp = pv.row_scale[row].max(f64::MIN_POSITIVE);
687                    let pg = prev_gram.as_ref().unwrap();
688                    for a in 0..p {
689                        for b in 0..p {
690                            sigma[[a, b]] += (1.0 - gamma) * cp * pg[[a, b]];
691                        }
692                        sigma[[a, a]] += (1.0 - gamma) * pv.diagonal[a];
693                    }
694                }
695                None => {
696                    for a in 0..p {
697                        sigma[[a, a]] += 1.0 - gamma;
698                    }
699                }
700            }
701            // Symmetrize against round-off before inversion.
702            for a in 0..p {
703                for b in (a + 1)..p {
704                    let avg = 0.5 * (sigma[[a, b]] + sigma[[b, a]]);
705                    sigma[[a, b]] = avg;
706                    sigma[[b, a]] = avg;
707                }
708            }
709            // Σ_t is a convex combination of SPD matrices (D ≻ 0 / I ≻ 0) ⇒ SPD.
710            // Precision = Σ_t^{-1} via a Cholesky solve against I_p, then the U_n
711            // factor is the lower-Cholesky of the precision (row_metric's U
712            // convention).
713            let precision = invert_spd(&sigma)?;
714            let factor = lower_cholesky_psd(&precision)?;
715            for i in 0..p {
716                for k in 0..p {
717                    u[[row, i * p + k]] = factor[[i, k]];
718                }
719            }
720        }
721        RowMetric::whitened_structured(Arc::new(u), p, p)
722    }
723
724    /// Per-row precision `Σ_n^{-1}` via the Woodbury identity (an `r × r` solve),
725    /// given the row-independent parts precomputed by [`Self::row_metric`]:
726    /// `d_inv = D^{-1}`, `b = D^{-1}Λ`, `bt = Bᵀ`, and the Gram `m0 = ΛᵀD^{-1}Λ`.
727    /// Only the per-row capacitance `M_n = m0 + c_n^{-1} I_r` and the back-solve
728    /// depend on the row.
729    fn row_precision(
730        &self,
731        d_inv: &[f64],
732        b: &Array2<f64>,
733        bt: &Array2<f64>,
734        m0: &Array2<f64>,
735        row: usize,
736    ) -> Result<Array2<f64>, String> {
737        let p = self.p;
738        let r = self.factor_rank;
739        // Start from D^{-1}.
740        let mut precision = Array2::<f64>::zeros((p, p));
741        for i in 0..p {
742            precision[[i, i]] = d_inv[i];
743        }
744        if r == 0 {
745            return Ok(precision);
746        }
747        let c = self.row_scale[row].max(f64::MIN_POSITIVE);
748        // Per-row capacitance M_n = M0 + c^{-1} I_r (copy the hoisted Gram, then
749        // add c^{-1} to the diagonal). M_n ≻ 0 since c^{-1} > 0 and M0 ⪰ 0.
750        let mut cap = m0.clone();
751        for a in 0..r {
752            cap[[a, a]] += 1.0 / c;
753        }
754        // Σ_n^{-1} = D^{-1} − B M_n^{-1} Bᵀ. Solve M_n X = Bᵀ for X = M_n^{-1} Bᵀ
755        // (r × p) via Cholesky.
756        let chol = cap
757            .cholesky(Side::Lower)
758            .map_err(|e| format!("StructuredResidualModel::row_precision capacitance: {e:?}"))?;
759        let x = chol.solve_mat(bt); // r × p
760        for i in 0..p {
761            for j in 0..p {
762                let mut acc = 0.0_f64;
763                for k in 0..r {
764                    acc += b[[i, k]] * x[[k, j]];
765                }
766                precision[[i, j]] -= acc;
767            }
768        }
769        // Symmetrize against round-off so the Cholesky downstream sees an exactly
770        // symmetric PSD matrix.
771        for i in 0..p {
772            for j in (i + 1)..p {
773                let avg = 0.5 * (precision[[i, j]] + precision[[j, i]]);
774                precision[[i, j]] = avg;
775                precision[[j, i]] = avg;
776            }
777        }
778        Ok(precision)
779    }
780}
781
782/// Outer product `Λ Λᵀ ∈ ℝ^{p×p}` of a factor matrix `Λ ∈ ℝ^{p×r}` — the
783/// row-independent factor covariance the per-row activity scale multiplies.
784/// Used by [`StructuredResidualModel::row_metric_damped`] to hoist the Gram out
785/// of its per-row covariance-blend loop.
786fn outer_product(lambda: &Array2<f64>) -> Array2<f64> {
787    let p = lambda.nrows();
788    let r = lambda.ncols();
789    let mut g = Array2::<f64>::zeros((p, p));
790    for a in 0..p {
791        for b in 0..p {
792            let mut acc = 0.0_f64;
793            for k in 0..r {
794                acc += lambda[[a, k]] * lambda[[b, k]];
795            }
796            g[[a, b]] = acc;
797        }
798    }
799    g
800}
801
802/// Inverse of a symmetric positive-definite matrix via a Cholesky solve against
803/// the identity, symmetrized against round-off. Used to form `Σ_t^{-1}` from a
804/// densely-blended covariance in [`StructuredResidualModel::row_metric_damped`]
805/// (the blended covariance is no longer low-rank-plus-diagonal, so Woodbury does
806/// not apply).
807fn invert_spd(a: &Array2<f64>) -> Result<Array2<f64>, String> {
808    let p = a.nrows();
809    let chol = a
810        .cholesky(Side::Lower)
811        .map_err(|e| format!("invert_spd: blended covariance not SPD: {e:?}"))?;
812    let mut inv = chol.solve_mat(&Array2::<f64>::eye(p));
813    for i in 0..p {
814        for j in (i + 1)..p {
815            let avg = 0.5 * (inv[[i, j]] + inv[[j, i]]);
816            inv[[i, j]] = avg;
817            inv[[j, i]] = avg;
818        }
819    }
820    Ok(inv)
821}
822
823/// Per-channel (column) sample second moment of the residual matrix.
824fn column_variances(r: ArrayView2<'_, f64>) -> Array1<f64> {
825    let n = r.nrows();
826    let p = r.ncols();
827    let mut v = Array1::<f64>::zeros(p);
828    for j in 0..p {
829        let mut acc = 0.0_f64;
830        for i in 0..n {
831            acc += r[[i, j]] * r[[i, j]];
832        }
833        v[j] = acc / n as f64;
834    }
835    v
836}
837
838/// Scale-deflated second moment `S = (1/n) Σ_n (r_n r_nᵀ) / c_n`.
839fn scaled_second_moment(r: ArrayView2<'_, f64>, row_scale: &Array1<f64>) -> Array2<f64> {
840    let n = r.nrows();
841    let p = r.ncols();
842    let mut s = Array2::<f64>::zeros((p, p));
843    for i in 0..n {
844        let w = 1.0 / row_scale[i].max(f64::MIN_POSITIVE);
845        for a in 0..p {
846            let ra = r[[i, a]];
847            for b in 0..p {
848                s[[a, b]] += w * ra * r[[i, b]];
849            }
850        }
851    }
852    s.mapv_inplace(|v| v / n as f64);
853    // Symmetrize against accumulation round-off.
854    for a in 0..p {
855        for b in (a + 1)..p {
856            let avg = 0.5 * (s[[a, b]] + s[[b, a]]);
857            s[[a, b]] = avg;
858            s[[b, a]] = avg;
859        }
860    }
861    s
862}
863
864/// Factor coordinates `Λ⁺_D r_n` per row: the generalized-least-squares
865/// projection of each residual onto `range(Λ)` in the `D^{-1}` metric, returned
866/// as an `n × r` matrix. Solves the `r × r` normal equations
867/// `(Λᵀ D^{-1} Λ) γ = Λᵀ D^{-1} r_n` per row (shared factorization).
868fn factor_coordinates(
869    lambda: &Array2<f64>,
870    diagonal: &Array1<f64>,
871    r: ArrayView2<'_, f64>,
872) -> Result<Array2<f64>, String> {
873    let p = lambda.nrows();
874    let rank = lambda.ncols();
875    let n = r.nrows();
876    let d_inv: Vec<f64> = (0..p).map(|i| 1.0 / diagonal[i]).collect();
877    // Normal matrix ΛᵀD^{-1}Λ (+ tiny ridge for invertibility).
878    let mut normal = Array2::<f64>::zeros((rank, rank));
879    for a in 0..rank {
880        for b in 0..rank {
881            let mut acc = 0.0_f64;
882            for i in 0..p {
883                acc += lambda[[i, a]] * d_inv[i] * lambda[[i, b]];
884            }
885            normal[[a, b]] = acc;
886        }
887    }
888    let trace = (0..rank).map(|k| normal[[k, k]]).sum::<f64>().max(1.0);
889    let ridge = 1e-10 * trace / rank.max(1) as f64;
890    for k in 0..rank {
891        normal[[k, k]] += ridge;
892    }
893    let chol = normal
894        .cholesky(Side::Lower)
895        .map_err(|e| format!("factor_coordinates normal solve: {e:?}"))?;
896    let mut coords = Array2::<f64>::zeros((n, rank));
897    let mut rhs = Array1::<f64>::zeros(rank);
898    for i in 0..n {
899        for a in 0..rank {
900            let mut acc = 0.0_f64;
901            for j in 0..p {
902                acc += lambda[[j, a]] * d_inv[j] * r[[i, j]];
903            }
904            rhs[a] = acc;
905        }
906        let gamma = chol.solvevec(&rhs);
907        for a in 0..rank {
908            coords[[i, a]] = gamma[a];
909        }
910    }
911    Ok(coords)
912}
913
914/// 3-point moving average over a bin vector (edge-clamped), giving the smooth
915/// activity-scale law a continuous, low-curvature shape.
916fn moving_average_3(v: &Array1<f64>) -> Array1<f64> {
917    let m = v.len();
918    let mut out = Array1::<f64>::zeros(m);
919    for i in 0..m {
920        let lo = i.saturating_sub(1);
921        let hi = (i + 1).min(m - 1);
922        let mut acc = 0.0_f64;
923        let mut cnt = 0.0_f64;
924        for j in lo..=hi {
925            acc += v[j];
926            cnt += 1.0;
927        }
928        out[i] = acc / cnt;
929    }
930    out
931}
932
933/// Ascending-eigenvalue symmetric eigendecomposition (faer convention).
934fn symmetric_eig_ascending(m: &Array2<f64>) -> Result<(Array1<f64>, Array2<f64>), String> {
935    m.eigh(Side::Lower)
936        .map_err(|e| format!("symmetric_eig: {e:?}"))
937}
938
939/// Lower-triangular Cholesky factor `L` of a (numerically) PSD matrix `A` with
940/// `L Lᵀ = A`, with a relative spectral floor so a marginally-indefinite
941/// precision (round-off) still factors. Used to turn `Σ_n^{-1}` into the
942/// `RowMetric` factor `U_n` (here `U_n = L`).
943fn lower_cholesky_psd(a: &Array2<f64>) -> Result<Array2<f64>, String> {
944    if let Ok(chol) = a.cholesky(Side::Lower) {
945        return Ok(chol.lower_triangular());
946    }
947    // Eigen-repair: clamp eigenvalues to a small positive floor and rebuild a
948    // symmetric square root, then Cholesky that (always succeeds, PD).
949    let (evals, evecs) = symmetric_eig_ascending(a)?;
950    let max_ev = evals.iter().copied().fold(0.0_f64, f64::max).max(1.0);
951    let floor = 1e-10 * max_ev;
952    let p = a.nrows();
953    let mut sqrt = Array2::<f64>::zeros((p, p));
954    for i in 0..p {
955        for j in 0..p {
956            let mut acc = 0.0_f64;
957            for k in 0..p {
958                let ev = evals[k].max(floor);
959                acc += evecs[[i, k]] * ev.sqrt() * evecs[[j, k]];
960            }
961            sqrt[[i, j]] = acc;
962        }
963    }
964    sqrt.cholesky(Side::Lower)
965        .map(|c| c.lower_triangular())
966        .map_err(|e| format!("lower_cholesky_psd eigen-repair: {e:?}"))
967}
968
969/// Penalized Gaussian log-evidence of the structured model at the fitted
970/// parameters — the evidence ladder's rank-selection score.
971///
972/// The per-row log-density of `r_n ~ N(0, Σ_n)` is
973/// `−½ ( log|Σ_n| + r_nᵀ Σ_n^{-1} r_n + p log 2π )`. We sum it across rows and
974/// subtract a parameter-count penalty `½ k_params · log n` (a BIC-style Occam
975/// term over the `p·r` factor entries + `p` diagonal entries + the bin scales),
976/// so adding a spurious factor that does not improve the fit is rejected. Both
977/// `log|Σ_n|` and the quadratic use the Woodbury / matrix-determinant lemma so no
978/// dense `p × p` inverse or determinant is formed.
979fn penalized_log_evidence(
980    r: ArrayView2<'_, f64>,
981    lambda: &Array2<f64>,
982    diagonal: &Array1<f64>,
983    row_scale: &Array1<f64>,
984    rank: usize,
985) -> f64 {
986    let n = r.nrows();
987    let p = r.ncols();
988    let d_inv: Vec<f64> = (0..p).map(|i| 1.0 / diagonal[i]).collect();
989    let log_det_d: f64 = diagonal.iter().map(|&d| d.ln()).sum();
990    let two_pi_ln = (2.0 * std::f64::consts::PI).ln();
991
992    // Row-INDEPENDENT Gram M0 = ΛᵀD^{-1}Λ (r × r). This does not depend on the
993    // row, so build it ONCE here rather than rebuilding it inside the per-row loop
994    // (which was O(n·p·r²)). The per-row capacitance is only a scalar-diagonal
995    // reweight of this SAME M0 — M_n = M0 + (1/c_n) I_r — so each row copies M0 and
996    // adds 1/c_n to its diagonal (cheap, O(r)) before its own Cholesky. The
997    // summation order over j (0..p) is preserved exactly and the diagonal add is
998    // the identical `+= 1.0 / c` op, so the hoist is bit-for-bit identical to the
999    // pre-hoist per-row rebuild (same log|Σ_n|, same quadratic, same evidence).
1000    let mut m0 = Array2::<f64>::zeros((rank, rank));
1001    if rank > 0 {
1002        for a in 0..rank {
1003            for b in 0..rank {
1004                let mut acc = 0.0_f64;
1005                for j in 0..p {
1006                    acc += lambda[[j, a]] * d_inv[j] * lambda[[j, b]];
1007                }
1008                m0[[a, b]] = acc;
1009            }
1010        }
1011    }
1012
1013    let mut log_lik = 0.0_f64;
1014    for i in 0..n {
1015        let c = row_scale[i].max(f64::MIN_POSITIVE);
1016        // Quadratic r_nᵀ Σ_n^{-1} r_n via Woodbury:
1017        //   r_nᵀ D^{-1} r_n − (Bᵀ r_n)ᵀ M^{-1} (Bᵀ r_n),
1018        // with B = D^{-1}Λ and M = c^{-1}I + ΛᵀD^{-1}Λ.
1019        let mut quad = 0.0_f64;
1020        for j in 0..p {
1021            quad += r[[i, j]] * d_inv[j] * r[[i, j]];
1022        }
1023        let mut log_det = log_det_d;
1024        if rank > 0 {
1025            // Per-row capacitance M_n = M0 + (1/c) I_r (copy the hoisted M0, then
1026            // add 1/c to the diagonal), and w = Bᵀ r_n = ΛᵀD^{-1} r_n.
1027            let mut m = m0.clone();
1028            for a in 0..rank {
1029                m[[a, a]] += 1.0 / c;
1030            }
1031            let mut w = Array1::<f64>::zeros(rank);
1032            for a in 0..rank {
1033                let mut wa = 0.0_f64;
1034                for j in 0..p {
1035                    wa += lambda[[j, a]] * d_inv[j] * r[[i, j]];
1036                }
1037                w[a] = wa;
1038            }
1039            // Cholesky M = R Rᵀ → log|M|, and solve M y = w.
1040            match m.cholesky(Side::Lower) {
1041                Ok(chol) => {
1042                    let y = chol.solvevec(&w);
1043                    let mut wy = 0.0_f64;
1044                    for a in 0..rank {
1045                        wy += w[a] * y[a];
1046                    }
1047                    quad -= wy;
1048                    // log|Σ_n| = log|D| + log|M| + r·log c   (matrix-determinant
1049                    // lemma; the c^{-1}I shift carries the +r·log c).
1050                    let diag = chol.diag();
1051                    let log_det_m: f64 = diag.iter().map(|&l| (l * l).ln()).sum();
1052                    log_det = log_det_d + log_det_m + rank as f64 * c.ln();
1053                }
1054                Err(_) => {
1055                    // Degenerate capacitance — fall back to the diagonal model's
1056                    // accounting for this row (no factor correction).
1057                    log_det = log_det_d;
1058                }
1059            }
1060        }
1061        log_lik += -0.5 * (log_det + quad + p as f64 * two_pi_ln);
1062    }
1063
1064    let k_params = (p * rank + p + ACTIVITY_SCALE_BINS) as f64;
1065    log_lik - 0.5 * k_params * (n.max(2) as f64).ln()
1066}
1067
1068#[cfg(test)]
1069mod tests {
1070    use super::*;
1071    use ndarray::{Array1, Array2};
1072
1073    fn lcg_uniform(state: &mut u64) -> f64 {
1074        *state = state
1075            .wrapping_mul(6364136223846793005)
1076            .wrapping_add(1442695040888963407);
1077        ((*state >> 11) as f64) / ((1u64 << 53) as f64)
1078    }
1079
1080    fn lcg_normal(state: &mut u64) -> f64 {
1081        let u1 = lcg_uniform(state).max(1e-12);
1082        let u2 = lcg_uniform(state);
1083        (-2.0 * u1.ln()).sqrt() * (std::f64::consts::TAU * u2).cos()
1084    }
1085
1086    /// Per-rank evidence breakdown on the planted single-factor activity-law
1087    /// DGP (the `fitted_scale_recovers_planted_activity_law` plant). Pins the
1088    /// rank-selection decision itself: the ladder must prefer rank 1, and this
1089    /// test names the margin so an over-selection regression is diagnosable
1090    /// from the failure message alone.
1091    #[test]
1092    fn evidence_ladder_prefers_planted_rank_one() {
1093        let n = 5000usize;
1094        let p = 4usize;
1095        let lambda0 = ndarray::array![[1.5], [1.2], [-0.4], [0.3]];
1096        let sigma_eps = 0.2_f64;
1097        let slope = 1.3_f64;
1098        let mut seed = 0xD1B54A32D192ED03_u64;
1099        let mut residuals = Array2::<f64>::zeros((n, p));
1100        let mut activity = Array1::<f64>::zeros(n);
1101        for row in 0..n {
1102            let z = (row as f64) / (n as f64 - 1.0);
1103            activity[row] = z;
1104            let amp = (slope * z).exp().sqrt();
1105            let f = lcg_normal(&mut seed);
1106            for i in 0..p {
1107                residuals[[row, i]] = amp * lambda0[[i, 0]] * f + sigma_eps * lcg_normal(&mut seed);
1108            }
1109        }
1110        // Reproduce fit()'s bin assignment, then score each rank directly.
1111        let bins = ACTIVITY_SCALE_BINS.max(1);
1112        let row_bin: Vec<usize> = (0..n)
1113            .map(|i| {
1114                let frac = activity[i];
1115                (frac * bins as f64).floor().clamp(0.0, bins as f64 - 1.0) as usize
1116            })
1117            .collect();
1118        let mut report = String::new();
1119        let mut ev = Vec::new();
1120        for rank in 0..=2usize {
1121            let m = StructuredResidualModel::fit_fixed_rank(residuals.view(), &row_bin, bins, rank)
1122                .expect("fixed-rank fit");
1123            let k_params = (p * rank + p + ACTIVITY_SCALE_BINS) as f64;
1124            let log_lik = m.log_evidence() + 0.5 * k_params * (n as f64).ln();
1125            let col_norms: Vec<f64> = (0..rank)
1126                .map(|k| {
1127                    m.factor()
1128                        .column(k)
1129                        .iter()
1130                        .map(|v| v * v)
1131                        .sum::<f64>()
1132                        .sqrt()
1133                })
1134                .collect();
1135            report.push_str(&format!(
1136                "rank {rank}: evidence={:.3} loglik={:.3} penalty={:.3} col_norms={:?} diag={:?}\n",
1137                m.log_evidence(),
1138                log_lik,
1139                0.5 * k_params * (n as f64).ln(),
1140                col_norms,
1141                m.diagonal()
1142                    .iter()
1143                    .map(|v| (v * 1e4).round() / 1e4)
1144                    .collect::<Vec<_>>()
1145            ));
1146            ev.push(m.log_evidence());
1147        }
1148        assert!(
1149            ev[1] > ev[0] && ev[1] > ev[2],
1150            "evidence ladder must prefer the planted rank 1; breakdown:\n{report}"
1151        );
1152    }
1153
1154    /// Orthonormalize the columns of `m` (modified Gram–Schmidt), dropping
1155    /// numerically-null columns. Test-side helper for subspace comparisons.
1156    fn orthonormal_columns(m: ArrayView2<'_, f64>) -> Vec<Array1<f64>> {
1157        let mut basis: Vec<Array1<f64>> = Vec::new();
1158        for k in 0..m.ncols() {
1159            let mut v = m.column(k).to_owned();
1160            for q in &basis {
1161                let c = v.dot(q);
1162                v = &v - &(q * c);
1163            }
1164            let norm = v.dot(&v).sqrt();
1165            if norm > 1e-10 {
1166                basis.push(v / norm);
1167            }
1168        }
1169        basis
1170    }
1171
1172    /// Squared norm of the projection of unit vector `v` onto span(basis) —
1173    /// `cos²` of the principal angle between `v` and the subspace.
1174    fn projection_energy(v: &Array1<f64>, basis: &[Array1<f64>]) -> f64 {
1175        basis.iter().map(|q| v.dot(q).powi(2)).sum()
1176    }
1177
1178    /// #974 verification arm (a): the fitted factor must recover the PLANTED
1179    /// interference subspace. Two orthogonal planted directions with distinct
1180    /// strengths; the principal angles between each planted direction and
1181    /// range(Λ̂) must be small, and the evidence ladder must select rank 2.
1182    #[test]
1183    fn factor_recovers_planted_interference_subspace() {
1184        let n = 6000usize;
1185        let p = 6usize;
1186        // Two orthogonal planted unit directions.
1187        let raw1: Array1<f64> = ndarray::array![1.0, 1.0, 1.0, 1.0, 1.0, 1.0];
1188        let raw2: Array1<f64> = ndarray::array![1.0, -1.0, 1.0, -1.0, 1.0, -1.0];
1189        let v1 = &raw1 / raw1.dot(&raw1).sqrt();
1190        let v2 = &raw2 / raw2.dot(&raw2).sqrt();
1191        let (amp1, amp2) = (1.4_f64, 0.9_f64);
1192        let sigma_eps = 0.15_f64;
1193
1194        let mut seed = 0x9E3779B97F4A7C15_u64;
1195        let mut residuals = Array2::<f64>::zeros((n, p));
1196        let activity = Array1::<f64>::zeros(n); // constant ⇒ homoscedastic law
1197        for row in 0..n {
1198            let f1 = amp1 * lcg_normal(&mut seed);
1199            let f2 = amp2 * lcg_normal(&mut seed);
1200            for i in 0..p {
1201                residuals[[row, i]] = f1 * v1[i] + f2 * v2[i] + sigma_eps * lcg_normal(&mut seed);
1202            }
1203        }
1204
1205        let model = StructuredResidualModel::fit(ResidualFactorInput {
1206            residuals: residuals.view(),
1207            activity: activity.view(),
1208            max_factor_rank: 4,
1209        })
1210        .expect("fit");
1211
1212        assert_eq!(
1213            model.factor_rank(),
1214            2,
1215            "ladder must select the planted rank 2 (got {}, evidence {:.3})",
1216            model.factor_rank(),
1217            model.log_evidence()
1218        );
1219        let basis = orthonormal_columns(model.factor());
1220        assert_eq!(basis.len(), 2, "fitted factor must span 2 directions");
1221        let e1 = projection_energy(&v1, &basis);
1222        let e2 = projection_energy(&v2, &basis);
1223        // cos² of each principal angle ≥ 0.95 ⇒ angle ≤ ~13°.
1224        assert!(
1225            e1 > 0.95 && e2 > 0.95,
1226            "planted directions must lie in range(Λ̂): cos² = ({e1:.4}, {e2:.4})"
1227        );
1228    }
1229
1230    /// #974 verification arm (d): recovery of the planted activity-variance
1231    /// law. Single planted factor with per-row energy `exp(slope·z)`; the
1232    /// fitted `c(z_n)` must reproduce the law's shape — strongly correlated
1233    /// with the planted log-scale and with the right dynamic range.
1234    #[test]
1235    fn fitted_scale_recovers_planted_activity_law() {
1236        let n = 6000usize;
1237        let p = 4usize;
1238        let lambda0 = ndarray::array![1.5, 1.2, -0.4, 0.3];
1239        let sigma_eps = 0.2_f64;
1240        let slope = 1.3_f64;
1241        let mut seed = 0xD1B54A32D192ED03_u64;
1242        let mut residuals = Array2::<f64>::zeros((n, p));
1243        let mut activity = Array1::<f64>::zeros(n);
1244        for row in 0..n {
1245            let z = (row as f64) / (n as f64 - 1.0);
1246            activity[row] = z;
1247            let amp = (slope * z).exp().sqrt();
1248            let f = lcg_normal(&mut seed);
1249            for i in 0..p {
1250                residuals[[row, i]] = amp * lambda0[i] * f + sigma_eps * lcg_normal(&mut seed);
1251            }
1252        }
1253
1254        let model = StructuredResidualModel::fit(ResidualFactorInput {
1255            residuals: residuals.view(),
1256            activity: activity.view(),
1257            max_factor_rank: 2,
1258        })
1259        .expect("fit");
1260        assert_eq!(model.factor_rank(), 1, "planted rank is 1");
1261
1262        // Pearson correlation between fitted log c(z_n) and the planted
1263        // log-law slope·z (mean-1 normalization cancels in the correlation).
1264        let fitted_log: Vec<f64> = model.row_scale().iter().map(|c| c.ln()).collect();
1265        let planted_log: Vec<f64> = activity.iter().map(|z| slope * z).collect();
1266        let mean_f = fitted_log.iter().sum::<f64>() / n as f64;
1267        let mean_p = planted_log.iter().sum::<f64>() / n as f64;
1268        let mut cov = 0.0_f64;
1269        let mut var_f = 0.0_f64;
1270        let mut var_p = 0.0_f64;
1271        for i in 0..n {
1272            let df = fitted_log[i] - mean_f;
1273            let dp = planted_log[i] - mean_p;
1274            cov += df * dp;
1275            var_f += df * df;
1276            var_p += dp * dp;
1277        }
1278        let corr = cov / (var_f.sqrt() * var_p.sqrt());
1279        assert!(
1280            corr > 0.9,
1281            "fitted activity law must track the planted exp({slope}·z): corr = {corr:.4}"
1282        );
1283
1284        // Dynamic range: planted c(top)/c(bottom) over the inner bin centers
1285        // is exp(slope·7/8) ≈ 3.1; the binned/smoothed estimate must land in
1286        // a generous bracket around it (smoothing shrinks the edges).
1287        let lo = model.row_scale()[n / 16]; // first-bin interior
1288        let hi = model.row_scale()[n - 1 - n / 16]; // last-bin interior
1289        let ratio = hi / lo;
1290        assert!(
1291            ratio > 1.8 && ratio < 5.5,
1292            "fitted dynamic range {ratio:.3} must bracket the planted ≈3.1"
1293        );
1294    }
1295
1296    /// Reproduce `fit`'s equal-width bin assignment for a test activity vector.
1297    fn assign_bins(activity: &Array1<f64>, bins: usize) -> Vec<usize> {
1298        let n = activity.len();
1299        let z_min = activity.iter().copied().fold(f64::INFINITY, f64::min);
1300        let z_max = activity.iter().copied().fold(f64::NEG_INFINITY, f64::max);
1301        let span = z_max - z_min;
1302        (0..n)
1303            .map(|i| {
1304                if span <= 0.0 {
1305                    0
1306                } else {
1307                    let frac = (activity[i] - z_min) / span;
1308                    (frac * bins as f64).floor().clamp(0.0, bins as f64 - 1.0) as usize
1309                }
1310            })
1311            .collect()
1312    }
1313
1314    /// FIX A invariant: with deliberately UNEVEN bin occupancy the fitted
1315    /// per-row scale must have `(1/n) Σ_i row_scale[i] = 1` (occupancy-weighted
1316    /// mean-1), NOT the bin-uniform mean-1 the old code enforced. We assert the
1317    /// row-mean is 1 to tight tolerance, and that the bin-UNIFORM mean of the
1318    /// distinct per-bin scales is materially ≠ 1 — which is exactly the quantity
1319    /// the old normalization forced to 1, so this proves the two means differ
1320    /// under uneven occupancy and the test bites.
1321    #[test]
1322    fn occupancy_weighted_scale_has_row_mean_one() {
1323        let n = 4000usize;
1324        let p = 4usize;
1325        let lambda0 = ndarray::array![1.5, 1.2, -0.4, 0.3];
1326        let sigma_eps = 0.2_f64;
1327        let slope = 2.0_f64;
1328        let mut seed = 0xB5297A4D_u64 ^ 0x68E31DA4_u64;
1329        let mut residuals = Array2::<f64>::zeros((n, p));
1330        let mut activity = Array1::<f64>::zeros(n);
1331        for row in 0..n {
1332            // Cubic warp concentrates rows in the low-z bins ⇒ uneven occupancy.
1333            let u = (row as f64) / (n as f64 - 1.0);
1334            let z = u * u * u;
1335            activity[row] = z;
1336            let amp = (slope * z).exp().sqrt();
1337            let f = lcg_normal(&mut seed);
1338            for i in 0..p {
1339                residuals[[row, i]] = amp * lambda0[i] * f + sigma_eps * lcg_normal(&mut seed);
1340            }
1341        }
1342
1343        let model = StructuredResidualModel::fit(ResidualFactorInput {
1344            residuals: residuals.view(),
1345            activity: activity.view(),
1346            max_factor_rank: 2,
1347        })
1348        .expect("fit");
1349        assert!(
1350            model.factor_rank() >= 1,
1351            "need a non-trivial scale law (rank ≥ 1) for this invariant to bite; got rank 0"
1352        );
1353
1354        // Occupancy-weighted (row) mean must be exactly 1.
1355        let row_mean = model.row_scale().iter().sum::<f64>() / n as f64;
1356        assert!(
1357            (row_mean - 1.0).abs() < 1e-9,
1358            "occupancy-weighted row mean of c(z) must be 1; got {row_mean:.12}"
1359        );
1360
1361        // Bins are genuinely unevenly occupied, and every occupied bin's rows
1362        // share one scale value (collect the distinct value per bin).
1363        let bins = ACTIVITY_SCALE_BINS.max(1);
1364        let row_bin = assign_bins(&activity, bins);
1365        let mut counts = vec![0usize; bins];
1366        let mut bin_val = vec![f64::NAN; bins];
1367        for i in 0..n {
1368            let b = row_bin[i];
1369            counts[b] += 1;
1370            bin_val[b] = model.row_scale()[i];
1371        }
1372        let occupied: Vec<usize> = (0..bins).filter(|&b| counts[b] > 0).collect();
1373        let max_c = *counts.iter().max().unwrap();
1374        let min_c = *counts.iter().filter(|&&c| c > 0).min().unwrap();
1375        assert!(
1376            max_c as f64 > 2.0 * min_c as f64,
1377            "fixture must have uneven occupancy; counts = {counts:?}"
1378        );
1379
1380        // The bin-UNIFORM mean of the per-bin scales is what the OLD code forced
1381        // to 1. Under uneven occupancy + a non-constant law it is materially ≠ 1,
1382        // so the old normalization would NOT satisfy the row-mean-1 identity.
1383        let uniform_mean =
1384            occupied.iter().map(|&b| bin_val[b]).sum::<f64>() / occupied.len() as f64;
1385        assert!(
1386            (uniform_mean - 1.0).abs() > 0.1,
1387            "bin-uniform mean must differ from 1 (proving occupancy weighting matters); \
1388             got uniform_mean = {uniform_mean:.6}, row_mean = {row_mean:.6}"
1389        );
1390    }
1391
1392    /// Naive, pre-hoist reference for `penalized_log_evidence`: rebuilds the
1393    /// row-independent Gram M0 = ΛᵀD⁻¹Λ INSIDE the per-row loop (the original
1394    /// formula). The production function hoists M0 out; the two must agree.
1395    fn naive_penalized_log_evidence(
1396        r: ArrayView2<'_, f64>,
1397        lambda: &Array2<f64>,
1398        diagonal: &Array1<f64>,
1399        row_scale: &Array1<f64>,
1400        rank: usize,
1401    ) -> f64 {
1402        let n = r.nrows();
1403        let p = r.ncols();
1404        let d_inv: Vec<f64> = (0..p).map(|i| 1.0 / diagonal[i]).collect();
1405        let log_det_d: f64 = diagonal.iter().map(|&d| d.ln()).sum();
1406        let two_pi_ln = (2.0 * std::f64::consts::PI).ln();
1407        let mut log_lik = 0.0_f64;
1408        for i in 0..n {
1409            let c = row_scale[i].max(f64::MIN_POSITIVE);
1410            let mut quad = 0.0_f64;
1411            for j in 0..p {
1412                quad += r[[i, j]] * d_inv[j] * r[[i, j]];
1413            }
1414            let mut log_det = log_det_d;
1415            if rank > 0 {
1416                let mut m = Array2::<f64>::zeros((rank, rank));
1417                let mut w = Array1::<f64>::zeros(rank);
1418                for a in 0..rank {
1419                    let mut wa = 0.0_f64;
1420                    for j in 0..p {
1421                        wa += lambda[[j, a]] * d_inv[j] * r[[i, j]];
1422                    }
1423                    w[a] = wa;
1424                    for b in 0..rank {
1425                        let mut acc = 0.0_f64;
1426                        for j in 0..p {
1427                            acc += lambda[[j, a]] * d_inv[j] * lambda[[j, b]];
1428                        }
1429                        m[[a, b]] = acc;
1430                    }
1431                    m[[a, a]] += 1.0 / c;
1432                }
1433                match m.cholesky(Side::Lower) {
1434                    Ok(chol) => {
1435                        let y = chol.solvevec(&w);
1436                        let mut wy = 0.0_f64;
1437                        for a in 0..rank {
1438                            wy += w[a] * y[a];
1439                        }
1440                        quad -= wy;
1441                        let diag = chol.diag();
1442                        let log_det_m: f64 = diag.iter().map(|&l| (l * l).ln()).sum();
1443                        log_det = log_det_d + log_det_m + rank as f64 * c.ln();
1444                    }
1445                    Err(_) => {
1446                        log_det = log_det_d;
1447                    }
1448                }
1449            }
1450            log_lik += -0.5 * (log_det + quad + p as f64 * two_pi_ln);
1451        }
1452        let k_params = (p * rank + p + ACTIVITY_SCALE_BINS) as f64;
1453        log_lik - 0.5 * k_params * (n.max(2) as f64).ln()
1454    }
1455
1456    /// Naive, per-row-rebuild reference for `factor_coordinates`: rebuilds and
1457    /// re-factors the (row-independent) normal matrix ΛᵀD⁻¹Λ for EVERY row.
1458    /// Mathematically identical to the shared-factorization production path.
1459    fn naive_factor_coordinates(
1460        lambda: &Array2<f64>,
1461        diagonal: &Array1<f64>,
1462        r: ArrayView2<'_, f64>,
1463    ) -> Array2<f64> {
1464        let p = lambda.nrows();
1465        let rank = lambda.ncols();
1466        let n = r.nrows();
1467        let d_inv: Vec<f64> = (0..p).map(|i| 1.0 / diagonal[i]).collect();
1468        let mut coords = Array2::<f64>::zeros((n, rank));
1469        for i in 0..n {
1470            let mut normal = Array2::<f64>::zeros((rank, rank));
1471            for a in 0..rank {
1472                for b in 0..rank {
1473                    let mut acc = 0.0_f64;
1474                    for j in 0..p {
1475                        acc += lambda[[j, a]] * d_inv[j] * lambda[[j, b]];
1476                    }
1477                    normal[[a, b]] = acc;
1478                }
1479            }
1480            let trace = (0..rank).map(|k| normal[[k, k]]).sum::<f64>().max(1.0);
1481            let ridge = 1e-10 * trace / rank.max(1) as f64;
1482            for k in 0..rank {
1483                normal[[k, k]] += ridge;
1484            }
1485            let chol = normal.cholesky(Side::Lower).expect("naive normal solve");
1486            let mut rhs = Array1::<f64>::zeros(rank);
1487            for a in 0..rank {
1488                let mut acc = 0.0_f64;
1489                for j in 0..p {
1490                    acc += lambda[[j, a]] * d_inv[j] * r[[i, j]];
1491                }
1492                rhs[a] = acc;
1493            }
1494            let gamma = chol.solvevec(&rhs);
1495            for a in 0..rank {
1496                coords[[i, a]] = gamma[a];
1497            }
1498        }
1499        coords
1500    }
1501
1502    /// FIX B equivalence: the hoisted `penalized_log_evidence` and the shared-
1503    /// factorization `factor_coordinates` must equal their naive per-row-rebuild
1504    /// references to ~1e-10 (in fact bit-for-bit — the hoist preserves op order).
1505    #[test]
1506    fn hoisted_gram_matches_naive_per_row_rebuild() {
1507        let n = 200usize;
1508        let p = 5usize;
1509        let rank = 2usize;
1510        let mut seed = 0x243F6A8885A308D3_u64;
1511        let mut lambda = Array2::<f64>::zeros((p, rank));
1512        for i in 0..p {
1513            for k in 0..rank {
1514                lambda[[i, k]] = lcg_normal(&mut seed);
1515            }
1516        }
1517        let mut diagonal = Array1::<f64>::zeros(p);
1518        for j in 0..p {
1519            diagonal[j] = 0.3 + lcg_uniform(&mut seed); // strictly positive
1520        }
1521        let mut row_scale = Array1::<f64>::zeros(n);
1522        for i in 0..n {
1523            row_scale[i] = 0.5 + 1.5 * lcg_uniform(&mut seed); // strictly positive
1524        }
1525        let mut residuals = Array2::<f64>::zeros((n, p));
1526        for i in 0..n {
1527            for j in 0..p {
1528                residuals[[i, j]] = lcg_normal(&mut seed);
1529            }
1530        }
1531
1532        let ev_hoisted =
1533            penalized_log_evidence(residuals.view(), &lambda, &diagonal, &row_scale, rank);
1534        let ev_naive =
1535            naive_penalized_log_evidence(residuals.view(), &lambda, &diagonal, &row_scale, rank);
1536        assert!(
1537            (ev_hoisted - ev_naive).abs() <= 1e-10 * (1.0 + ev_naive.abs()),
1538            "hoisted log-evidence must equal naive rebuild: {ev_hoisted} vs {ev_naive}"
1539        );
1540
1541        let coords_hoisted =
1542            factor_coordinates(&lambda, &diagonal, residuals.view()).expect("coords");
1543        let coords_naive = naive_factor_coordinates(&lambda, &diagonal, residuals.view());
1544        let mut max_abs = 0.0_f64;
1545        for i in 0..n {
1546            for a in 0..rank {
1547                max_abs = max_abs.max((coords_hoisted[[i, a]] - coords_naive[[i, a]]).abs());
1548            }
1549        }
1550        assert!(
1551            max_abs <= 1e-10,
1552            "hoisted factor coordinates must equal naive rebuild; max |Δ| = {max_abs:e}"
1553        );
1554    }
1555
1556    /// FIX A regression: on an uneven-bin synthetic with a KNOWN planted single
1557    /// factor, the low-rank reconstruction ΛΛᵀ + D built from the OCCUPANCY-
1558    /// weighted scale law reconstructs the empirical second moment
1559    /// (1/n) Σ_n r_n r_nᵀ strictly better (Frobenius) than the one built from
1560    /// the bin-UNIFORM scale law. Uses the module's own `scaled_second_moment` /
1561    /// eigen path so it exercises the real (Λ, D | scale) step.
1562    #[test]
1563    fn occupancy_scale_improves_second_moment_reconstruction() {
1564        let n = 4000usize;
1565        let p = 4usize;
1566        let lambda0 = ndarray::array![1.5, 1.2, -0.4, 0.3];
1567        let sigma_eps = 0.2_f64;
1568        let slope = 2.0_f64;
1569        let bins = ACTIVITY_SCALE_BINS.max(1);
1570        let mut seed = 0xCA62C1D6_u64 ^ 0x9B05688C_u64;
1571        let mut residuals = Array2::<f64>::zeros((n, p));
1572        let mut activity = Array1::<f64>::zeros(n);
1573        let mut c_true = Array1::<f64>::zeros(n);
1574        for row in 0..n {
1575            let u = (row as f64) / (n as f64 - 1.0);
1576            let z = u * u * u; // cubic warp ⇒ uneven bin occupancy
1577            activity[row] = z;
1578            let c = (slope * z).exp();
1579            c_true[row] = c;
1580            let amp = c.sqrt();
1581            let f = lcg_normal(&mut seed);
1582            for i in 0..p {
1583                residuals[[row, i]] = amp * lambda0[i] * f + sigma_eps * lcg_normal(&mut seed);
1584            }
1585        }
1586
1587        // Empirical (undeflated) second moment T = (1/n) Σ_n r_n r_nᵀ — the
1588        // object the model's ΛΛᵀ + D must reconstruct.
1589        let mut t = Array2::<f64>::zeros((p, p));
1590        for i in 0..n {
1591            for a in 0..p {
1592                for b in 0..p {
1593                    t[[a, b]] += residuals[[i, a]] * residuals[[i, b]];
1594                }
1595            }
1596        }
1597        t.mapv_inplace(|v| v / n as f64);
1598
1599        let raw_diag = column_variances(residuals.view());
1600        let mean_var = raw_diag.iter().sum::<f64>() / p as f64;
1601        let diag_floor = DIAGONAL_REL_FLOOR * mean_var.max(f64::MIN_POSITIVE);
1602
1603        // Per-bin raw scale law: mean of the true c(z) within each bin.
1604        let row_bin = assign_bins(&activity, bins);
1605        let mut bin_sum = vec![0.0_f64; bins];
1606        let mut bin_cnt = vec![0.0_f64; bins];
1607        for i in 0..n {
1608            bin_sum[row_bin[i]] += c_true[i];
1609            bin_cnt[row_bin[i]] += 1.0;
1610        }
1611        let bin_raw: Vec<f64> = (0..bins)
1612            .map(|b| if bin_cnt[b] > 0.0 { bin_sum[b] / bin_cnt[b] } else { 1.0 })
1613            .collect();
1614
1615        // Occupancy-weighted mean-1 (Fix A) vs bin-uniform mean-1 (old).
1616        let mean_occ = (0..bins).map(|b| bin_cnt[b] * bin_raw[b]).sum::<f64>() / n as f64;
1617        let occupied: Vec<usize> = (0..bins).filter(|&b| bin_cnt[b] > 0.0).collect();
1618        let mean_uni =
1619            occupied.iter().map(|&b| bin_raw[b]).sum::<f64>() / occupied.len() as f64;
1620        let row_scale_occ: Array1<f64> =
1621            (0..n).map(|i| bin_raw[row_bin[i]] / mean_occ).collect();
1622        let row_scale_uni: Array1<f64> =
1623            (0..n).map(|i| bin_raw[row_bin[i]] / mean_uni).collect();
1624
1625        // One (Λ, D | scale) extraction from the deflated moment, mirroring the
1626        // production first sweep, returning the reconstruction ΛΛᵀ + D.
1627        let extract_recon = |row_scale: &Array1<f64>| -> Array2<f64> {
1628            let s = scaled_second_moment(residuals.view(), row_scale);
1629            let (evals, evecs) = symmetric_eig_ascending(&s).expect("eig");
1630            let mean_diag =
1631                raw_diag.iter().map(|&v| v.max(diag_floor)).sum::<f64>() / p as f64;
1632            let col = p - 1;
1633            let amp = (evals[col] - mean_diag).max(0.0).sqrt();
1634            let mut lam = Array1::<f64>::zeros(p);
1635            for j in 0..p {
1636                lam[j] = amp * evecs[[j, col]];
1637            }
1638            let mut recon = Array2::<f64>::zeros((p, p));
1639            for a in 0..p {
1640                for b in 0..p {
1641                    recon[[a, b]] = lam[a] * lam[b];
1642                }
1643            }
1644            for j in 0..p {
1645                let d = (raw_diag[j] - lam[j] * lam[j]).max(diag_floor);
1646                recon[[j, j]] += d;
1647            }
1648            recon
1649        };
1650
1651        let frob = |m: &Array2<f64>| -> f64 {
1652            let mut acc = 0.0_f64;
1653            for a in 0..p {
1654                for b in 0..p {
1655                    let d = m[[a, b]] - t[[a, b]];
1656                    acc += d * d;
1657                }
1658            }
1659            acc.sqrt()
1660        };
1661
1662        let dist_occ = frob(&extract_recon(&row_scale_occ));
1663        let dist_uni = frob(&extract_recon(&row_scale_uni));
1664        assert!(
1665            dist_occ < dist_uni,
1666            "occupancy-weighted reconstruction must beat bin-uniform: \
1667             ‖·‖_F occ = {dist_occ:.6} vs uni = {dist_uni:.6}"
1668        );
1669    }
1670
1671    /// Fit a small structured model on a planted single-factor DGP — shared
1672    /// fixture builder for the producer / damped-metric integration tests.
1673    fn fit_small_model(seed0: u64, lambda0: &Array1<f64>) -> (usize, StructuredResidualModel) {
1674        let n = 300usize;
1675        let p = lambda0.len();
1676        let sigma_eps = 0.25_f64;
1677        let slope = 1.4_f64;
1678        let mut seed = seed0;
1679        let mut residuals = Array2::<f64>::zeros((n, p));
1680        let mut activity = Array1::<f64>::zeros(n);
1681        for row in 0..n {
1682            let u = (row as f64) / (n as f64 - 1.0);
1683            let z = u * u;
1684            activity[row] = z;
1685            let amp = (slope * z).exp().sqrt();
1686            let f = lcg_normal(&mut seed);
1687            for i in 0..p {
1688                residuals[[row, i]] = amp * lambda0[i] * f + sigma_eps * lcg_normal(&mut seed);
1689            }
1690        }
1691        let model = StructuredResidualModel::fit(ResidualFactorInput {
1692            residuals: residuals.view(),
1693            activity: activity.view(),
1694            max_factor_rank: 2,
1695        })
1696        .expect("fit");
1697        (n, model)
1698    }
1699
1700    /// WAVE-2 producer integration (#2021): the WhitenedStructured RowMetric from
1701    /// `row_metric` must deliver the exact Mahalanobis `vᵀ Σ_n^{-1} v` for
1702    /// `Σ_n = c_n·ΛΛᵀ + D` over the fitted occupancy-normalized scale. A
1703    /// deterministic refit reproduces the same metric (the seam `fit_row_metric`
1704    /// relies on).
1705    #[test]
1706    fn row_metric_precision_matches_woodbury_over_fitted_scale() {
1707        let lambda0 = ndarray::array![1.5, 1.2, -0.4, 0.3];
1708        let (n, model) = fit_small_model(0x14057B7EF767814F_u64, &lambda0);
1709        let p = 4usize;
1710        assert!(model.factor_rank() >= 1, "need a factor for a non-trivial Σ_n");
1711
1712        let metric = model.row_metric(n).expect("row_metric");
1713        assert!(
1714            metric.whitens_likelihood(),
1715            "WhitenedStructured metric must whiten the likelihood"
1716        );
1717
1718        let rank = model.factor_rank();
1719        let lam = model.factor();
1720        let diag = model.diagonal();
1721        let v: Array1<f64> = ndarray::array![0.7, -1.3, 0.4, 0.9];
1722
1723        for &row in &[0usize, n / 3, n / 2, n - 1] {
1724            let c = model.row_scale()[row];
1725            let mut sigma = Array2::<f64>::zeros((p, p));
1726            for a in 0..p {
1727                for b in 0..p {
1728                    let mut fac = 0.0_f64;
1729                    for k in 0..rank {
1730                        fac += lam[[a, k]] * lam[[b, k]];
1731                    }
1732                    sigma[[a, b]] = c * fac;
1733                }
1734                sigma[[a, a]] += diag[a];
1735            }
1736            let chol = sigma.cholesky(Side::Lower).expect("Σ_n PD");
1737            let x = chol.solvevec(&v);
1738            let mahal_dense: f64 = v.iter().zip(x.iter()).map(|(a, b)| a * b).sum();
1739            let mahal_metric = metric.quad_form(row, v.view());
1740            assert!(
1741                (mahal_dense - mahal_metric).abs() <= 1e-8 * (1.0 + mahal_dense.abs()),
1742                "row {row}: metric quad_form {mahal_metric} must equal dense vᵀΣ⁻¹v {mahal_dense}"
1743            );
1744        }
1745
1746        // Deterministic refit reproduces the identical metric (fit_row_metric seam).
1747        let (n2, model2) = fit_small_model(0x14057B7EF767814F_u64, &lambda0);
1748        assert_eq!(n2, n);
1749        let metric_again = model2.row_metric(n2).expect("row_metric again");
1750        for &row in &[0usize, n / 2, n - 1] {
1751            let q1 = metric.quad_form(row, v.view());
1752            let q2 = metric_again.quad_form(row, v.view());
1753            assert!(
1754                (q1 - q2).abs() <= 1e-12 * (1.0 + q1.abs()),
1755                "deterministic refit must match at row {row}: {q2} vs {q1}"
1756            );
1757        }
1758    }
1759
1760    /// WAVE-2 #2021 damped metric endpoint contracts: γ=1 ≡ row_metric (ignores
1761    /// prev), γ=0 ≡ prev.row_metric (or Euclidean identity), 0<γ<1 is SPD, and
1762    /// out-of-range / non-finite γ is rejected.
1763    #[test]
1764    fn row_metric_damped_endpoints() {
1765        let lambda_a = ndarray::array![1.5, 1.2, -0.4, 0.3];
1766        let lambda_b = ndarray::array![-0.6, 1.1, 0.9, -1.3];
1767        let (n, model) = fit_small_model(0x51ED270B_u64 ^ 0xF3A5C7D1_u64, &lambda_a);
1768        let (n_prev, prev) = fit_small_model(0x2545F491_u64 ^ 0x4F6CDD1D_u64, &lambda_b);
1769        assert_eq!(n, n_prev);
1770        let p = 4usize;
1771        let v: Array1<f64> = ndarray::array![0.7, -1.3, 0.4, 0.9];
1772
1773        // γ = 1 ⇒ byte-identical to this model's row_metric, regardless of prev.
1774        let base = model.row_metric(n).expect("row_metric");
1775        for prev_opt in [None, Some(&prev)] {
1776            let damped = model.row_metric_damped(n, 1.0, prev_opt).expect("damped γ=1");
1777            for row in [0usize, n / 2, n - 1] {
1778                for i in 0..p {
1779                    for k in 0..p {
1780                        assert_eq!(
1781                            damped.factor_entry(row, i, k),
1782                            base.factor_entry(row, i, k),
1783                            "γ=1 must be byte-identical to row_metric at ({row},{i},{k})"
1784                        );
1785                    }
1786                }
1787            }
1788        }
1789
1790        // γ = 0, prev = None ⇒ Euclidean identity: quad_form = ‖v‖².
1791        let ident = model.row_metric_damped(n, 0.0, None).expect("damped γ=0 None");
1792        let sumsq: f64 = v.iter().map(|x| x * x).sum();
1793        for row in [0usize, n / 2, n - 1] {
1794            let q = ident.quad_form(row, v.view());
1795            assert!(
1796                (q - sumsq).abs() <= 1e-12 * (1.0 + sumsq),
1797                "γ=0/None must be the identity metric: quad_form {q} vs ‖v‖² {sumsq}"
1798            );
1799        }
1800
1801        // γ = 0, prev = Some ⇒ byte-identical to prev.row_metric.
1802        let prev_metric = prev.row_metric(n).expect("prev row_metric");
1803        let damped0 = model.row_metric_damped(n, 0.0, Some(&prev)).expect("damped γ=0 Some");
1804        for row in [0usize, n / 2, n - 1] {
1805            for i in 0..p {
1806                for k in 0..p {
1807                    assert_eq!(
1808                        damped0.factor_entry(row, i, k),
1809                        prev_metric.factor_entry(row, i, k),
1810                        "γ=0/Some must be byte-identical to prev.row_metric at ({row},{i},{k})"
1811                    );
1812                }
1813            }
1814        }
1815
1816        // 0 < γ < 1 ⇒ valid SPD metric.
1817        let mid = model.row_metric_damped(n, 0.5, Some(&prev)).expect("damped γ=0.5");
1818        for row in [0usize, n / 2, n - 1] {
1819            let q = mid.quad_form(row, v.view());
1820            assert!(q.is_finite() && q > 0.0, "γ=0.5 metric must be SPD; got {q}");
1821        }
1822
1823        // Invalid γ rejected.
1824        assert!(model.row_metric_damped(n, 1.5, None).is_err());
1825        assert!(model.row_metric_damped(n, -0.1, None).is_err());
1826        assert!(model.row_metric_damped(n, f64::NAN, None).is_err());
1827    }
1828
1829    /// WAVE-2 #2021 Λ nursery→promotion: `promotion_candidates` must fire only
1830    /// for a factor that BOTH persists across passes (aligns with the previous
1831    /// model's Λ) AND clears the idiosyncratic-noise energy floor; a fresh
1832    /// orthogonal direction, an over-high energy floor, and `prev = None` all
1833    /// yield no candidates, and out-of-range gates are rejected.
1834    #[test]
1835    fn promotion_candidates_gates_on_persistence_and_energy() {
1836        let lambda_a = ndarray::array![1.5, 1.2, -0.4, 0.3];
1837        let lambda_b = ndarray::array![-0.6, 1.1, 0.9, -1.3];
1838        // Same planted direction across two passes ⇒ persistent.
1839        let (_, prev) = fit_small_model(0xA1B2C3D4_u64 ^ 0x0F0F0F0F_u64, &lambda_a);
1840        let (_, cur) = fit_small_model(0x5566778899AABBCC_u64, &lambda_a);
1841        // A different (well-separated) planted direction ⇒ NOT aligned with cur.
1842        let (_, other) = fit_small_model(0x1122334455667788_u64, &lambda_b);
1843
1844        assert!(prev.factor_rank() >= 1 && cur.factor_rank() >= 1 && other.factor_rank() >= 1);
1845
1846        // Persistent + energetic ⇒ at least one candidate, aligned with the
1847        // planted direction and above the noise floor.
1848        let cands = cur
1849            .promotion_candidates(Some(&prev), 0.9, 1.0)
1850            .expect("promotion_candidates");
1851        assert!(
1852            !cands.is_empty(),
1853            "a persistent, energetic factor must yield a promotion candidate"
1854        );
1855        let top = &cands[0];
1856        assert!(
1857            top.persistence_alignment >= 0.9,
1858            "top candidate must clear the alignment gate; got {}",
1859            top.persistence_alignment
1860        );
1861        // The promoted unit direction must align with the planted (unit) lambda_a.
1862        let la_norm = lambda_a.dot(&lambda_a).sqrt();
1863        let la_unit = lambda_a.mapv(|v| v / la_norm);
1864        let dir_cos = top.direction.dot(&la_unit).abs();
1865        assert!(
1866            dir_cos > 0.9,
1867            "promoted direction must recover the planted factor; |cos| = {dir_cos:.4}"
1868        );
1869        assert!(
1870            (top.direction.dot(&top.direction) - 1.0).abs() < 1e-10,
1871            "promoted direction must be unit-norm"
1872        );
1873        assert!(top.energy > 0.0);
1874
1875        // A fresh, well-separated direction does NOT persist ⇒ no candidate at 0.9.
1876        let cross = cur
1877            .promotion_candidates(Some(&other), 0.9, 1.0)
1878            .expect("promotion_candidates cross");
1879        assert!(
1880            cross.is_empty(),
1881            "a non-persistent (unaligned) factor must not be promoted; got {} candidate(s)",
1882            cross.len()
1883        );
1884
1885        // An over-high energy floor rejects even the persistent factor.
1886        let floored = cur
1887            .promotion_candidates(Some(&prev), 0.9, 1.0e6)
1888            .expect("promotion_candidates floored");
1889        assert!(
1890            floored.is_empty(),
1891            "energy floor must gate out factors below the noise-scaled threshold"
1892        );
1893
1894        // prev = None (first structured pass, damping toward I) ⇒ no candidates.
1895        assert!(cur.promotion_candidates(None, 0.9, 1.0).unwrap().is_empty());
1896
1897        // Invalid gates rejected.
1898        assert!(cur.promotion_candidates(Some(&prev), 1.5, 1.0).is_err());
1899        assert!(cur.promotion_candidates(Some(&prev), -0.1, 1.0).is_err());
1900        assert!(cur.promotion_candidates(Some(&prev), 0.9, -1.0).is_err());
1901        assert!(cur.promotion_candidates(Some(&prev), f64::NAN, 1.0).is_err());
1902    }
1903}