Skip to main content

gam_problem/
row_metric.rs

1//! `RowMetric` — the single provenance-carrying per-row inner product shared by
2//! the SAE-manifold **likelihood** (residual whitening) and the **gauge**
3//! (isometry pullback weight).
4//!
5//! # Why this exists
6//!
7//! The SAE-manifold machine historically carried *two* independent inner
8//! products:
9//!
10//! * the **likelihood** measured reconstruction residuals isotropically — a
11//!   single scalar dispersion `φ̂ = RSS / residual-dof`, the data-fit loop
12//!   summing the bare `½ rᵀr`; there was no per-row metric at all; and
13//! * the **gauge** carried its own per-row metric in
14//!   `IsometryPenalty.weight: WeightField` — a low-rank `W_n = U_n U_nᵀ`
15//!   pullback `g_n = J_nᵀ W_n J_n`, settable independently of anything the
16//!   likelihood saw.
17//!
18//! Nothing structurally forced "the metric the likelihood whitens by" to equal
19//! "the metric the gauge pulls back through". That is exactly the
20//! objective↔gradient-desync bug class wearing geometry clothing: a
21//! likelihood-metric ≠ gauge-metric state was *representable*.
22//!
23//! `RowMetric` collapses the two into one object. The likelihood whitens
24//! through it; the gauge `WeightField` is *constructed from* it. A
25//! divergent-metric state is therefore unrepresentable — there is only one
26//! per-row factor stack `U_n`, with one [`MetricProvenance`] tag.
27//!
28//! # Magic-by-default selector
29//!
30//! There is no flag. The provenance is chosen by whether per-row Fisher factors
31//! exist:
32//!
33//! * no factors supplied ⇒ [`MetricProvenance::Euclidean`]; `W_n = I_p`;
34//!   whitening is the identity, so `φ̂` and the data-fit loop are
35//!   **bit-for-bit** the prior isotropic path; and
36//! * per-row Fisher factors supplied ⇒ [`MetricProvenance::OutputFisher`]; the
37//!   residual is whitened by `U_nᵀ` and the gauge pulls back through the same
38//!   `U_n`.
39//!
40//! # Validation
41//!
42//! Every metric block is constructed **through**
43//! [`crate::normalize_fisher_rao_blocks`], which
44//! broadcasts and eigenvalue-validates PSD-ness. `RowMetric` does not
45//! reimplement that validation; it materializes `W_n = U_n U_nᵀ` (which is PSD
46//! by construction) and runs it through the shared normalizer as the
47//! single point of truth for "is this a valid precision metric".
48//!
49//! Any rank floor used to make a block invertible for an internal solve is
50//! **solver-only** (mirroring `RidgePolicy::solver_only`, #747): it never enters
51//! the residual the objective sums, so `δ` cannot bias the criterion.
52
53use ndarray::{Array2, Array3, ArrayView1};
54use std::sync::Arc;
55
56use crate::normalize_fisher_rao_blocks;
57
58/// Per-observation behavioral-metric field `W_n ∈ ℝ^{p × p}`, stored in
59/// **low-rank factored form** `W_n = U_n U_n^T` with `U_n ∈ ℝ^{p × r_n}`.
60///
61/// The canonical coordinate is the one where one unit of motion in `t` is one
62/// unit of behavioral change in the output space, so the `W_n` weighting is
63/// load-bearing: the pullback metric is `g_n = J_n^T W_n J_n`. Storing as
64/// `U_n` lets every contraction in this module run in
65/// `(J^T U_n)(U_n^T J)` order, which is `O(p · r · d + r · d²)` per row — we
66/// **never** materialize the `p × p` `W_n`, which is essential when `p`
67/// (number of observation channels) is large but rank is small (e.g. one or
68/// two behavioral dimensions per latent observation).
69///
70/// `Identity` is the gauge-fix default and corresponds to `U_n = I_p` so the
71/// pullback reduces to the standard `J_n^T J_n`. `Factored` stores the
72/// per-row `U_n` blocks contiguously: every row's factor is `p × rank`, and
73/// rows may share the same rank (uniform-rank case) or vary if the field is
74/// data-driven. For the uniform-rank case the storage is
75/// `(n_obs, p * rank)` row-major.
76#[derive(Clone)]
77pub enum WeightField {
78    /// `W_n = I_p` for every `n`. Reduces to the bare pullback `J^T J`.
79    Identity,
80    /// Per-row low-rank factor `U_n ∈ ℝ^{p × rank}`. Storage layout: a
81    /// `(n_obs, p * rank)` row-major matrix where row `n` packs `U_n` in
82    /// column-major-within-row order `U_n[i, k] = u[n, i * rank + k]`.
83    Factored {
84        u: Arc<Array2<f64>>,
85        rank: usize,
86        p_out: usize,
87    },
88}
89
90impl std::fmt::Debug for WeightField {
91    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
92        match self {
93            WeightField::Identity => f.write_str("Identity"),
94            WeightField::Factored { u, rank, p_out } => f
95                .debug_struct("Factored")
96                .field("shape", &format_args!("{}×{}", u.nrows(), u.ncols()))
97                .field("rank", rank)
98                .field("p_out", p_out)
99                .finish(),
100        }
101    }
102}
103
104impl WeightField {
105    /// Apply `U_n^T J_n` for a specific row, given both the row's `J_n` flat
106    /// `(p * d)` slice and the row's `U_n` flat `(p * rank)` slice. Returns
107    /// the `(rank × d)` matrix and its row count.
108    pub fn project_jac_row_with_u(
109        u_row: &[f64],
110        jac_row: &[f64],
111        p: usize,
112        rank: usize,
113        d: usize,
114    ) -> Array2<f64> {
115        // M[k, a] = Σ_i U[i, k] · J[i, a].
116        let mut m = Array2::<f64>::zeros((rank, d));
117        for k in 0..rank {
118            for a in 0..d {
119                let mut s = 0.0;
120                for i in 0..p {
121                    s += u_row[i * rank + k] * jac_row[i * d + a];
122                }
123                m[[k, a]] = s;
124            }
125        }
126        m
127    }
128}
129
130/// Where the per-row metric came from — the provenance that makes
131/// "likelihood-metric ≠ gauge-metric" diagnosable instead of silent.
132///
133/// Object 4 (the gauge object) reads this to certify which inner product the
134/// fit actually used; #974 fills [`MetricProvenance::WhitenedStructured`] with a
135/// factor-analytic residual-covariance whitening.
136#[derive(Clone, Copy, PartialEq, Eq, Debug)]
137pub enum MetricProvenance {
138    /// `M_n = I_p` for every row. The likelihood is isotropic and the gauge
139    /// pullback reduces to the bare `J_nᵀ J_n`. This is the default and is
140    /// bit-for-bit the historical isotropic-`φ̂` path.
141    Euclidean,
142    /// `M_n = U_n U_nᵀ (+ solver-only δI)` from supplied per-row output-Fisher
143    /// factors `U_n ∈ ℝ^{p × rank}`. The canonical "one unit of latent motion ↦
144    /// one unit of behavioral change" metric: residuals are whitened in the
145    /// output-Fisher inner product and the gauge pulls back through the same
146    /// factors. The `rank` is carried in the provenance so a consumer (Object 4)
147    /// can certify the factor rank that produced the inner product.
148    OutputFisher { rank: usize },
149    /// `M_n = U_n U_nᵀ` from per-row output-Fisher factors that aggregate the
150    /// **downstream** influence of position `n` over future positions through
151    /// the KV path, rather than the same-position logits of
152    /// [`MetricProvenance::OutputFisher`] (#980, mechanism 2).
153    ///
154    /// The same-position pullback `∂logits_t/∂x_t` can be ≈ 0 for a feature
155    /// whose entire causal effect lands many tokens later (information carried
156    /// forward through attention); a gauge built on it is blind to exactly that
157    /// content. This provenance is the forward-looking alternative: each row's
158    /// factor `U_n` is the top-`rank` factorization of the aggregated output
159    /// Fisher `Σ_{t ≥ n} (∂logits_t/∂x_n)ᵀ F_t (∂logits_t/∂x_n)` over future
160    /// positions the residual stream at `n` reaches. It is provenance-generic:
161    /// it whitens nothing ([`Self::whitens_likelihood`] is `false`, like
162    /// [`MetricProvenance::OutputFisher`]) and drives the gauge / lens /
163    /// enrichment unchanged ([`Self::is_output_fisher_like`]). The lens/gauge
164    /// machinery consumes it identically; only the *scientific* reading
165    /// changes — dormant-feature detection becomes forward-looking (a feature
166    /// driving far-future tokens now registers behavioral coupling that the
167    /// same-position metric reported as ≈ 0).
168    OutputFisherDownstream { rank: usize },
169    /// Structured-residual whitening: `M_n = Σ_n^{-1}` from the **estimated**
170    /// factor-analytic residual covariance `Σ_n = Λ c(z_n) Λᵀ + D` (#974), with
171    /// `factor_rank` the selected factor count. Produced by
172    /// Structured-residual producers materialize this provenance when they fit
173    /// a residual-covariance whitening model;
174    /// the only provenance for which
175    /// [`whitens_likelihood`](RowMetric::whitens_likelihood) is `true`. It
176    /// carries the same low-rank factor layout as
177    /// [`MetricProvenance::OutputFisher`].
178    WhitenedStructured { factor_rank: usize },
179}
180
181/// The single per-row metric object. Holds one low-rank factor stack `U_n` (or
182/// none, for Euclidean) plus the validated PSD blocks, tagged with its
183/// [`MetricProvenance`].
184///
185/// `p` is the output dimensionality (residual / Jacobian-column dimension); the
186/// per-row factor `U_n ∈ ℝ^{p × rank}` so `W_n = U_n U_nᵀ ∈ ℝ^{p × p}` without
187/// ever being materialized as `p × p` in any hot path.
188#[derive(Clone, Debug)]
189pub struct RowMetric {
190    provenance: MetricProvenance,
191    n_rows: usize,
192    p: usize,
193    rank: usize,
194    /// `(n_rows, p * rank)` row-major: `U_n[i, k] = u[n, i * rank + k]`. `None`
195    /// for [`MetricProvenance::Euclidean`] (the identity factor is implicit).
196    factors: Option<Arc<Array2<f64>>>,
197    /// **Solver-only** Tikhonov floor `δ` added as `δ I_p` to make a
198    /// rank-deficient `U_n U_nᵀ` invertible for an *internal solve only*.
199    ///
200    /// Invariant (mirrors `RidgePolicy::solver_only`, #747): `δ` **never** enters
201    /// any quantity that feeds the evidence criterion. The criterion-facing
202    /// quad-form / whitening / fisher-mass methods all use the *un-floored*
203    /// `U_n U_nᵀ`; only [`Self::solve_floor`]-tagged solver helpers see `δ`. A
204    /// nonzero floor therefore cannot bias the objective the optimizer reports.
205    solver_delta: f64,
206    /// Per-row traces `tr(M_n)` of the criterion-facing (un-floored) metric.
207    ///
208    /// This is the only dense-block reduction any consumer reads (the #980
209    /// Fisher-mass row measure); the `(n_rows, p, p)` block stack itself is
210    /// validated **streamingly** at construction through
211    /// [`normalize_fisher_rao_blocks`] one row at a time and then dropped.
212    /// Retaining it was `n·p²·8` bytes — 13 GiB at `(n=2000, p=896)` and an
213    /// OOM at LLM-scale `p` — for a record nothing ever re-read. The solver
214    /// `δ` is deliberately *not* baked in here, so this is the
215    /// criterion-facing trace.
216    traces: ndarray::Array1<f64>,
217}
218
219impl RowMetric {
220    /// Euclidean metric: `W_n = I_p` for all `n`. Whitening is the identity, so
221    /// the likelihood residual path is bit-for-bit the prior isotropic `φ̂`.
222    ///
223    /// Constructed directly: the identity stack is PSD axiomatically, so
224    /// routing it through the dense normalizer would materialize and
225    /// spectrum-check `n` identity blocks (`n·p²` memory, `n·p³` flops) to
226    /// validate a tautology. `tr(I_p) = p` per row.
227    pub fn euclidean(n_rows: usize, p: usize) -> Result<Self, String> {
228        Ok(Self {
229            provenance: MetricProvenance::Euclidean,
230            n_rows,
231            p,
232            rank: p,
233            factors: None,
234            solver_delta: 0.0,
235            traces: ndarray::Array1::<f64>::from_elem(n_rows, p as f64),
236        })
237    }
238
239    /// Output-Fisher metric: per-row low-rank factors `U_n ∈ ℝ^{p × rank}`
240    /// supplied as a `(n_rows, p * rank)` row-major matrix (`U_n[i, k] =
241    /// u[n, i * rank + k]`). The induced `M_n = U_n U_nᵀ` is PSD by
242    /// construction; it is validated through [`normalize_fisher_rao_blocks`] so
243    /// the validation path is shared. No solver floor (`δ = 0`).
244    pub fn output_fisher(u: Arc<Array2<f64>>, p: usize, rank: usize) -> Result<Self, String> {
245        Self::from_factors(MetricProvenance::OutputFisher { rank }, u, p, rank, 0.0)
246    }
247
248    /// Downstream-influence output-Fisher metric: per-row factors `U_n ∈
249    /// ℝ^{p × rank}` whose `M_n = U_n U_nᵀ` is the aggregated output Fisher of
250    /// position `n` over the **future** positions it reaches through the KV path
251    /// ([`MetricProvenance::OutputFisherDownstream`], #980 mechanism 2). The
252    /// factor layout is identical to [`Self::output_fisher`]; only the
253    /// provenance tag (and hence the scientific reading) differs. Whitens
254    /// nothing, drives the gauge / lens / enrichment exactly as the
255    /// same-position metric does — the consuming machinery is provenance-generic
256    /// (see [`Self::is_output_fisher_like`]).
257    pub fn output_fisher_downstream(
258        u: Arc<Array2<f64>>,
259        p: usize,
260        rank: usize,
261    ) -> Result<Self, String> {
262        Self::from_factors(
263            MetricProvenance::OutputFisherDownstream { rank },
264            u,
265            p,
266            rank,
267            0.0,
268        )
269    }
270
271    /// Like [`Self::output_fisher`] but with a **solver-only** Tikhonov floor
272    /// `δ ≥ 0`. The floor is recorded for solver helpers only; every
273    /// criterion-facing method (`quad_form`, `whiten_residual`, `fisher_mass`)
274    /// ignores it (#747 discipline), so the evidence criterion is `δ`-free.
275    pub fn output_fisher_with_solver_floor(
276        u: Arc<Array2<f64>>,
277        p: usize,
278        rank: usize,
279        solver_delta: f64,
280    ) -> Result<Self, String> {
281        if !(solver_delta.is_finite() && solver_delta >= 0.0) {
282            return Err(format!(
283                "RowMetric::output_fisher_with_solver_floor: solver_delta must be finite and \
284                 non-negative; got {solver_delta}"
285            ));
286        }
287        Self::from_factors(
288            MetricProvenance::OutputFisher { rank },
289            u,
290            p,
291            rank,
292            solver_delta,
293        )
294    }
295
296    /// Structured-residual whitening from supplied per-row precision factors.
297    ///
298    /// `u` carries the per-row factor stack `U_n ∈ ℝ^{p × rank}` (row-major flat)
299    /// with `U_n U_nᵀ = M_n = Σ_n^{-1}` — the precision of the **estimated**
300    /// residual-covariance noise model. This is the low-level constructor; #974
301    /// producers that *fit* `Σ_n` (a low-rank factor + diagonal + smooth
302    /// activity-scale) assemble these factors and call through here. Because the
303    /// provenance is
304    /// [`MetricProvenance::WhitenedStructured`], [`Self::whitens_likelihood`] is
305    /// `true`: a metric built this way is the first that whitens the likelihood.
306    pub fn whitened_structured(u: Arc<Array2<f64>>, p: usize, rank: usize) -> Result<Self, String> {
307        Self::from_factors(
308            MetricProvenance::WhitenedStructured { factor_rank: rank },
309            u,
310            p,
311            rank,
312            0.0,
313        )
314    }
315
316    fn from_factors(
317        provenance: MetricProvenance,
318        u: Arc<Array2<f64>>,
319        p: usize,
320        rank: usize,
321        solver_delta: f64,
322    ) -> Result<Self, String> {
323        let n_rows = u.nrows();
324        if u.ncols() != p * rank {
325            return Err(format!(
326                "RowMetric::from_factors: factor matrix has {} cols; expected p*rank = {}*{} = {}",
327                u.ncols(),
328                p,
329                rank,
330                p * rank
331            ));
332        }
333        if !u.iter().all(|v| v.is_finite()) {
334            return Err("RowMetric::from_factors: factors must be finite".to_string());
335        }
336        // Materialize W_n = U_n U_nᵀ one row at a time (PSD by construction),
337        // validate each through the single shared normalizer rather than
338        // reimplementing the PSD check, record its trace, and drop the block.
339        // Streaming keeps construction O(p²) memory; the former whole-stack
340        // materialization retained `n·p²` doubles nothing ever re-read.
341        let mut traces = ndarray::Array1::<f64>::zeros(n_rows);
342        let mut full = Array3::<f64>::zeros((1, p, p));
343        for row in 0..n_rows {
344            for i in 0..p {
345                for j in 0..p {
346                    let mut acc = 0.0;
347                    for k in 0..rank {
348                        acc += u[[row, i * rank + k]] * u[[row, j * rank + k]];
349                    }
350                    full[[0, i, j]] = acc;
351                }
352            }
353            normalize_fisher_rao_blocks(full.view().into_dyn(), 1, p)
354                .map_err(|e| format!("RowMetric::from_factors: row {row}: {e}"))?;
355            let mut tr = 0.0_f64;
356            for i in 0..p {
357                tr += full[[0, i, i]];
358            }
359            traces[row] = tr;
360        }
361        Ok(Self {
362            provenance,
363            n_rows,
364            p,
365            rank,
366            factors: Some(u),
367            solver_delta,
368            traces,
369        })
370    }
371
372    /// The provenance tag (consumed by Object 4 to certify the inner product).
373    pub fn provenance(&self) -> MetricProvenance {
374        self.provenance
375    }
376
377    /// Whether this metric is allowed to **whiten the likelihood** (i.e. replace
378    /// the isotropic reconstruction data-fit `½ rᵀr` with the whitened
379    /// `½ rᵀ M_n r`).
380    ///
381    /// This is TRUE **only** for [`MetricProvenance::WhitenedStructured`] — a
382    /// genuinely *estimated noise model* (a factor-analytic residual covariance,
383    /// #974), for which whitening the likelihood is the statistically correct
384    /// thing to do. For [`MetricProvenance::Euclidean`] there is nothing to
385    /// whiten by, and for [`MetricProvenance::OutputFisher`] the inner product is
386    /// an **output-geometry gauge**, not an estimated noise model — whitening the
387    /// likelihood by it would silently replace the reconstruction loss with a
388    /// Fisher pullback (the #980 failure mode). So both leave the likelihood
389    /// untouched and only the gauge sees the metric (see [`Self::drives_gauge`]).
390    pub fn whitens_likelihood(&self) -> bool {
391        matches!(self.provenance, MetricProvenance::WhitenedStructured { .. })
392    }
393
394    /// Whether this metric **drives the gauge** — i.e. the isometry-penalty
395    /// pullback weight is taken from it rather than the identity.
396    ///
397    /// TRUE for any non-[`MetricProvenance::Euclidean`] provenance: both
398    /// [`MetricProvenance::OutputFisher`] and
399    /// [`MetricProvenance::WhitenedStructured`] supply a non-identity per-row
400    /// inner product the gauge pulls back through. Euclidean reduces the gauge
401    /// pullback to the bare `J_nᵀ J_n`, so it does not drive the gauge.
402    pub fn drives_gauge(&self) -> bool {
403        !matches!(self.provenance, MetricProvenance::Euclidean)
404    }
405
406    /// Whether this metric is an **output-Fisher gauge** — either the
407    /// same-position [`MetricProvenance::OutputFisher`] or the downstream
408    /// [`MetricProvenance::OutputFisherDownstream`] (#980). The two share every
409    /// consumer behavior (Sym(F) separation under the gauge, two-lens coupling,
410    /// steering geometry, enrichment); they differ only in the *scientific*
411    /// reading of what behavioral coupling means (same-position vs
412    /// forward-looking). Consumers that gate on "is this an output-Fisher
413    /// pullback" should use this predicate rather than matching one variant, so
414    /// the downstream metric rides the identical path.
415    pub fn is_output_fisher_like(&self) -> bool {
416        matches!(
417            self.provenance,
418            MetricProvenance::OutputFisher { .. } | MetricProvenance::OutputFisherDownstream { .. }
419        )
420    }
421
422    /// Number of rows the metric is defined over.
423    pub fn n_rows(&self) -> usize {
424        self.n_rows
425    }
426
427    /// Output dimensionality `p` (residual / Jacobian-column dimension).
428    pub fn p_out(&self) -> usize {
429        self.p
430    }
431
432    /// The factor rank: the dimension of the whitened residual
433    /// [`Self::whiten_residual_row`] returns (and the column count of the per-row
434    /// factor `U_n ∈ ℝ^{p × rank}`). For [`MetricProvenance::Euclidean`] this is
435    /// `p` (the implicit identity factor), so a consumer that sizes a whitened
436    /// buffer by `metric_rank()` gets the right length in every provenance.
437    pub fn metric_rank(&self) -> usize {
438        self.rank
439    }
440
441    /// Per-row traces `tr(M_n)` of the criterion-facing (un-floored) metric —
442    /// the Fisher-mass reduction the #980 row measure consumes. The dense
443    /// `(n_rows, p, p)` stack is validated streamingly at construction and
444    /// never retained; consumers wanting an explicit `W_n` rebuild it from
445    /// [`Self::metric_rank`]-sized factors.
446    pub fn row_traces(&self) -> ndarray::ArrayView1<'_, f64> {
447        self.traces.view()
448    }
449
450    /// Whiten a single `p`-dimensional residual row `r` into the coordinates
451    /// whose squared Euclidean norm equals `rᵀ W_n r`.
452    ///
453    /// * Euclidean: returns `r` unchanged (`‖r‖² = rᵀ I r`), so the likelihood
454    ///   reproduces the isotropic `½ rᵀr` data-fit bit-for-bit.
455    /// * Factored: returns `U_nᵀ r ∈ ℝ^{rank}`, with
456    ///   `‖U_nᵀ r‖² = rᵀ U_n U_nᵀ r = rᵀ W_n r`.
457    ///
458    /// This is the load-bearing identity that lets the data-fit loop sum
459    /// `0.5 * Σ whitened²` and recover exactly `rᵀ W_n r` whatever the
460    /// provenance.
461    pub fn whiten_residual_row(&self, row: usize, r: ArrayView1<'_, f64>) -> Vec<f64> {
462        match &self.factors {
463            None => r.iter().copied().collect(),
464            Some(u) => {
465                let mut out = vec![0.0_f64; self.rank];
466                for k in 0..self.rank {
467                    let mut acc = 0.0;
468                    for i in 0..self.p {
469                        acc += u[[row, i * self.rank + k]] * r[i];
470                    }
471                    out[k] = acc;
472                }
473                out
474            }
475        }
476    }
477
478    /// The factor entry `U_n[i, k]` for one row (`i ∈ [0, p)`, `k ∈ [0, rank)`).
479    /// For [`MetricProvenance::Euclidean`] the implicit factor is `I_p`, so this
480    /// returns `1.0` when `i == k` and `0.0` otherwise — letting a consumer that
481    /// whitens a Jacobian via `factor_entry` produce the identity whitening
482    /// without a provenance branch. Reads the **un-floored** factors (criterion
483    /// face, #747).
484    #[inline]
485    pub fn factor_entry(&self, row: usize, i: usize, k: usize) -> f64 {
486        match &self.factors {
487            None => {
488                if i == k {
489                    1.0
490                } else {
491                    0.0
492                }
493            }
494            Some(u) => u[[row, i * self.rank + k]],
495        }
496    }
497
498    /// Apply the full per-row metric `M_n x = U_n (U_nᵀ x) ∈ ℝ^p` for one
499    /// `p`-vector `x`, formed factored (`rank` flops in, `p` flops out) — never
500    /// materializing `M_n` as `p × p`. Euclidean returns `x` unchanged
501    /// (`M_n = I_p`). This is the p-space metric-applied vector the SAE β-tier
502    /// data-fit gradient contracts (β lives in p-output space, so its gradient
503    /// needs `M_n r_n`, not the rank-space whitened residual `U_nᵀ r_n`). Uses the
504    /// **un-floored** factors (criterion face, `δ`-free, #747 invariant).
505    pub fn apply_metric_row(&self, row: usize, x: ArrayView1<'_, f64>) -> Vec<f64> {
506        match &self.factors {
507            None => x.iter().copied().collect(),
508            Some(u) => {
509                // w = U_nᵀ x ∈ ℝ^{rank}.
510                let mut w = vec![0.0_f64; self.rank];
511                for k in 0..self.rank {
512                    let mut acc = 0.0;
513                    for i in 0..self.p {
514                        acc += u[[row, i * self.rank + k]] * x[i];
515                    }
516                    w[k] = acc;
517                }
518                // out = U_n w ∈ ℝ^p.
519                let mut out = vec![0.0_f64; self.p];
520                for i in 0..self.p {
521                    let mut acc = 0.0;
522                    for k in 0..self.rank {
523                        acc += u[[row, i * self.rank + k]] * w[k];
524                    }
525                    out[i] = acc;
526                }
527                out
528            }
529        }
530    }
531
532    /// Pullback metric `g_n = J_nᵀ W_n J_n` for one row, formed as
533    /// `(J_nᵀ U_n)(U_nᵀ J_n)` — never materializing the `p × p` `W_n`.
534    ///
535    /// `j_row` is the row's Jacobian `J_n ∈ ℝ^{p × d}` flattened row-major
536    /// (`J_n[i, a] = j_row[i * d + a]`). Returns the `d × d` `g_n`.
537    pub fn pullback(&self, row: usize, j_row: &[f64], d: usize) -> Array2<f64> {
538        match &self.factors {
539            None => {
540                // W_n = I_p ⇒ g_n = J_nᵀ J_n.
541                let mut g = Array2::<f64>::zeros((d, d));
542                for a in 0..d {
543                    for b in a..d {
544                        let mut acc = 0.0;
545                        for i in 0..self.p {
546                            acc += j_row[i * d + a] * j_row[i * d + b];
547                        }
548                        g[[a, b]] = acc;
549                        g[[b, a]] = acc;
550                    }
551                }
552                g
553            }
554            Some(u) => {
555                // M_n = U_nᵀ J_n ∈ ℝ^{rank × d}; g_n = M_nᵀ M_n.
556                let mut m = Array2::<f64>::zeros((self.rank, d));
557                for k in 0..self.rank {
558                    for a in 0..d {
559                        let mut acc = 0.0;
560                        for i in 0..self.p {
561                            acc += u[[row, i * self.rank + k]] * j_row[i * d + a];
562                        }
563                        m[[k, a]] = acc;
564                    }
565                }
566                let mut g = Array2::<f64>::zeros((d, d));
567                for a in 0..d {
568                    for b in a..d {
569                        let mut acc = 0.0;
570                        for k in 0..self.rank {
571                            acc += m[[k, a]] * m[[k, b]];
572                        }
573                        g[[a, b]] = acc;
574                        g[[b, a]] = acc;
575                    }
576                }
577                g
578            }
579        }
580    }
581
582    /// Quadratic form `r_nᵀ M_n r_n` for one row's residual `r_n ∈ ℝ^p`, formed
583    /// **factored** as `‖U_nᵀ r_n‖²` — never materializing the `p × p` `M_n`.
584    ///
585    /// This is the criterion-facing squared residual the likelihood sums; it uses
586    /// the **un-floored** `U_n U_nᵀ`, so the solver `δ` does not enter it
587    /// (#747 invariant). Euclidean provenance returns the bit-identical `‖r_n‖²`.
588    #[inline]
589    pub fn quad_form(&self, row: usize, r: ArrayView1<'_, f64>) -> f64 {
590        match &self.factors {
591            None => r.iter().map(|&v| v * v).sum(),
592            Some(_) => self
593                .whiten_residual_row(row, r)
594                .iter()
595                .map(|&w| w * w)
596                .sum(),
597        }
598    }
599
600    /// Whiten a per-row Jacobian `J_n ∈ ℝ^{p × d}` (row-major flat,
601    /// `J_n[i, a] = j_row[i * d + a]`) into `M_n = U_nᵀ J_n ∈ ℝ^{rank × d}` so
602    /// that `M_nᵀ M_n = J_nᵀ (U_n U_nᵀ) J_n = J_nᵀ W_n J_n` is the pullback
603    /// **without** any `p × p` intermediate. Euclidean returns `J_n` reshaped to
604    /// `(p, d)` (the identity whitening). Solver `δ` is not applied (criterion
605    /// face).
606    pub fn whiten_jacobian(&self, row: usize, j_row: &[f64], d: usize) -> Array2<f64> {
607        match &self.factors {
608            None => {
609                let mut out = Array2::<f64>::zeros((self.p, d));
610                for i in 0..self.p {
611                    for a in 0..d {
612                        out[[i, a]] = j_row[i * d + a];
613                    }
614                }
615                out
616            }
617            Some(u) => {
618                let mut m = Array2::<f64>::zeros((self.rank, d));
619                for k in 0..self.rank {
620                    for a in 0..d {
621                        let mut acc = 0.0;
622                        for i in 0..self.p {
623                            acc += u[[row, i * self.rank + k]] * j_row[i * d + a];
624                        }
625                        m[[k, a]] = acc;
626                    }
627                }
628                m
629            }
630        }
631    }
632
633    /// Fisher mass of a per-row output vector `x_n ∈ ℝ^p`: the scalar
634    /// `x_nᵀ M_n x_n` (alias of [`Self::quad_form`] read as an information mass
635    /// rather than a residual square). Factored, never `p × p`, `δ`-free.
636    #[inline]
637    pub fn fisher_mass(&self, row: usize, x: ArrayView1<'_, f64>) -> f64 {
638        self.quad_form(row, x)
639    }
640
641    /// The **solver-only** Tikhonov floor `δ` (#747). Returned for internal
642    /// solver helpers that need `U_n U_nᵀ + δ I` to be invertible; by contract
643    /// no caller may fold this into a criterion-facing quantity. Always `0` for
644    /// Euclidean and for factored metrics built without an explicit floor.
645    pub fn solver_floor(&self) -> f64 {
646        self.solver_delta
647    }
648
649    /// The gauge view of this metric: the
650    /// [`crate::WeightField`] the isometry penalty pulls back through.
651    ///
652    /// This is the **single** way an `IsometryPenalty` acquires a non-identity
653    /// gauge metric — the independent `WeightField` setter has been removed — so
654    /// the gauge metric is, by construction, the same object the likelihood
655    /// whitens with.
656    pub fn to_weight_field(&self) -> crate::WeightField {
657        use crate::WeightField;
658        match &self.factors {
659            None => WeightField::Identity,
660            Some(u) => WeightField::Factored {
661                u: Arc::clone(u),
662                rank: self.rank,
663                p_out: self.p,
664            },
665        }
666    }
667}