gam_problem/row_metric.rs
1//! `RowMetric` — the single provenance-carrying per-row inner product shared by
2//! the SAE-manifold **likelihood** (residual whitening) and the **gauge**
3//! (isometry pullback weight).
4//!
5//! # Why this exists
6//!
7//! The SAE-manifold machine historically carried *two* independent inner
8//! products:
9//!
10//! * the **likelihood** measured reconstruction residuals isotropically — a
11//! single scalar dispersion `φ̂ = RSS / residual-dof`, the data-fit loop
12//! summing the bare `½ rᵀr`; there was no per-row metric at all; and
13//! * the **gauge** carried its own per-row metric in
14//! `IsometryPenalty.weight: WeightField` — a low-rank `W_n = U_n U_nᵀ`
15//! pullback `g_n = J_nᵀ W_n J_n`, settable independently of anything the
16//! likelihood saw.
17//!
18//! Nothing structurally forced "the metric the likelihood whitens by" to equal
19//! "the metric the gauge pulls back through". That is exactly the
20//! objective↔gradient-desync bug class wearing geometry clothing: a
21//! likelihood-metric ≠ gauge-metric state was *representable*.
22//!
23//! `RowMetric` collapses the two into one object. The likelihood whitens
24//! through it; the gauge `WeightField` is *constructed from* it. A
25//! divergent-metric state is therefore unrepresentable — there is only one
26//! per-row factor stack `U_n`, with one [`MetricProvenance`] tag.
27//!
28//! # Magic-by-default selector
29//!
30//! There is no flag. The provenance is chosen by whether per-row Fisher factors
31//! exist:
32//!
33//! * no factors supplied ⇒ [`MetricProvenance::Euclidean`]; `W_n = I_p`;
34//! whitening is the identity, so `φ̂` and the data-fit loop are
35//! **bit-for-bit** the prior isotropic path; and
36//! * per-row Fisher factors supplied ⇒ [`MetricProvenance::OutputFisher`]; the
37//! residual is whitened by `U_nᵀ` and the gauge pulls back through the same
38//! `U_n`.
39//!
40//! # Validation
41//!
42//! Every metric block is constructed **through**
43//! [`crate::normalize_fisher_rao_blocks`], which
44//! broadcasts and eigenvalue-validates PSD-ness. `RowMetric` does not
45//! reimplement that validation; it materializes `W_n = U_n U_nᵀ` (which is PSD
46//! by construction) and runs it through the shared normalizer as the
47//! single point of truth for "is this a valid precision metric".
48//!
49//! Any rank floor used to make a block invertible for an internal solve is
50//! **solver-only** (mirroring `RidgePolicy::solver_only`, #747): it never enters
51//! the residual the objective sums, so `δ` cannot bias the criterion.
52
53use ndarray::{Array2, Array3, ArrayView1};
54use std::sync::Arc;
55
56use crate::normalize_fisher_rao_blocks;
57
58/// Per-observation behavioral-metric field `W_n ∈ ℝ^{p × p}`, stored in
59/// **low-rank factored form** `W_n = U_n U_n^T` with `U_n ∈ ℝ^{p × r_n}`.
60///
61/// The canonical coordinate is the one where one unit of motion in `t` is one
62/// unit of behavioral change in the output space, so the `W_n` weighting is
63/// load-bearing: the pullback metric is `g_n = J_n^T W_n J_n`. Storing as
64/// `U_n` lets every contraction in this module run in
65/// `(J^T U_n)(U_n^T J)` order, which is `O(p · r · d + r · d²)` per row — we
66/// **never** materialize the `p × p` `W_n`, which is essential when `p`
67/// (number of observation channels) is large but rank is small (e.g. one or
68/// two behavioral dimensions per latent observation).
69///
70/// `Identity` is the gauge-fix default and corresponds to `U_n = I_p` so the
71/// pullback reduces to the standard `J_n^T J_n`. `Factored` stores the
72/// per-row `U_n` blocks contiguously: every row's factor is `p × rank`, and
73/// rows may share the same rank (uniform-rank case) or vary if the field is
74/// data-driven. For the uniform-rank case the storage is
75/// `(n_obs, p * rank)` row-major.
76#[derive(Clone)]
77pub enum WeightField {
78 /// `W_n = I_p` for every `n`. Reduces to the bare pullback `J^T J`.
79 Identity,
80 /// Per-row low-rank factor `U_n ∈ ℝ^{p × rank}`. Storage layout: a
81 /// `(n_obs, p * rank)` row-major matrix where row `n` packs `U_n` in
82 /// column-major-within-row order `U_n[i, k] = u[n, i * rank + k]`.
83 Factored {
84 u: Arc<Array2<f64>>,
85 rank: usize,
86 p_out: usize,
87 },
88}
89
90impl std::fmt::Debug for WeightField {
91 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
92 match self {
93 WeightField::Identity => f.write_str("Identity"),
94 WeightField::Factored { u, rank, p_out } => f
95 .debug_struct("Factored")
96 .field("shape", &format_args!("{}×{}", u.nrows(), u.ncols()))
97 .field("rank", rank)
98 .field("p_out", p_out)
99 .finish(),
100 }
101 }
102}
103
104impl WeightField {
105 /// Apply `U_n^T J_n` for a specific row, given both the row's `J_n` flat
106 /// `(p * d)` slice and the row's `U_n` flat `(p * rank)` slice. Returns
107 /// the `(rank × d)` matrix and its row count.
108 pub fn project_jac_row_with_u(
109 u_row: &[f64],
110 jac_row: &[f64],
111 p: usize,
112 rank: usize,
113 d: usize,
114 ) -> Array2<f64> {
115 // M[k, a] = Σ_i U[i, k] · J[i, a].
116 let mut m = Array2::<f64>::zeros((rank, d));
117 for k in 0..rank {
118 for a in 0..d {
119 let mut s = 0.0;
120 for i in 0..p {
121 s += u_row[i * rank + k] * jac_row[i * d + a];
122 }
123 m[[k, a]] = s;
124 }
125 }
126 m
127 }
128}
129
130/// Where the per-row metric came from — the provenance that makes
131/// "likelihood-metric ≠ gauge-metric" diagnosable instead of silent.
132///
133/// Object 4 (the gauge object) reads this to certify which inner product the
134/// fit actually used; #974 fills [`MetricProvenance::WhitenedStructured`] with a
135/// factor-analytic residual-covariance whitening.
136#[derive(Clone, Copy, PartialEq, Eq, Debug)]
137pub enum MetricProvenance {
138 /// `M_n = I_p` for every row. The likelihood is isotropic and the gauge
139 /// pullback reduces to the bare `J_nᵀ J_n`. This is the default and is
140 /// bit-for-bit the historical isotropic-`φ̂` path.
141 Euclidean,
142 /// `M_n = U_n U_nᵀ (+ solver-only δI)` from supplied per-row output-Fisher
143 /// factors `U_n ∈ ℝ^{p × rank}`. The canonical "one unit of latent motion ↦
144 /// one unit of behavioral change" metric: residuals are whitened in the
145 /// output-Fisher inner product and the gauge pulls back through the same
146 /// factors. The `rank` is carried in the provenance so a consumer (Object 4)
147 /// can certify the factor rank that produced the inner product.
148 OutputFisher { rank: usize },
149 /// `M_n = U_n U_nᵀ` from per-row output-Fisher factors that aggregate the
150 /// **downstream** influence of position `n` over future positions through
151 /// the KV path, rather than the same-position logits of
152 /// [`MetricProvenance::OutputFisher`] (#980, mechanism 2).
153 ///
154 /// The same-position pullback `∂logits_t/∂x_t` can be ≈ 0 for a feature
155 /// whose entire causal effect lands many tokens later (information carried
156 /// forward through attention); a gauge built on it is blind to exactly that
157 /// content. This provenance is the forward-looking alternative: each row's
158 /// factor `U_n` is the top-`rank` factorization of the aggregated output
159 /// Fisher `Σ_{t ≥ n} (∂logits_t/∂x_n)ᵀ F_t (∂logits_t/∂x_n)` over future
160 /// positions the residual stream at `n` reaches. It is provenance-generic:
161 /// it whitens nothing ([`Self::whitens_likelihood`] is `false`, like
162 /// [`MetricProvenance::OutputFisher`]) and drives the gauge / lens /
163 /// enrichment unchanged ([`Self::is_output_fisher_like`]). The lens/gauge
164 /// machinery consumes it identically; only the *scientific* reading
165 /// changes — dormant-feature detection becomes forward-looking (a feature
166 /// driving far-future tokens now registers behavioral coupling that the
167 /// same-position metric reported as ≈ 0).
168 OutputFisherDownstream { rank: usize },
169 /// Structured-residual whitening: `M_n = Σ_n^{-1}` from the **estimated**
170 /// factor-analytic residual covariance `Σ_n = Λ c(z_n) Λᵀ + D` (#974), with
171 /// `factor_rank` the selected factor count. Produced by
172 /// Structured-residual producers materialize this provenance when they fit
173 /// a residual-covariance whitening model;
174 /// the only provenance for which
175 /// [`whitens_likelihood`](RowMetric::whitens_likelihood) is `true`. It
176 /// carries the same low-rank factor layout as
177 /// [`MetricProvenance::OutputFisher`].
178 WhitenedStructured { factor_rank: usize },
179}
180
181/// The single per-row metric object. Holds one low-rank factor stack `U_n` (or
182/// none, for Euclidean) plus the validated PSD blocks, tagged with its
183/// [`MetricProvenance`].
184///
185/// `p` is the output dimensionality (residual / Jacobian-column dimension); the
186/// per-row factor `U_n ∈ ℝ^{p × rank}` so `W_n = U_n U_nᵀ ∈ ℝ^{p × p}` without
187/// ever being materialized as `p × p` in any hot path.
188#[derive(Clone, Debug)]
189pub struct RowMetric {
190 provenance: MetricProvenance,
191 n_rows: usize,
192 p: usize,
193 rank: usize,
194 /// `(n_rows, p * rank)` row-major: `U_n[i, k] = u[n, i * rank + k]`. `None`
195 /// for [`MetricProvenance::Euclidean`] (the identity factor is implicit).
196 factors: Option<Arc<Array2<f64>>>,
197 /// **Solver-only** Tikhonov floor `δ` added as `δ I_p` to make a
198 /// rank-deficient `U_n U_nᵀ` invertible for an *internal solve only*.
199 ///
200 /// Invariant (mirrors `RidgePolicy::solver_only`, #747): `δ` **never** enters
201 /// any quantity that feeds the evidence criterion. The criterion-facing
202 /// quad-form / whitening / fisher-mass methods all use the *un-floored*
203 /// `U_n U_nᵀ`; only [`Self::solve_floor`]-tagged solver helpers see `δ`. A
204 /// nonzero floor therefore cannot bias the objective the optimizer reports.
205 solver_delta: f64,
206 /// Per-row traces `tr(M_n)` of the criterion-facing (un-floored) metric.
207 ///
208 /// This is the only dense-block reduction any consumer reads (the #980
209 /// Fisher-mass row measure); the `(n_rows, p, p)` block stack itself is
210 /// validated **streamingly** at construction through
211 /// [`normalize_fisher_rao_blocks`] one row at a time and then dropped.
212 /// Retaining it was `n·p²·8` bytes — 13 GiB at `(n=2000, p=896)` and an
213 /// OOM at LLM-scale `p` — for a record nothing ever re-read. The solver
214 /// `δ` is deliberately *not* baked in here, so this is the
215 /// criterion-facing trace.
216 traces: ndarray::Array1<f64>,
217}
218
219impl RowMetric {
220 /// Euclidean metric: `W_n = I_p` for all `n`. Whitening is the identity, so
221 /// the likelihood residual path is bit-for-bit the prior isotropic `φ̂`.
222 ///
223 /// Constructed directly: the identity stack is PSD axiomatically, so
224 /// routing it through the dense normalizer would materialize and
225 /// spectrum-check `n` identity blocks (`n·p²` memory, `n·p³` flops) to
226 /// validate a tautology. `tr(I_p) = p` per row.
227 pub fn euclidean(n_rows: usize, p: usize) -> Result<Self, String> {
228 Ok(Self {
229 provenance: MetricProvenance::Euclidean,
230 n_rows,
231 p,
232 rank: p,
233 factors: None,
234 solver_delta: 0.0,
235 traces: ndarray::Array1::<f64>::from_elem(n_rows, p as f64),
236 })
237 }
238
239 /// Output-Fisher metric: per-row low-rank factors `U_n ∈ ℝ^{p × rank}`
240 /// supplied as a `(n_rows, p * rank)` row-major matrix (`U_n[i, k] =
241 /// u[n, i * rank + k]`). The induced `M_n = U_n U_nᵀ` is PSD by
242 /// construction; it is validated through [`normalize_fisher_rao_blocks`] so
243 /// the validation path is shared. No solver floor (`δ = 0`).
244 pub fn output_fisher(u: Arc<Array2<f64>>, p: usize, rank: usize) -> Result<Self, String> {
245 Self::from_factors(MetricProvenance::OutputFisher { rank }, u, p, rank, 0.0)
246 }
247
248 /// Downstream-influence output-Fisher metric: per-row factors `U_n ∈
249 /// ℝ^{p × rank}` whose `M_n = U_n U_nᵀ` is the aggregated output Fisher of
250 /// position `n` over the **future** positions it reaches through the KV path
251 /// ([`MetricProvenance::OutputFisherDownstream`], #980 mechanism 2). The
252 /// factor layout is identical to [`Self::output_fisher`]; only the
253 /// provenance tag (and hence the scientific reading) differs. Whitens
254 /// nothing, drives the gauge / lens / enrichment exactly as the
255 /// same-position metric does — the consuming machinery is provenance-generic
256 /// (see [`Self::is_output_fisher_like`]).
257 pub fn output_fisher_downstream(
258 u: Arc<Array2<f64>>,
259 p: usize,
260 rank: usize,
261 ) -> Result<Self, String> {
262 Self::from_factors(
263 MetricProvenance::OutputFisherDownstream { rank },
264 u,
265 p,
266 rank,
267 0.0,
268 )
269 }
270
271 /// Like [`Self::output_fisher`] but with a **solver-only** Tikhonov floor
272 /// `δ ≥ 0`. The floor is recorded for solver helpers only; every
273 /// criterion-facing method (`quad_form`, `whiten_residual`, `fisher_mass`)
274 /// ignores it (#747 discipline), so the evidence criterion is `δ`-free.
275 pub fn output_fisher_with_solver_floor(
276 u: Arc<Array2<f64>>,
277 p: usize,
278 rank: usize,
279 solver_delta: f64,
280 ) -> Result<Self, String> {
281 if !(solver_delta.is_finite() && solver_delta >= 0.0) {
282 return Err(format!(
283 "RowMetric::output_fisher_with_solver_floor: solver_delta must be finite and \
284 non-negative; got {solver_delta}"
285 ));
286 }
287 Self::from_factors(
288 MetricProvenance::OutputFisher { rank },
289 u,
290 p,
291 rank,
292 solver_delta,
293 )
294 }
295
296 /// Structured-residual whitening from supplied per-row precision factors.
297 ///
298 /// `u` carries the per-row factor stack `U_n ∈ ℝ^{p × rank}` (row-major flat)
299 /// with `U_n U_nᵀ = M_n = Σ_n^{-1}` — the precision of the **estimated**
300 /// residual-covariance noise model. This is the low-level constructor; #974
301 /// producers that *fit* `Σ_n` (a low-rank factor + diagonal + smooth
302 /// activity-scale) assemble these factors and call through here. Because the
303 /// provenance is
304 /// [`MetricProvenance::WhitenedStructured`], [`Self::whitens_likelihood`] is
305 /// `true`: a metric built this way is the first that whitens the likelihood.
306 pub fn whitened_structured(u: Arc<Array2<f64>>, p: usize, rank: usize) -> Result<Self, String> {
307 Self::from_factors(
308 MetricProvenance::WhitenedStructured { factor_rank: rank },
309 u,
310 p,
311 rank,
312 0.0,
313 )
314 }
315
316 fn from_factors(
317 provenance: MetricProvenance,
318 u: Arc<Array2<f64>>,
319 p: usize,
320 rank: usize,
321 solver_delta: f64,
322 ) -> Result<Self, String> {
323 let n_rows = u.nrows();
324 if u.ncols() != p * rank {
325 return Err(format!(
326 "RowMetric::from_factors: factor matrix has {} cols; expected p*rank = {}*{} = {}",
327 u.ncols(),
328 p,
329 rank,
330 p * rank
331 ));
332 }
333 if !u.iter().all(|v| v.is_finite()) {
334 return Err("RowMetric::from_factors: factors must be finite".to_string());
335 }
336 // Materialize W_n = U_n U_nᵀ one row at a time (PSD by construction),
337 // validate each through the single shared normalizer rather than
338 // reimplementing the PSD check, record its trace, and drop the block.
339 // Streaming keeps construction O(p²) memory; the former whole-stack
340 // materialization retained `n·p²` doubles nothing ever re-read.
341 let mut traces = ndarray::Array1::<f64>::zeros(n_rows);
342 let mut full = Array3::<f64>::zeros((1, p, p));
343 for row in 0..n_rows {
344 for i in 0..p {
345 for j in 0..p {
346 let mut acc = 0.0;
347 for k in 0..rank {
348 acc += u[[row, i * rank + k]] * u[[row, j * rank + k]];
349 }
350 full[[0, i, j]] = acc;
351 }
352 }
353 normalize_fisher_rao_blocks(full.view().into_dyn(), 1, p)
354 .map_err(|e| format!("RowMetric::from_factors: row {row}: {e}"))?;
355 let mut tr = 0.0_f64;
356 for i in 0..p {
357 tr += full[[0, i, i]];
358 }
359 traces[row] = tr;
360 }
361 Ok(Self {
362 provenance,
363 n_rows,
364 p,
365 rank,
366 factors: Some(u),
367 solver_delta,
368 traces,
369 })
370 }
371
372 /// The provenance tag (consumed by Object 4 to certify the inner product).
373 pub fn provenance(&self) -> MetricProvenance {
374 self.provenance
375 }
376
377 /// Whether this metric is allowed to **whiten the likelihood** (i.e. replace
378 /// the isotropic reconstruction data-fit `½ rᵀr` with the whitened
379 /// `½ rᵀ M_n r`).
380 ///
381 /// This is TRUE **only** for [`MetricProvenance::WhitenedStructured`] — a
382 /// genuinely *estimated noise model* (a factor-analytic residual covariance,
383 /// #974), for which whitening the likelihood is the statistically correct
384 /// thing to do. For [`MetricProvenance::Euclidean`] there is nothing to
385 /// whiten by, and for [`MetricProvenance::OutputFisher`] the inner product is
386 /// an **output-geometry gauge**, not an estimated noise model — whitening the
387 /// likelihood by it would silently replace the reconstruction loss with a
388 /// Fisher pullback (the #980 failure mode). So both leave the likelihood
389 /// untouched and only the gauge sees the metric (see [`Self::drives_gauge`]).
390 pub fn whitens_likelihood(&self) -> bool {
391 matches!(self.provenance, MetricProvenance::WhitenedStructured { .. })
392 }
393
394 /// Whether this metric **drives the gauge** — i.e. the isometry-penalty
395 /// pullback weight is taken from it rather than the identity.
396 ///
397 /// TRUE for any non-[`MetricProvenance::Euclidean`] provenance: both
398 /// [`MetricProvenance::OutputFisher`] and
399 /// [`MetricProvenance::WhitenedStructured`] supply a non-identity per-row
400 /// inner product the gauge pulls back through. Euclidean reduces the gauge
401 /// pullback to the bare `J_nᵀ J_n`, so it does not drive the gauge.
402 pub fn drives_gauge(&self) -> bool {
403 !matches!(self.provenance, MetricProvenance::Euclidean)
404 }
405
406 /// Whether this metric is an **output-Fisher gauge** — either the
407 /// same-position [`MetricProvenance::OutputFisher`] or the downstream
408 /// [`MetricProvenance::OutputFisherDownstream`] (#980). The two share every
409 /// consumer behavior (Sym(F) separation under the gauge, two-lens coupling,
410 /// steering geometry, enrichment); they differ only in the *scientific*
411 /// reading of what behavioral coupling means (same-position vs
412 /// forward-looking). Consumers that gate on "is this an output-Fisher
413 /// pullback" should use this predicate rather than matching one variant, so
414 /// the downstream metric rides the identical path.
415 pub fn is_output_fisher_like(&self) -> bool {
416 matches!(
417 self.provenance,
418 MetricProvenance::OutputFisher { .. } | MetricProvenance::OutputFisherDownstream { .. }
419 )
420 }
421
422 /// Number of rows the metric is defined over.
423 pub fn n_rows(&self) -> usize {
424 self.n_rows
425 }
426
427 /// Output dimensionality `p` (residual / Jacobian-column dimension).
428 pub fn p_out(&self) -> usize {
429 self.p
430 }
431
432 /// The factor rank: the dimension of the whitened residual
433 /// [`Self::whiten_residual_row`] returns (and the column count of the per-row
434 /// factor `U_n ∈ ℝ^{p × rank}`). For [`MetricProvenance::Euclidean`] this is
435 /// `p` (the implicit identity factor), so a consumer that sizes a whitened
436 /// buffer by `metric_rank()` gets the right length in every provenance.
437 pub fn metric_rank(&self) -> usize {
438 self.rank
439 }
440
441 /// Per-row traces `tr(M_n)` of the criterion-facing (un-floored) metric —
442 /// the Fisher-mass reduction the #980 row measure consumes. The dense
443 /// `(n_rows, p, p)` stack is validated streamingly at construction and
444 /// never retained; consumers wanting an explicit `W_n` rebuild it from
445 /// [`Self::metric_rank`]-sized factors.
446 pub fn row_traces(&self) -> ndarray::ArrayView1<'_, f64> {
447 self.traces.view()
448 }
449
450 /// Whiten a single `p`-dimensional residual row `r` into the coordinates
451 /// whose squared Euclidean norm equals `rᵀ W_n r`.
452 ///
453 /// * Euclidean: returns `r` unchanged (`‖r‖² = rᵀ I r`), so the likelihood
454 /// reproduces the isotropic `½ rᵀr` data-fit bit-for-bit.
455 /// * Factored: returns `U_nᵀ r ∈ ℝ^{rank}`, with
456 /// `‖U_nᵀ r‖² = rᵀ U_n U_nᵀ r = rᵀ W_n r`.
457 ///
458 /// This is the load-bearing identity that lets the data-fit loop sum
459 /// `0.5 * Σ whitened²` and recover exactly `rᵀ W_n r` whatever the
460 /// provenance.
461 pub fn whiten_residual_row(&self, row: usize, r: ArrayView1<'_, f64>) -> Vec<f64> {
462 match &self.factors {
463 None => r.iter().copied().collect(),
464 Some(u) => {
465 let mut out = vec![0.0_f64; self.rank];
466 for k in 0..self.rank {
467 let mut acc = 0.0;
468 for i in 0..self.p {
469 acc += u[[row, i * self.rank + k]] * r[i];
470 }
471 out[k] = acc;
472 }
473 out
474 }
475 }
476 }
477
478 /// The factor entry `U_n[i, k]` for one row (`i ∈ [0, p)`, `k ∈ [0, rank)`).
479 /// For [`MetricProvenance::Euclidean`] the implicit factor is `I_p`, so this
480 /// returns `1.0` when `i == k` and `0.0` otherwise — letting a consumer that
481 /// whitens a Jacobian via `factor_entry` produce the identity whitening
482 /// without a provenance branch. Reads the **un-floored** factors (criterion
483 /// face, #747).
484 #[inline]
485 pub fn factor_entry(&self, row: usize, i: usize, k: usize) -> f64 {
486 match &self.factors {
487 None => {
488 if i == k {
489 1.0
490 } else {
491 0.0
492 }
493 }
494 Some(u) => u[[row, i * self.rank + k]],
495 }
496 }
497
498 /// Apply the full per-row metric `M_n x = U_n (U_nᵀ x) ∈ ℝ^p` for one
499 /// `p`-vector `x`, formed factored (`rank` flops in, `p` flops out) — never
500 /// materializing `M_n` as `p × p`. Euclidean returns `x` unchanged
501 /// (`M_n = I_p`). This is the p-space metric-applied vector the SAE β-tier
502 /// data-fit gradient contracts (β lives in p-output space, so its gradient
503 /// needs `M_n r_n`, not the rank-space whitened residual `U_nᵀ r_n`). Uses the
504 /// **un-floored** factors (criterion face, `δ`-free, #747 invariant).
505 pub fn apply_metric_row(&self, row: usize, x: ArrayView1<'_, f64>) -> Vec<f64> {
506 match &self.factors {
507 None => x.iter().copied().collect(),
508 Some(u) => {
509 // w = U_nᵀ x ∈ ℝ^{rank}.
510 let mut w = vec![0.0_f64; self.rank];
511 for k in 0..self.rank {
512 let mut acc = 0.0;
513 for i in 0..self.p {
514 acc += u[[row, i * self.rank + k]] * x[i];
515 }
516 w[k] = acc;
517 }
518 // out = U_n w ∈ ℝ^p.
519 let mut out = vec![0.0_f64; self.p];
520 for i in 0..self.p {
521 let mut acc = 0.0;
522 for k in 0..self.rank {
523 acc += u[[row, i * self.rank + k]] * w[k];
524 }
525 out[i] = acc;
526 }
527 out
528 }
529 }
530 }
531
532 /// Pullback metric `g_n = J_nᵀ W_n J_n` for one row, formed as
533 /// `(J_nᵀ U_n)(U_nᵀ J_n)` — never materializing the `p × p` `W_n`.
534 ///
535 /// `j_row` is the row's Jacobian `J_n ∈ ℝ^{p × d}` flattened row-major
536 /// (`J_n[i, a] = j_row[i * d + a]`). Returns the `d × d` `g_n`.
537 pub fn pullback(&self, row: usize, j_row: &[f64], d: usize) -> Array2<f64> {
538 match &self.factors {
539 None => {
540 // W_n = I_p ⇒ g_n = J_nᵀ J_n.
541 let mut g = Array2::<f64>::zeros((d, d));
542 for a in 0..d {
543 for b in a..d {
544 let mut acc = 0.0;
545 for i in 0..self.p {
546 acc += j_row[i * d + a] * j_row[i * d + b];
547 }
548 g[[a, b]] = acc;
549 g[[b, a]] = acc;
550 }
551 }
552 g
553 }
554 Some(u) => {
555 // M_n = U_nᵀ J_n ∈ ℝ^{rank × d}; g_n = M_nᵀ M_n.
556 let mut m = Array2::<f64>::zeros((self.rank, d));
557 for k in 0..self.rank {
558 for a in 0..d {
559 let mut acc = 0.0;
560 for i in 0..self.p {
561 acc += u[[row, i * self.rank + k]] * j_row[i * d + a];
562 }
563 m[[k, a]] = acc;
564 }
565 }
566 let mut g = Array2::<f64>::zeros((d, d));
567 for a in 0..d {
568 for b in a..d {
569 let mut acc = 0.0;
570 for k in 0..self.rank {
571 acc += m[[k, a]] * m[[k, b]];
572 }
573 g[[a, b]] = acc;
574 g[[b, a]] = acc;
575 }
576 }
577 g
578 }
579 }
580 }
581
582 /// Quadratic form `r_nᵀ M_n r_n` for one row's residual `r_n ∈ ℝ^p`, formed
583 /// **factored** as `‖U_nᵀ r_n‖²` — never materializing the `p × p` `M_n`.
584 ///
585 /// This is the criterion-facing squared residual the likelihood sums; it uses
586 /// the **un-floored** `U_n U_nᵀ`, so the solver `δ` does not enter it
587 /// (#747 invariant). Euclidean provenance returns the bit-identical `‖r_n‖²`.
588 #[inline]
589 pub fn quad_form(&self, row: usize, r: ArrayView1<'_, f64>) -> f64 {
590 match &self.factors {
591 None => r.iter().map(|&v| v * v).sum(),
592 Some(_) => self
593 .whiten_residual_row(row, r)
594 .iter()
595 .map(|&w| w * w)
596 .sum(),
597 }
598 }
599
600 /// Whiten a per-row Jacobian `J_n ∈ ℝ^{p × d}` (row-major flat,
601 /// `J_n[i, a] = j_row[i * d + a]`) into `M_n = U_nᵀ J_n ∈ ℝ^{rank × d}` so
602 /// that `M_nᵀ M_n = J_nᵀ (U_n U_nᵀ) J_n = J_nᵀ W_n J_n` is the pullback
603 /// **without** any `p × p` intermediate. Euclidean returns `J_n` reshaped to
604 /// `(p, d)` (the identity whitening). Solver `δ` is not applied (criterion
605 /// face).
606 pub fn whiten_jacobian(&self, row: usize, j_row: &[f64], d: usize) -> Array2<f64> {
607 match &self.factors {
608 None => {
609 let mut out = Array2::<f64>::zeros((self.p, d));
610 for i in 0..self.p {
611 for a in 0..d {
612 out[[i, a]] = j_row[i * d + a];
613 }
614 }
615 out
616 }
617 Some(u) => {
618 let mut m = Array2::<f64>::zeros((self.rank, d));
619 for k in 0..self.rank {
620 for a in 0..d {
621 let mut acc = 0.0;
622 for i in 0..self.p {
623 acc += u[[row, i * self.rank + k]] * j_row[i * d + a];
624 }
625 m[[k, a]] = acc;
626 }
627 }
628 m
629 }
630 }
631 }
632
633 /// Fisher mass of a per-row output vector `x_n ∈ ℝ^p`: the scalar
634 /// `x_nᵀ M_n x_n` (alias of [`Self::quad_form`] read as an information mass
635 /// rather than a residual square). Factored, never `p × p`, `δ`-free.
636 #[inline]
637 pub fn fisher_mass(&self, row: usize, x: ArrayView1<'_, f64>) -> f64 {
638 self.quad_form(row, x)
639 }
640
641 /// The **solver-only** Tikhonov floor `δ` (#747). Returned for internal
642 /// solver helpers that need `U_n U_nᵀ + δ I` to be invertible; by contract
643 /// no caller may fold this into a criterion-facing quantity. Always `0` for
644 /// Euclidean and for factored metrics built without an explicit floor.
645 pub fn solver_floor(&self) -> f64 {
646 self.solver_delta
647 }
648
649 /// The gauge view of this metric: the
650 /// [`crate::WeightField`] the isometry penalty pulls back through.
651 ///
652 /// This is the **single** way an `IsometryPenalty` acquires a non-identity
653 /// gauge metric — the independent `WeightField` setter has been removed — so
654 /// the gauge metric is, by construction, the same object the likelihood
655 /// whitens with.
656 pub fn to_weight_field(&self) -> crate::WeightField {
657 use crate::WeightField;
658 match &self.factors {
659 None => WeightField::Identity,
660 Some(u) => WeightField::Factored {
661 u: Arc::clone(u),
662 rank: self.rank,
663 p_out: self.p,
664 },
665 }
666 }
667}