gam_problem/
gauge.rs

1// One Gauge object (#933).
2//
3// Every identifiability mechanism in the engine performs the same
4// mathematical act: quotient the coefficient space by directions in
5// ker(J) ∩ ker(S), pick a section, fit in the reduced coordinates θ,
6// and lift estimates / covariance / geometry back to the raw
7// coordinates β. This module owns that act once.
8//
9// A `Gauge` is the affine section itself: the lift matrix
10// `T : reduced → raw` plus an affine shift `a`
11// (`β_raw = T · θ + a`) together with the per-block partitions
12// of both coordinate systems. Block-diagonal `T`
13// (independent per-block reductions, the canonical-audit case) and
14// block-upper-triangular `T` (cross-block residualisation, the
15// survival V+M-exact compile) are the same object — the partitions
16// record where each block's rows/columns live.
17//
18// Lift conventions (the whole point — there is exactly one):
19//   - point estimate:   β_raw = T · θ + a
20//   - covariance / any symmetric bilinear form: Σ_raw = T · Σ_θ · Tᵀ
21//   - η is invariant:   X_raw · (T · θ + a) = X_reduced · θ + offset_reduced
22//
23// Raw directions outside the section (zero rows of `T`) receive exactly
24// zero estimate, zero variance, and zero covariance with every other
25// coordinate: a coordinate the reduced fit cannot move carries no
26// posterior uncertainty in raw space.
27
28use ndarray::{Array1, Array2, ArrayBase, Data, Ix2};
29
30use gam_linalg::faer_ndarray::{fast_ab, fast_abt, fast_atb};
31
/// Neutral view of a compiled identifiability reparametrisation that
/// [`Gauge::from_compiled_map`] consumes. The concrete `CompiledMap`
/// emitted by the identifiability compiler lives ABOVE this crate, so
/// `Gauge` names only this trait (inverted dependency #1521); the
/// compiler crate provides the `impl`.
///
/// `raw_from_compiled` IS the global triangular lift `T`; the two block
/// range slices give the raw-width and compiled-width column partitions.
///
/// [`Gauge::from_compiled_map`] treats both range slices as partitions:
/// each range's `end` becomes the next block boundary, and the two slices
/// must have the same length. Implementors should therefore return ranges
/// that tile `0..total` contiguously, in block order.
pub trait CompiledBlockMap {
    /// The `(p_raw × p_compiled)` raw-from-compiled reparam matrix `T`.
    /// `Gauge::from_compiled_map` clones it into `Gauge::t_full`.
    fn raw_from_compiled(&self) -> &Array2<f64>;
    /// Per-block raw-width column ranges. In the resulting `Gauge` they
    /// delimit the raw partition (`Gauge::block_starts_raw`).
    fn raw_block_ranges(&self) -> &[std::ops::Range<usize>];
    /// Per-block compiled-width column ranges, parallel to
    /// [`Self::raw_block_ranges`]. In the resulting `Gauge` they delimit the
    /// reduced partition (`Gauge::block_starts_reduced`).
    fn compiled_block_ranges(&self) -> &[std::ops::Range<usize>];
}
49
/// The lift `T : reduced → raw` plus the per-block partitions of both
/// coordinate systems. See the module docs for the lift conventions.
///
/// All fields are public. The accessors and lift/restrict methods index
/// `t_full` and `affine_shift` through the two partitions without
/// re-validating them, so code that fills the fields by hand must keep
/// them mutually consistent: `t_full` has `raw_total()` rows and
/// `reduced_total()` columns, `affine_shift` has `raw_total()` entries, and
/// both start vectors have `n_blocks + 1` non-decreasing entries.
#[derive(Debug, Clone)]
pub struct Gauge {
    /// Global lift matrix, shape `(Σ p_b_raw) × (Σ r_b_reduced)`.
    pub t_full: Array2<f64>,
    /// Global affine shift in raw coordinates, length `Σ p_b_raw`.
    /// All-zero for a purely linear section.
    pub affine_shift: Array1<f64>,
    /// Raw-coordinate block partition: `block_starts_raw[b]..block_starts_raw[b+1]`
    /// is block `b`'s raw row range in `t_full`. Length `n_blocks + 1`, starts at 0.
    pub block_starts_raw: Vec<usize>,
    /// Reduced-coordinate block partition (columns of `t_full`), same layout.
    pub block_starts_reduced: Vec<usize>,
}
64
/// Prefix-sum block widths into block start offsets.
///
/// The result has `widths.len() + 1` entries: a leading `0` followed by the
/// running totals, so `starts[b]..starts[b + 1]` spans block `b` and the last
/// entry is the total width.
fn starts_from_widths(widths: &[usize]) -> Vec<usize> {
    let mut offsets = vec![0usize; widths.len() + 1];
    let mut running = 0usize;
    // Slot 0 stays at zero; every later slot receives the cumulative width.
    for (slot, width) in offsets[1..].iter_mut().zip(widths) {
        running += width;
        *slot = running;
    }
    offsets
}
73
74/// Assemble a block-upper-triangular lift `T` from per-block diagonal
75/// `V_b` matrices and strictly-upper residualisation blocks `R_{a→b}`.
76///
77/// `r_per_term[b]` (when `Some`) packs ALL strictly-upper off-diagonal
78/// columns for block `b` stacked row-wise across all earlier-priority
79/// blocks `a < b`: `nrows = Σ_{a<b} v_per_term[a].nrows()`,
80/// `ncols = v_per_term[b].ncols()`. The assembled `T` carries `V_b` on
81/// the diagonal and `−R_{a→b}` at `(a, b)`. `r_per_term[0]` must be
82/// `None` (no earlier block to residualise against).
83pub fn assemble_block_triangular_t(
84    v_per_term: &[Array2<f64>],
85    r_per_term: &[Option<Array2<f64>>],
86) -> Array2<f64> {
87    assert_eq!(
88        v_per_term.len(),
89        r_per_term.len(),
90        "assemble_block_triangular_t: v_per_term len {} != r_per_term len {}",
91        v_per_term.len(),
92        r_per_term.len(),
93    );
94    let raw_widths: Vec<usize> = v_per_term.iter().map(|v| v.nrows()).collect();
95    let kept_widths: Vec<usize> = v_per_term.iter().map(|v| v.ncols()).collect();
96    let row_offsets = starts_from_widths(&raw_widths);
97    let col_offsets = starts_from_widths(&kept_widths);
98    let total_rows = row_offsets.last().copied().unwrap_or(0);
99    let total_cols = col_offsets.last().copied().unwrap_or(0);
100    let mut t = Array2::<f64>::zeros((total_rows, total_cols));
101    // Diagonal: place V_b at (b, b).
102    for (b, v) in v_per_term.iter().enumerate() {
103        let r = v.nrows();
104        let c = v.ncols();
105        if r > 0 && c > 0 {
106            t.slice_mut(ndarray::s![
107                row_offsets[b]..row_offsets[b] + r,
108                col_offsets[b]..col_offsets[b] + c
109            ])
110            .assign(v);
111        }
112    }
113    // Strict upper triangle: for each b ≥ 1, place −R_{a→b} at (a, b),
114    // a < b, slicing the row-stacked `r_per_term[b]` in earlier-block order.
115    for b in 1..v_per_term.len() {
116        let Some(r_stack) = r_per_term[b].as_ref() else {
117            continue;
118        };
119        let kept_b = kept_widths[b];
120        assert_eq!(
121            r_stack.ncols(),
122            kept_b,
123            "assemble_block_triangular_t: r_per_term[{b}] has {} cols, expected {}",
124            r_stack.ncols(),
125            kept_b,
126        );
127        let expected_rows: usize = raw_widths.iter().take(b).sum();
128        assert_eq!(
129            r_stack.nrows(),
130            expected_rows,
131            "assemble_block_triangular_t: r_per_term[{b}] has {} rows, expected {} \
132             (sum of raw_widths[0..{}])",
133            r_stack.nrows(),
134            expected_rows,
135            b,
136        );
137        let mut local_row = 0usize;
138        for a in 0..b {
139            let r_a = raw_widths[a];
140            if r_a == 0 || kept_b == 0 {
141                local_row += r_a;
142                continue;
143            }
144            let block = r_stack.slice(ndarray::s![local_row..local_row + r_a, ..]);
145            let mut dst = t.slice_mut(ndarray::s![
146                row_offsets[a]..row_offsets[a] + r_a,
147                col_offsets[b]..col_offsets[b] + kept_b
148            ]);
149            for i in 0..r_a {
150                for j in 0..kept_b {
151                    dst[[i, j]] = -block[[i, j]];
152                }
153            }
154            local_row += r_a;
155        }
156    }
157    t
158}
159
160impl Gauge {
161    /// The trivial section: raw == reduced for every block.
162    pub fn identity(raw_widths: &[usize]) -> Self {
163        let transforms: Vec<Array2<f64>> =
164            raw_widths.iter().map(|&w| Array2::<f64>::eye(w)).collect();
165        Self::from_block_transforms(&transforms)
166    }
167
168    /// Block-diagonal section from independent per-block lifts
169    /// `T_b : reduced_b → raw_b` (selection matrices from the canonical
170    /// audit, orthogonalisation `V_b`s, or their compositions).
171    pub fn from_block_transforms(transforms: &[Array2<f64>]) -> Self {
172        let raw_total: usize = transforms.iter().map(|t| t.nrows()).sum();
173        Self::from_block_transforms_with_shift(transforms, Array1::zeros(raw_total))
174    }
175
176    /// Block-diagonal affine section from independent per-block lifts
177    /// plus one concatenated raw-coordinate shift.
178    pub fn from_block_transforms_with_shift(
179        transforms: &[Array2<f64>],
180        affine_shift: Array1<f64>,
181    ) -> Self {
182        let r_none: Vec<Option<Array2<f64>>> = transforms.iter().map(|_| None).collect();
183        let mut gauge = Self::from_v_and_r(transforms, &r_none);
184        assert_eq!(
185            affine_shift.len(),
186            gauge.raw_total(),
187            "Gauge::from_block_transforms_with_shift: affine shift len {} != raw width {}",
188            affine_shift.len(),
189            gauge.raw_total(),
190        );
191        gauge.affine_shift = affine_shift;
192        gauge
193    }
194
195    /// Single-block affine section.
196    pub fn from_block_transform_with_shift(
197        transform: Array2<f64>,
198        affine_shift: Array1<f64>,
199    ) -> Self {
200        Self::from_block_transforms_with_shift(&[transform], affine_shift)
201    }
202
203    /// Block-upper-triangular section from per-block `V_b` plus
204    /// cross-block residualisation stacks `R_{a→b}` — see
205    /// [`assemble_block_triangular_t`] for the packing convention.
206    pub fn from_v_and_r(v_per_term: &[Array2<f64>], r_per_term: &[Option<Array2<f64>>]) -> Self {
207        let raw_widths: Vec<usize> = v_per_term.iter().map(|v| v.nrows()).collect();
208        let reduced_widths: Vec<usize> = v_per_term.iter().map(|v| v.ncols()).collect();
209        Self {
210            t_full: assemble_block_triangular_t(v_per_term, r_per_term),
211            affine_shift: Array1::zeros(raw_widths.iter().sum::<usize>()),
212            block_starts_raw: starts_from_widths(&raw_widths),
213            block_starts_reduced: starts_from_widths(&reduced_widths),
214        }
215    }
216
217    /// The sum-to-zero (centering) section as a first-class single-block
218    /// gauge. `z` is the `(k × (k−1))` reparametrisation matrix returned by
219    /// `terms::basis::duchon_thinplate::apply_sum_to_zero_constraint`
220    /// (an orthonormal basis for `null(cᵀ)`, `c = Bᵀw` the weighted column
221    /// sums): the constrained design is `B_c = B · z`, so on the model
222    /// `η = B · β_raw = B_c · θ = B · z · θ` the raw coefficients lift back
223    /// from the reduced (centred) coefficients by exactly `β_raw = z · θ`.
224    ///
225    /// That is the one Gauge convention with `T = z` over a single block, so
226    /// the centring constraint stops being a special-cased outside-the-object
227    /// transform and becomes a `Gauge` section like every other reduction:
228    /// the covariance / penalised-Hessian of the centred fit pushes forward to
229    /// the raw basis through the SAME `z` via [`Gauge::lift_covariance`].
230    ///
231    /// `z` is taken as the section itself (rather than recomputed from a basis)
232    /// because the constraint matrix is the only gauge-relevant artifact — the
233    /// basis the column sums were taken over is irrelevant to the lift. The
234    /// only requirement is the structural one of a centring section:
235    /// `z.ncols() < z.nrows()` (at least one direction is removed); an identity
236    /// `z` would be `Gauge::identity` and is rejected so callers do not silently
237    /// treat an unconstrained block as centred.
238    pub fn sum_to_zero(z: Array2<f64>) -> Self {
239        let (k, r) = z.dim();
240        assert!(
241            k > 0 && r < k,
242            "Gauge::sum_to_zero: z must be a tall reparametrisation ({k}×{r}); \
243             a centring section removes at least one direction (r < k)",
244        );
245        Self::from_block_transforms(&[z])
246    }
247
248    /// Wrap an already-assembled global `T` given the per-block raw and
249    /// reduced width partitions.
250    pub fn from_t(t_full: Array2<f64>, raw_widths: &[usize], reduced_widths: &[usize]) -> Self {
251        let total_raw: usize = raw_widths.iter().sum();
252        Self::from_t_with_shift(t_full, raw_widths, reduced_widths, Array1::zeros(total_raw))
253    }
254
255    /// Wrap an already-assembled global affine section `β = Tθ + a` given the
256    /// per-block raw and reduced width partitions.
257    pub fn from_t_with_shift(
258        t_full: Array2<f64>,
259        raw_widths: &[usize],
260        reduced_widths: &[usize],
261        affine_shift: Array1<f64>,
262    ) -> Self {
263        assert_eq!(
264            raw_widths.len(),
265            reduced_widths.len(),
266            "Gauge::from_t: raw_widths len {} != reduced_widths len {}",
267            raw_widths.len(),
268            reduced_widths.len(),
269        );
270        let total_raw: usize = raw_widths.iter().sum();
271        let total_reduced: usize = reduced_widths.iter().sum();
272        assert_eq!(
273            t_full.dim(),
274            (total_raw, total_reduced),
275            "Gauge::from_t: T has shape {:?}, expected ({total_raw}, {total_reduced})",
276            t_full.dim(),
277        );
278        assert_eq!(
279            affine_shift.len(),
280            total_raw,
281            "Gauge::from_t_with_shift: affine shift len {} != raw width {total_raw}",
282            affine_shift.len(),
283        );
284        Self {
285            t_full,
286            affine_shift,
287            block_starts_raw: starts_from_widths(raw_widths),
288            block_starts_reduced: starts_from_widths(reduced_widths),
289        }
290    }
291
292    /// Build from a compiled identifiability reparametrisation
293    /// (see [`CompiledBlockMap`], implemented for the `CompiledMap` emitted by
294    /// the identifiability compiler): `map.raw_from_compiled()` IS the global
295    /// triangular `T`, and the block ranges give both partitions. `ordering`
296    /// is accepted purely as a length sanity check.
297    pub fn from_compiled_map<M: CompiledBlockMap, O>(map: &M, ordering: &[O]) -> Self {
298        assert_eq!(
299            map.raw_block_ranges().len(),
300            map.compiled_block_ranges().len(),
301            "Gauge::from_compiled_map: CompiledMap raw_block_ranges len {} != \
302             compiled_block_ranges len {}",
303            map.raw_block_ranges().len(),
304            map.compiled_block_ranges().len(),
305        );
306        assert_eq!(
307            map.raw_block_ranges().len(),
308            ordering.len(),
309            "Gauge::from_compiled_map: ordering len {} != block count {}",
310            ordering.len(),
311            map.raw_block_ranges().len(),
312        );
313        let mut block_starts_raw = Vec::with_capacity(map.raw_block_ranges().len() + 1);
314        block_starts_raw.push(0);
315        for r in map.raw_block_ranges() {
316            block_starts_raw.push(r.end);
317        }
318        let mut block_starts_reduced = Vec::with_capacity(map.compiled_block_ranges().len() + 1);
319        block_starts_reduced.push(0);
320        for r in map.compiled_block_ranges() {
321            block_starts_reduced.push(r.end);
322        }
323        let total_raw = block_starts_raw.last().copied().unwrap_or(0);
324        Self {
325            t_full: map.raw_from_compiled().clone(),
326            affine_shift: Array1::zeros(total_raw),
327            block_starts_raw,
328            block_starts_reduced,
329        }
330    }
331
332    /// Number of blocks in the partition.
333    pub fn n_blocks(&self) -> usize {
334        self.block_starts_raw.len().saturating_sub(1)
335    }
336
337    /// Total raw width `Σ p_b`.
338    pub fn raw_total(&self) -> usize {
339        self.block_starts_raw.last().copied().unwrap_or(0)
340    }
341
342    /// Total reduced width `Σ r_b`.
343    pub fn reduced_total(&self) -> usize {
344        self.block_starts_reduced.last().copied().unwrap_or(0)
345    }
346
347    /// Per-block raw widths.
348    pub fn raw_widths(&self) -> Vec<usize> {
349        self.block_starts_raw
350            .windows(2)
351            .map(|w| w[1] - w[0])
352            .collect()
353    }
354
355    /// Per-block reduced widths.
356    pub fn reduced_widths(&self) -> Vec<usize> {
357        self.block_starts_reduced
358            .windows(2)
359            .map(|w| w[1] - w[0])
360            .collect()
361    }
362
363    /// The diagonal slab `T_b = T[raw_b, reduced_b]` of block `b`.
364    /// For a block-diagonal gauge this is the whole story for the
365    /// block; for a triangular gauge it omits the cross-block `−R`.
366    pub fn block_transform(&self, b: usize) -> Array2<f64> {
367        assert!(
368            b < self.n_blocks(),
369            "Gauge::block_transform: block {b} out of range {}",
370            self.n_blocks(),
371        );
372        self.t_full
373            .slice(ndarray::s![
374                self.block_starts_raw[b]..self.block_starts_raw[b + 1],
375                self.block_starts_reduced[b]..self.block_starts_reduced[b + 1]
376            ])
377            .to_owned()
378    }
379
380    /// Compose a raw design with the section: `X_reduced = X_raw · T`.
381    pub fn restrict_design<S: Data<Elem = f64>>(
382        &self,
383        raw_design: &ArrayBase<S, Ix2>,
384    ) -> Array2<f64> {
385        let raw_total = self.raw_total();
386        assert_eq!(
387            raw_design.ncols(),
388            raw_total,
389            "Gauge::restrict_design: design has {} columns, expected raw width {raw_total}",
390            raw_design.ncols(),
391        );
392        fast_ab(raw_design, &self.t_full)
393    }
394
395    /// Compose a raw design and offset with the affine section:
396    /// `X_raw · (Tθ + a) + o_raw = (X_raw · T)θ + (o_raw + X_raw · a)`.
397    pub fn restrict_design_and_offset<S: Data<Elem = f64>>(
398        &self,
399        raw_design: &ArrayBase<S, Ix2>,
400        raw_offset: &Array1<f64>,
401    ) -> (Array2<f64>, Array1<f64>) {
402        assert_eq!(
403            raw_design.nrows(),
404            raw_offset.len(),
405            "Gauge::restrict_design_and_offset: design rows {} != offset len {}",
406            raw_design.nrows(),
407            raw_offset.len(),
408        );
409        let reduced_design = self.restrict_design(raw_design);
410        let reduced_offset = raw_offset + &raw_design.dot(&self.affine_shift);
411        (reduced_design, reduced_offset)
412    }
413
414    /// Pull a raw-coordinate quadratic form back to reduced coordinates:
415    /// `S_reduced = Tᵀ · S_raw · T`.
416    pub fn restrict_penalty<S: Data<Elem = f64>>(
417        &self,
418        raw_penalty: &ArrayBase<S, Ix2>,
419    ) -> Array2<f64> {
420        let raw_total = self.raw_total();
421        assert_eq!(
422            raw_penalty.dim(),
423            (raw_total, raw_total),
424            "Gauge::restrict_penalty: matrix has shape {:?}, expected ({raw_total}, {raw_total})",
425            raw_penalty.dim(),
426        );
427        let t_s = fast_atb(&self.t_full, raw_penalty);
428        fast_ab(&t_s, &self.t_full)
429    }
430
431    /// Append blocks that were never reduced (raw == reduced, identity
432    /// lift). Used to lift joint objects that span both gauged blocks
433    /// and untouched ones (e.g. the survival flex blocks alongside the
434    /// compiled parametric blocks).
435    pub fn extend_with_identity(&self, extra_raw_widths: &[usize]) -> Self {
436        let extra_total: usize = extra_raw_widths.iter().sum();
437        let raw_total = self.raw_total();
438        let reduced_total = self.reduced_total();
439        let mut t = Array2::<f64>::zeros((raw_total + extra_total, reduced_total + extra_total));
440        t.slice_mut(ndarray::s![0..raw_total, 0..reduced_total])
441            .assign(&self.t_full);
442        for k in 0..extra_total {
443            t[[raw_total + k, reduced_total + k]] = 1.0;
444        }
445        let mut block_starts_raw = self.block_starts_raw.clone();
446        let mut block_starts_reduced = self.block_starts_reduced.clone();
447        for &w in extra_raw_widths {
448            block_starts_raw.push(block_starts_raw.last().copied().unwrap() + w);
449            block_starts_reduced.push(block_starts_reduced.last().copied().unwrap() + w);
450        }
451        let mut affine_shift = Array1::<f64>::zeros(raw_total + extra_total);
452        affine_shift
453            .slice_mut(ndarray::s![0..raw_total])
454            .assign(&self.affine_shift);
455        Self {
456            t_full: t,
457            affine_shift,
458            block_starts_raw,
459            block_starts_reduced,
460        }
461    }
462
463    /// Lift per-block reduced coefficients to per-block raw
464    /// coefficients: concatenate into θ, apply `β = T · θ + a`, split at
465    /// the raw partition.
466    pub fn lift_block_betas(&self, reduced_block_betas: &[Array1<f64>]) -> Vec<Array1<f64>> {
467        let n_blocks = self.n_blocks();
468        assert_eq!(
469            reduced_block_betas.len(),
470            n_blocks,
471            "Gauge::lift_block_betas: got {} reduced block betas, expected {}",
472            reduced_block_betas.len(),
473            n_blocks,
474        );
475        for (b, beta) in reduced_block_betas.iter().enumerate() {
476            let expected = self.block_starts_reduced[b + 1] - self.block_starts_reduced[b];
477            assert_eq!(
478                beta.len(),
479                expected,
480                "Gauge::lift_block_betas: block {b} has β of len {}, expected reduced width {}",
481                beta.len(),
482                expected,
483            );
484        }
485        let mut theta_full = Array1::<f64>::zeros(self.reduced_total());
486        for (b, beta) in reduced_block_betas.iter().enumerate() {
487            let c0 = self.block_starts_reduced[b];
488            let c1 = self.block_starts_reduced[b + 1];
489            theta_full.slice_mut(ndarray::s![c0..c1]).assign(beta);
490        }
491        let beta_full = self.t_full.dot(&theta_full) + &self.affine_shift;
492        let mut out = Vec::with_capacity(n_blocks);
493        for b in 0..n_blocks {
494            let r0 = self.block_starts_raw[b];
495            let r1 = self.block_starts_raw[b + 1];
496            out.push(beta_full.slice(ndarray::s![r0..r1]).to_owned());
497        }
498        out
499    }
500
501    /// Push a reduced-coordinate symmetric matrix (posterior covariance,
502    /// penalized Hessian — any symmetric bilinear form on θ) forward to
503    /// raw coordinates via the exact sandwich `M_raw = T · M_θ · Tᵀ`.
504    ///
505    /// The result is explicitly symmetrised: `T · M · Tᵀ` is symmetric
506    /// for symmetric `M`, but the two matmuls accumulate independent
507    /// rounding, so the transpose pair is averaged to land an exactly
508    /// symmetric matrix for downstream Cholesky / eigensolves.
509    pub fn lift_covariance(&self, m_reduced: &Array2<f64>) -> Array2<f64> {
510        let total_reduced = self.reduced_total();
511        assert_eq!(
512            m_reduced.dim(),
513            (total_reduced, total_reduced),
514            "Gauge::lift_covariance: matrix has shape {:?}, expected ({total_reduced}, {total_reduced})",
515            m_reduced.dim(),
516        );
517        let t_m = fast_ab(&self.t_full, m_reduced);
518        let mut raw = fast_abt(&t_m, &self.t_full);
519        let n = raw.nrows();
520        for i in 0..n {
521            for j in (i + 1)..n {
522                let avg = 0.5 * (raw[[i, j]] + raw[[j, i]]);
523                raw[[i, j]] = avg;
524                raw[[j, i]] = avg;
525            }
526        }
527        raw
528    }
529}
530
531#[cfg(test)]
532mod tests {
533    use super::*;
534
535    #[test]
536    fn identity_gauge_round_trips_betas_and_covariance() {
537        let gauge = Gauge::identity(&[2, 3]);
538        assert_eq!(gauge.n_blocks(), 2);
539        assert_eq!(gauge.raw_total(), 5);
540        assert_eq!(gauge.reduced_total(), 5);
541        let theta = vec![
542            Array1::from(vec![0.5, -0.25]),
543            Array1::from(vec![1.0, 2.0, -3.0]),
544        ];
545        let raw = gauge.lift_block_betas(&theta);
546        assert_eq!(raw[0].as_slice().unwrap(), &[0.5, -0.25]);
547        assert_eq!(raw[1].as_slice().unwrap(), &[1.0, 2.0, -3.0]);
548
549        let mut cov = Array2::<f64>::eye(5);
550        cov[[0, 3]] = 0.4;
551        cov[[3, 0]] = 0.4;
552        let lifted = gauge.lift_covariance(&cov);
553        for i in 0..5 {
554            for j in 0..5 {
555                assert!(
556                    (lifted[[i, j]] - cov[[i, j]]).abs() < 1e-14,
557                    "identity gauge must be a covariance no-op at ({i},{j})",
558                );
559            }
560        }
561    }
562
563    #[test]
564    fn affine_gauge_lifts_betas_and_restricts_offsets() {
565        let t = Array2::from_shape_vec((3, 1), vec![2.0, -1.0, 0.5]).unwrap();
566        let shift = Array1::from(vec![0.25, 1.5, -0.75]);
567        let gauge = Gauge::from_block_transform_with_shift(t.clone(), shift.clone());
568        let theta = Array1::from(vec![4.0]);
569
570        let raw = gauge.lift_block_betas(&[theta.clone()]);
571        let expected_raw = t.dot(&theta) + &shift;
572        assert_eq!(raw[0], expected_raw);
573
574        let x = Array2::from_shape_vec((2, 3), vec![1.0, 0.0, 2.0, -1.0, 3.0, 0.5]).unwrap();
575        let offset = Array1::from(vec![0.1, -0.2]);
576        let (x_reduced, offset_reduced) = gauge.restrict_design_and_offset(&x, &offset);
577        assert_eq!(x_reduced, x.dot(&t));
578        assert_eq!(offset_reduced, &offset + &x.dot(&shift));
579
580        let eta_raw = x.dot(&expected_raw) + &offset;
581        let eta_reduced = x_reduced.dot(&theta) + &offset_reduced;
582        for i in 0..eta_raw.len() {
583            assert!((eta_raw[i] - eta_reduced[i]).abs() < 1e-14);
584        }
585
586        let cov_reduced = Array2::from_elem((1, 1), 3.0);
587        let lifted_cov = gauge.lift_covariance(&cov_reduced);
588        let expected_cov = t.dot(&cov_reduced).dot(&t.t());
589        assert_eq!(lifted_cov, expected_cov);
590    }
591
592    /// The covariance pushforward of an affine section `β = T·θ + a` must be
593    /// EXACTLY independent of the affine shift `a` — `Cov(T·θ + a) = T·Cov(θ)·Tᵀ`
594    /// for any constant `a`, because a deterministic offset adds no variance. The
595    /// b≡1 unit-log-t pin (#892) folds the warp into `a`; this is the property
596    /// that guarantees reporting the pinned coefficients carries the same
597    /// posterior uncertainty as the unpinned linear section. We assert it two
598    /// ways: (1) the analytic lift is bit-identical across a sweep of shift
599    /// magnitudes spanning the zero-shift linear case up to 1e7; and (2) an
600    /// empirical check — the sample covariance of `T·θ_k + a` over reduced draws
601    /// `θ_k` is unchanged when `a` is replaced by a 1e6-scale offset (the offset
602    /// cancels under centering).
603    #[test]
604    fn affine_shift_leaves_lifted_covariance_invariant() {
605        // A non-trivial 4-raw × 2-reduced section (so T mixes coordinates).
606        let t =
607            Array2::from_shape_vec((4, 2), vec![1.0, 0.0, 0.5, -1.0, 2.0, 0.3, -0.4, 1.5]).unwrap();
608        let raw_widths = [4usize];
609        let reduced_widths = [2usize];
610
611        // A non-diagonal reduced covariance.
612        let cov_reduced = Array2::from_shape_vec((2, 2), vec![2.0, -0.7, -0.7, 1.3]).unwrap();
613
614        // The reference lift is the zero-shift (purely linear) section.
615        let base =
616            Gauge::from_t_with_shift(t.clone(), &raw_widths, &reduced_widths, Array1::zeros(4));
617        let reference = base.lift_covariance(&cov_reduced);
618
619        // (1) Bit-identical across a wide sweep of shift magnitudes.
620        for &mag in &[0.0, 1e-7, 1.0, 1e3, 1e7] {
621            let shift = Array1::from(vec![mag, -mag, 0.5 * mag, -2.0 * mag]);
622            let gauge = Gauge::from_t_with_shift(t.clone(), &raw_widths, &reduced_widths, shift);
623            let lifted = gauge.lift_covariance(&cov_reduced);
624            for i in 0..4 {
625                for j in 0..4 {
626                    assert_eq!(
627                        lifted[[i, j]],
628                        reference[[i, j]],
629                        "affine shift magnitude {mag} must not perturb the lifted covariance \
630                         at ({i},{j}) — covariance is offset-invariant",
631                    );
632                }
633            }
634        }
635
636        // (2) Empirical check: draw reduced samples, push them through
637        // β = T·θ + a for two very different shifts, and confirm the sample
638        // covariance is the same for both shifts. Draws use a fixed Cholesky
639        // colouring of cov_reduced so the test is deterministic (no RNG).
640        let chol = {
641            let l00 = cov_reduced[[0, 0]].sqrt();
642            let l10 = cov_reduced[[1, 0]] / l00;
643            let l11 = (cov_reduced[[1, 1]] - l10 * l10).sqrt();
644            Array2::from_shape_vec((2, 2), vec![l00, 0.0, l10, l11]).unwrap()
645        };
646        let z_raw = [
647            [1.2, -0.4],
648            [-0.8, 0.9],
649            [0.3, 1.7],
650            [-1.5, -0.6],
651            [0.6, -1.1],
652            [-0.2, 0.3],
653            [1.9, 0.2],
654            [-1.4, -0.9],
655        ];
656        let sample_cov_for_shift = |shift: &Array1<f64>| -> Array2<f64> {
657            let n = z_raw.len();
658            let betas: Vec<Array1<f64>> = z_raw
659                .iter()
660                .map(|z| {
661                    let theta = chol.dot(&Array1::from(vec![z[0], z[1]]));
662                    t.dot(&theta) + shift
663                })
664                .collect();
665            let mut mean = Array1::<f64>::zeros(4);
666            for b in &betas {
667                mean = &mean + b;
668            }
669            mean /= n as f64;
670            let mut cov = Array2::<f64>::zeros((4, 4));
671            for b in &betas {
672                let c = b - &mean;
673                for i in 0..4 {
674                    for j in 0..4 {
675                        cov[[i, j]] += c[i] * c[j] / n as f64;
676                    }
677                }
678            }
679            cov
680        };
681        let cov_small = sample_cov_for_shift(&Array1::zeros(4));
682        let cov_big = sample_cov_for_shift(&Array1::from(vec![1e6, -1e6, 5e5, -2e6]));
683        for i in 0..4 {
684            for j in 0..4 {
685                assert!(
686                    (cov_small[[i, j]] - cov_big[[i, j]]).abs() < 1e-6,
687                    "empirical sample covariance must be offset-invariant at ({i},{j}): \
688                     small-shift {} vs big-shift {}",
689                    cov_small[[i, j]],
690                    cov_big[[i, j]],
691                );
692            }
693        }
694    }
695
696    #[test]
697    fn block_diagonal_gauge_matches_per_block_lift() {
698        // Block 0: selection keeping raw cols {0, 2} of width 3.
699        let mut t0 = Array2::<f64>::zeros((3, 2));
700        t0[[0, 0]] = 1.0;
701        t0[[2, 1]] = 1.0;
702        // Block 1: full identity of width 2.
703        let t1 = Array2::<f64>::eye(2);
704        let gauge = Gauge::from_block_transforms(&[t0.clone(), t1.clone()]);
705        assert_eq!(gauge.raw_widths(), vec![3, 2]);
706        assert_eq!(gauge.reduced_widths(), vec![2, 2]);
707
708        let theta = vec![Array1::from(vec![1.5, -2.5]), Array1::from(vec![0.5, 4.0])];
709        let raw = gauge.lift_block_betas(&theta);
710        assert_eq!(raw[0].as_slice().unwrap(), &[1.5, 0.0, -2.5]);
711        assert_eq!(raw[1].as_slice().unwrap(), &[0.5, 4.0]);
712
713        // block_transform recovers the diagonal slabs exactly.
714        assert_eq!(gauge.block_transform(0), t0);
715        assert_eq!(gauge.block_transform(1), t1);
716    }
717
718    #[test]
719    fn triangular_gauge_applies_negative_r_off_diagonal() {
720        // Two blocks, raw widths 2 and 2; block 1 keeps 1 column and is
721        // residualised against block 0 by R (2×1).
722        let v_a = Array2::<f64>::eye(2);
723        let mut v_b = Array2::<f64>::zeros((2, 1));
724        v_b[[0, 0]] = 1.0;
725        let mut r_ab = Array2::<f64>::zeros((2, 1));
726        r_ab[[0, 0]] = 0.5;
727        r_ab[[1, 0]] = -0.25;
728        let gauge = Gauge::from_v_and_r(&[v_a, v_b], &[None, Some(r_ab)]);
729
730        let theta = vec![Array1::from(vec![1.0, 2.0]), Array1::from(vec![4.0])];
731        let raw = gauge.lift_block_betas(&theta);
732        // β_a = V_a·θ_a − R_{a→b}·θ_b = [1 − 0.5·4, 2 + 0.25·4] = [−1, 3].
733        assert!((raw[0][0] - (-1.0)).abs() < 1e-14);
734        assert!((raw[0][1] - 3.0).abs() < 1e-14);
735        // β_b = V_b·θ_b = [4, 0].
736        assert!((raw[1][0] - 4.0).abs() < 1e-14);
737        assert!((raw[1][1] - 0.0).abs() < 1e-14);
738    }
739
740    /// For a zero-shift gauge, covariance lift must be the exact pushforward of
741    /// the SAME `T` the β lift applies: for a rank-1 `Σ_θ = θθᵀ`, the lifted
742    /// covariance must equal `(Tθ)(Tθ)ᵀ` built from the lifted β.
743    #[test]
744    fn covariance_lift_is_rank1_consistent_with_beta_lift() {
745        let v_a = Array2::<f64>::eye(2);
746        let mut v_b = Array2::<f64>::zeros((2, 1));
747        v_b[[0, 0]] = 1.0;
748        let mut r_ab = Array2::<f64>::zeros((2, 1));
749        r_ab[[0, 0]] = 0.3;
750        r_ab[[1, 0]] = 0.7;
751        let gauge = Gauge::from_v_and_r(&[v_a, v_b], &[None, Some(r_ab)]);
752
753        let theta = vec![Array1::from(vec![0.8, -1.2]), Array1::from(vec![2.0])];
754        let raw = gauge.lift_block_betas(&theta);
755        let beta_full: Vec<f64> = raw.iter().flat_map(|b| b.iter().copied()).collect();
756
757        let theta_full = Array1::from(vec![0.8, -1.2, 2.0]);
758        let cov_rank1 = {
759            let n = theta_full.len();
760            Array2::from_shape_fn((n, n), |(i, j)| theta_full[i] * theta_full[j])
761        };
762        let lifted = gauge.lift_covariance(&cov_rank1);
763        assert_eq!(lifted.dim(), (4, 4));
764        for i in 0..4 {
765            for j in 0..4 {
766                let expected = beta_full[i] * beta_full[j];
767                assert!(
768                    (lifted[[i, j]] - expected).abs() < 1e-12,
769                    "rank-1 covariance lift must equal (Tθ)(Tθ)ᵀ at ({i},{j}): \
770                     got {} expected {expected}",
771                    lifted[[i, j]],
772                );
773            }
774        }
775    }
776
777    /// `Gauge::sum_to_zero(z)` must lift exactly as `β_raw = z · θ`, and the
778    /// lift must preserve the linear predictor: for any centred design
779    /// `B_c = B · z` and any reduced coefficient `θ`, the raw prediction
780    /// `B · (z · θ)` equals the reduced prediction `B_c · θ`. This is the
781    /// invariant that makes `z` the correct section — a wrong gauge would
782    /// preserve coefficients but break η.
783    #[test]
784    fn sum_to_zero_gauge_lifts_via_z_and_preserves_eta() {
785        // A concrete orthonormal centring section: null space of c = [1,1,1]ᵀ
786        // (the unweighted sum-to-zero constraint on a width-3 block), built as
787        // two orthonormal columns each summing to zero.
788        let s = 1.0 / 2.0_f64.sqrt();
789        let s6 = 1.0 / 6.0_f64.sqrt();
790        let mut z = Array2::<f64>::zeros((3, 2));
791        z[[0, 0]] = s;
792        z[[1, 0]] = -s;
793        z[[2, 0]] = 0.0;
794        z[[0, 1]] = s6;
795        z[[1, 1]] = s6;
796        z[[2, 1]] = -2.0 * s6;
797        // The columns are orthonormal and sum to zero (cᵀz = 0).
798        for j in 0..2 {
799            assert!(
800                (z.column(j).sum()).abs() < 1e-14,
801                "column {j} must sum to 0"
802            );
803            assert!(
804                (z.column(j).dot(&z.column(j)) - 1.0).abs() < 1e-14,
805                "column {j} must be unit norm"
806            );
807        }
808
809        let gauge = Gauge::sum_to_zero(z.clone());
810        assert_eq!(gauge.n_blocks(), 1);
811        assert_eq!(gauge.raw_widths(), vec![3]);
812        assert_eq!(gauge.reduced_widths(), vec![2]);
813        assert_eq!(gauge.block_transform(0), z);
814
815        // Lift β_raw = z · θ exactly.
816        let theta = Array1::from(vec![1.3, -0.7]);
817        let raw = gauge.lift_block_betas(&[theta.clone()]);
818        let expected_raw = z.dot(&theta);
819        for i in 0..3 {
820            assert!((raw[0][i] - expected_raw[i]).abs() < 1e-14);
821        }
822        // Centring is satisfied: the raw coefficients sum to zero.
823        assert!(raw[0].sum().abs() < 1e-14, "lifted β must be centred");
824
825        // η preservation: B · (z · θ) == (B · z) · θ for an arbitrary B.
826        let b = Array2::from_shape_vec(
827            (4, 3),
828            vec![
829                1.0, 2.0, -1.0, 0.5, -0.5, 3.0, 2.0, 1.0, 1.0, -1.0, 0.0, 4.0,
830            ],
831        )
832        .unwrap();
833        let b_c = fast_ab(&b, &z); // the constrained design B_c
834        assert_eq!(gauge.restrict_design(&b), b_c);
835        let eta_reduced = b_c.dot(&theta);
836        let eta_raw = b.dot(&expected_raw);
837        for i in 0..4 {
838            assert!(
839                (eta_reduced[i] - eta_raw[i]).abs() < 1e-13,
840                "η must be invariant under the centring lift at row {i}",
841            );
842        }
843
844        // Covariance pushforward through the SAME z (rank-1 consistency).
845        let cov_rank1 = Array2::from_shape_fn((2, 2), |(i, j)| theta[i] * theta[j]);
846        let lifted = gauge.lift_covariance(&cov_rank1);
847        assert_eq!(lifted.dim(), (3, 3));
848        for i in 0..3 {
849            for j in 0..3 {
850                let expect = expected_raw[i] * expected_raw[j];
851                assert!(
852                    (lifted[[i, j]] - expect).abs() < 1e-13,
853                    "centring covariance lift must equal (zθ)(zθ)ᵀ at ({i},{j})",
854                );
855            }
856        }
857
858        let raw_penalty = Array2::from_shape_vec(
859            (3, 3),
860            vec![2.0, 0.5, 0.0, 0.5, 3.0, -0.25, 0.0, -0.25, 4.0],
861        )
862        .unwrap();
863        let reduced_penalty = gauge.restrict_penalty(&raw_penalty);
864        let expected_reduced_penalty = fast_ab(&fast_atb(&z, &raw_penalty), &z);
865        assert_eq!(reduced_penalty, expected_reduced_penalty);
866    }
867
868    #[test]
869    #[should_panic(expected = "removes at least one direction")]
870    fn sum_to_zero_rejects_identity_section() {
871        // A square z removes no direction — that is not a centring section.
872        drop(Gauge::sum_to_zero(Array2::<f64>::eye(3)));
873    }
874
875    #[test]
876    fn extend_with_identity_passes_extra_blocks_through() {
877        let mut t0 = Array2::<f64>::zeros((2, 1));
878        t0[[0, 0]] = 1.0;
879        let gauge = Gauge::from_block_transforms(&[t0]).extend_with_identity(&[2]);
880        assert_eq!(gauge.n_blocks(), 2);
881        assert_eq!(gauge.raw_total(), 4);
882        assert_eq!(gauge.reduced_total(), 3);
883
884        let theta = vec![Array1::from(vec![3.0]), Array1::from(vec![1.0, -1.0])];
885        let raw = gauge.lift_block_betas(&theta);
886        assert_eq!(raw[0].as_slice().unwrap(), &[3.0, 0.0]);
887        assert_eq!(raw[1].as_slice().unwrap(), &[1.0, -1.0]);
888
889        // Covariance: the extra (untouched) block's diagonal sub-matrix
890        // survives the lift bit-for-bit; the reduced block zero-pads.
891        let mut cov = Array2::<f64>::eye(3);
892        cov[[1, 2]] = 0.25;
893        cov[[2, 1]] = 0.25;
894        let lifted = gauge.lift_covariance(&cov);
895        assert_eq!(lifted.dim(), (4, 4));
896        assert!((lifted[[0, 0]] - 1.0).abs() < 1e-14);
897        assert!(
898            (lifted[[1, 1]] - 0.0).abs() < 1e-14,
899            "dropped raw row has zero variance"
900        );
901        assert!((lifted[[2, 2]] - 1.0).abs() < 1e-14);
902        assert!((lifted[[3, 3]] - 1.0).abs() < 1e-14);
903        assert!((lifted[[2, 3]] - 0.25).abs() < 1e-14);
904    }
905}
gam_problem/gauge.rs

gam_problem/
gauge.rs