Skip to main content

gam_terms/
latent.rs

1//! `LatentCoord` — per-row latent coordinates as a first-class gamfit parameter.
2//!
3//! The Riemannian update path follows manifold GPLVM practice (mGPLVM;
4//! Jensen/Kao/Tran/Stevenson 2020 and related head-direction / population
5//! manifold work): angular, spherical, and product-topology latents are
6//! updated on their natural manifold instead of as Euclidean coordinates
7//! with basis-side periodic hacks. Retractions and Euclidean-to-Riemannian
8//! Hessian conversion follow Absil/Mahony/Sepulchre (2008) and the Manopt /
9//! Pymanopt implementation pattern. In the audit-revised gauge framing, the
10//! Riemannian update is itself a gauge restriction: Circle/Sphere/Torus
11//! structure identifies the latent up to the corresponding global isometry
12//! (for example one rotation per cycle), not up to the full diffeomorphism
13//! group of an unconstrained Euclidean latent chart.
14//!
15//! ## Summary
16//!
17//! `LatentCoordValues` is the structural sibling of [`SpatialLogKappaCoords`]
18//! (see [`crate::smooth`]). Both store a flat `Array1<f64>` that the
19//! REML/IFT outer loop treats as *design-moving, non-penalty-like*
20//! hyper-coordinates. `SpatialLogKappaCoords` holds one or more kernel-shape
21//! coordinates per spatial term. `LatentCoordValues`
22//! holds an `N × d` matrix of per-row latent coordinates `t_n ∈ ℝ^d`.
23//!
24//! For a Duchon (or any radial) basis:
25//!
26//! ```text
27//! Φ_{n,k} = φ(‖t_n − c_k‖),
28//! ∂Φ_{n,k}/∂t_n = φ'(r_{nk}) · (t_n − c_k) / r_{nk}.
29//! ```
30//!
31//! The radial-gradient `φ'(r)` is the same scalar the kernel-shape machinery already
32//! computes via [`crate::basis::duchon_radial_jets`]; the chain rule
33//! `(t_n − c_k)/r_{nk}` is what differs between "differentiate against the
34//! kernel scale" and "differentiate against the first kernel argument t".
35//! Everything downstream of `HyperDesignDerivative::from_implicit` (matrix-free
36//! Newton, IFT cache, persistent warm-start, REML/LAML evaluation) is reused
37//! verbatim.
38//!
39//! ## Gauge fixing
40//!
41//! The bare data-fit `½‖y − Φ(t)β‖²` is invariant under any diffeomorphism
42//! `t ↦ φ(t)` (absorb into a re-fit β), so the inner Hessian in the latent
43//! block is singular and IFT breaks. [`LatentIdMode`] enumerates the
44//! gauge-fix penalties exposed at the configuration layer:
45//!
46//! * [`LatentIdMode::AuxPrior`] — iVAE-style auxiliary-conditional prior
47//!   `R_id(t,u) = ½ μ ‖t − ĥ(u)‖²` where `ĥ` is a small ridge / linear map
48//!   fit internally against the auxiliary `u`. `μ` is REML-selectable like a
49//!   smoothing parameter only when the marginal likelihood includes the
50//!   log-`μ` normalizer, `ĥ` is at least C¹, and the conditional precision is
51//!   positive-definite on the anchored subspace. Under those regularity
52//!   conditions this is the principled identifiability fix (Khemakhem et al.
53//!   2020).
54//! * [`LatentIdMode::DimSelection`] — ARD on each latent axis. One ridge
55//!   penalty per axis; REML drives unused axes' precision to infinity only
56//!   after `AuxPrior` or a future isometry prior fixes the gauge.
57//! * [`LatentIdMode::None`] — no gauge fix. Useful only as an explicit
58//!   opt-out; the caller is responsible for separately providing a unique
59//!   inner minimum (e.g. via a custom penalty).
60//!
61//! [`LatentIdMode::IsometryToReference`] (proposal §4(b)) anchors the latent to
62//! a caller-supplied reference configuration via `½ μ ‖t − reference‖²` with a
63//! REML-selectable `μ`, fixing the gauge without an auxiliary signal `u`.
64
65use gam_problem::LatentRetractionRegistry;
66use crate::basis::{BasisError, RadialScalarKind};
67use ndarray::{Array1, Array2, Array3, ArrayView1, ArrayView2, ArrayView3};
68use std::sync::atomic::{AtomicU64, Ordering};
69const SPHERE_NORMAL_PIN: f64 = 1.0;
70static NEXT_LATENT_COORD_ID: AtomicU64 = AtomicU64::new(1);
71
72fn next_latent_coord_id() -> u64 {
73    NEXT_LATENT_COORD_ID.fetch_add(1, Ordering::Relaxed)
74}
75
76/// Choice of auxiliary-prior conditional mean estimator `ĥ(u)`.
77///
78/// `Ridge` is the cheap default that closes form (one `K_u × K_u` solve);
79/// `Linear` is equivalent to `Ridge` with zero ridge and is intended for
80/// auxiliaries `u` that are already low-dimensional and well-conditioned.
81#[derive(Debug, Clone, Copy)]
82pub enum AuxPriorFamily {
83    /// Ridge regression `t ≈ U · A` with a small diagonal regularizer.
84    /// The default ridge strength is `1e-6 · trace(UᵀU)/p`, which is
85    /// numerically benign and never under-constrains the fit when
86    /// `n_obs > p`.
87    Ridge,
88    /// Plain linear projection (no ridge). Errors out at construction if
89    /// `UᵀU` is singular.
90    Linear,
91}
92
93/// Strength of the auxiliary-prior identifiability penalty.
94///
95/// `Auto` defers the choice to REML — the strength is added to the outer
96/// vector as one extra `ρ`-axis (one log-precision per `LatentCoord`). When
97/// the caller supplies an explicit `Fixed(μ)` the strength is held constant
98/// throughout the fit; useful for warm-starts and reproducibility. The REML
99/// path is valid only with the prior normalizer included, a C¹ conditional
100/// mean map, and positive-definite precision on the anchored subspace.
101#[derive(Debug, Clone, Copy)]
102pub enum AuxPriorStrength {
103    Auto,
104    Fixed(f64),
105}
106
107/// Identifiability / gauge-fix mode for a [`LatentCoordValues`] block.
108///
109/// `AuxPrior` is currently the only standalone gauge-fixing mode; see the
110/// module docstring. `DimSelection` must be paired with `AuxPrior` (or a
111/// future isometry mode) by higher-level assembly before fitting.
112#[derive(Debug, Clone)]
113pub enum LatentIdMode {
114    /// Conditional Gaussian prior `p(t | u)` with mean `ĥ(u)` fit by
115    /// `family`. The penalty contribution is
116    /// `R_id = ½ μ · ‖t − ĥ(u)‖²`. `u` has shape `(n_obs, p)`. If
117    /// `strength == Auto`, REML selection of `μ` requires the log-`μ`
118    /// normalizer, C¹ regularity of `ĥ`, and positive-definiteness on the
119    /// subspace anchored by `u`.
120    AuxPrior {
121        u: Array2<f64>,
122        family: AuxPriorFamily,
123        strength: AuxPriorStrength,
124    },
125    /// Auxiliary prior plus ARD over latent axes. `AuxPrior` supplies the
126    /// identifiability anchor; `init_log_precision` seeds the per-axis ARD
127    /// coordinates.
128    AuxPriorDimSelection {
129        u: Array2<f64>,
130        family: AuxPriorFamily,
131        strength: AuxPriorStrength,
132        init_log_precision: Option<Array1<f64>>,
133    },
134    /// ARD over latent axes. One ridge penalty per latent axis; the per-axis
135    /// log-precision joins the outer ρ vector. `init_log_precision` seeds
136    /// the per-axis ρ — a vector of length `d`. `None` defaults to a flat
137    /// zero seed (precision = 1 on every axis).
138    DimSelection {
139        init_log_precision: Option<Array1<f64>>,
140    },
141    /// Behaviorally-anchored head (issue #912). The auxiliary signal is
142    /// promoted from a fixed-covariate *prior* to a modeled *outcome*: a GLM
143    /// behavioral head `g(E[y|t]) = a + t·w` whose design columns are the
144    /// latent codes contributes a *likelihood* term to the joint objective,
145    /// so REML balances reconstruction vs. behavioral fit with no trade-off
146    /// scalar (magic by default).
147    ///
148    /// The head's coefficients are direct hyperparameters appended to θ (one
149    /// `(1 + d)` block per η-channel), like the AuxPrior log-`μ`. Because a
150    /// single binary label pins ~1 gauge dimension, `AuxOutcome` *composes*
151    /// with `DimSelection` ARD (the `init_log_precision` seed) and the
152    /// isometry pin rather than replacing them; the validator requires that
153    /// composition and rejects a head with no labels.
154    AuxOutcome {
155        head: crate::decoders::behavioral_head::BehavioralHead,
156        /// ARD seed composed with the head, one log-precision per latent axis
157        /// (length `d`). `AuxOutcome` always carries the ARD axis-selection
158        /// alongside the behavioral anchor, since the label alone under-pins
159        /// the gauge. `None` defaults to a flat zero seed.
160        init_log_precision: Option<Array1<f64>>,
161    },
162    /// Anchor the latent configuration to a caller-supplied reference up to
163    /// the global isometry the chosen manifold already quotients out: penalty
164    /// `R_id = ½ μ · ‖t − reference‖²` with REML-selectable `μ` (the log-`μ`
165    /// normalizer enters the marginal likelihood exactly as in `AuxPrior`).
166    /// Unlike `AuxPrior`, the target is a fixed reference configuration (e.g. a
167    /// pilot embedding that fixes the isometry representative) rather than an
168    /// auxiliary-conditional mean `ĥ(u)`, so it pins the gauge with no
169    /// auxiliary signal `u`. `reference` has shape `(n_obs, d)`. As a standalone
170    /// anchor it is a valid gauge fix; it also composes with `DimSelection` ARD.
171    IsometryToReference {
172        reference: Array2<f64>,
173        strength: AuxPriorStrength,
174    },
175    /// No gauge fix. Inner Hessian is rank-deficient; results are not
176    /// uniquely defined. Intended only for the explicit "I supply my own
177    /// gauge constraint via the smoothing penalty" pathway.
178    None,
179}
180
181/// Natural manifold for per-row latent-coordinate updates.
182///
183/// `Euclidean` preserves the original additive update. `Circle` is a scalar
184/// angular coordinate wrapped modulo `2π`. `Sphere { dim }` is the embedded
185/// unit sphere in `R^dim`, with retraction `(t + ξ) / ||t + ξ||`. `Product`
186/// composes these blockwise; inside a product, `Euclidean` denotes one
187/// unconstrained scalar axis.
188#[derive(Debug, Clone, PartialEq, Default)]
189pub enum LatentManifold {
190    /// Unconstrained `R^d` — the current default.
191    #[default]
192    Euclidean,
193    /// Scalar periodic coordinate on `S^1` with caller-supplied period.
194    ///
195    /// Wraps modulo `period`; pass `period = 2π` for radian conventions and
196    /// `period = 1.0` for basis evaluators that interpret the latent as a
197    /// fraction of one period. The metric weight uses `1/period²` so the
198    /// trust-region radius respects the chosen unit.
199    Circle { period: f64 },
200    /// Embedded unit sphere `S^(dim-1)`.
201    Sphere { dim: usize },
202    /// Closed interval in `R`; the retraction clamps to the boundary.
203    Interval { lo: f64, hi: f64 },
204    /// Product manifold, split block-by-block in row-major ambient storage.
205    Product(Vec<LatentManifold>),
206    /// Product manifold with explicit per-axis trust-region metric weights.
207    ///
208    /// Without per-axis weighting, a Product of Circle + Interval treats
209    /// 1 radian as commensurate with the entire bounded range. With weights
210    /// = 1/scale², the trust-region radius respects each axis's natural unit.
211    ProductWithMetric {
212        manifolds: Vec<LatentManifold>,
213        weights: Vec<f64>,
214    },
215}
216
217impl LatentManifold {
218    pub fn is_euclidean(&self) -> bool {
219        matches!(self, Self::Euclidean)
220    }
221
222    /// Whether the Euclidean→Riemannian geometry transform applied by
223    /// `crate::solver::arrow_schur::ArrowSchurSystem::apply_riemannian_latent_geometry`
224    /// is the **identity** on the per-row gradient, `H_tt`, and `H_tβ` blocks
225    /// for *every* coordinate `t` on this chart.
226    ///
227    /// This is the exact condition under which a coupled Gauss-Newton block
228    /// `μ AᵀA = [[htt, cross],[crossᵀ, hbb]]` assembled from one residual
229    /// Jacobian survives the geometry pass with its PSD coherence intact: if
230    /// the transform leaves `htt` and the `htbeta` cross-block untouched, the
231    /// whole block is still `μ AᵀA` (PSD) and its Schur complement is PSD, so
232    /// the isometry cross-coupling can be kept (faster, exact Newton).
233    ///
234    /// A chart that rewrites `htt` with a curvature/connection term or
235    /// column-projects the cross-block (`Sphere`, an active `Interval`
236    /// boundary, any curved `Product` factor) breaks that pairing — the
237    /// cross-block is then no longer matched to diagonals from the same
238    /// Jacobian and the Schur complement can go indefinite (the #681
239    /// circle/sphere failure mode). Such charts must drop the cross-block.
240    ///
241    /// Flat charts (`Euclidean`, `Circle`, and `Product`s built only from
242    /// these) transform as the identity unconditionally — their tangent
243    /// projection is the identity, they carry no connection term, and they add
244    /// no normal pinning — so coherence is preserved and the cross-block is
245    /// kept. `Interval` is excluded: its tangent projection masks coordinates
246    /// at an active boundary (a `t`-dependent projection), which breaks the
247    /// pairing exactly like a curved chart.
248    pub fn preserves_isometry_cross_block_coherence(&self) -> bool {
249        match self {
250            Self::Euclidean | Self::Circle { .. } => true,
251            Self::Sphere { .. } | Self::Interval { .. } => false,
252            Self::Product(parts)
253            | Self::ProductWithMetric {
254                manifolds: parts, ..
255            } => parts
256                .iter()
257                .all(|part| part.preserves_isometry_cross_block_coherence()),
258        }
259    }
260
261    pub fn ambient_dim(&self, fallback_dim: usize) -> usize {
262        match self {
263            Self::Euclidean => fallback_dim,
264            Self::Circle { .. } | Self::Interval { .. } => 1,
265            Self::Sphere { dim } => *dim,
266            Self::Product(parts)
267            | Self::ProductWithMetric {
268                manifolds: parts, ..
269            } => parts.iter().map(|part| part.ambient_dim(1)).sum(),
270        }
271    }
272
273    /// Per-axis weights for the Riemannian trust-region metric.
274    ///
275    /// Defaults use `1/scale²`: Circle scale is `2π`, Sphere scale is `π`,
276    /// Interval scale is `hi - lo`, and Euclidean scale is `1`. Product
277    /// manifolds recurse and concatenate; [`Self::ProductWithMetric`] uses
278    /// the caller-supplied weights directly.
279    pub fn metric_weights(&self) -> Vec<f64> {
280        match self {
281            Self::Euclidean => vec![1.0],
282            Self::Circle { period } => {
283                assert!(
284                    period.is_finite() && *period > 0.0,
285                    "LatentManifold::Circle requires a finite positive period; got {period}"
286                );
287                vec![1.0 / (period * period)]
288            }
289            Self::Sphere { dim } => {
290                let w = 1.0 / (std::f64::consts::PI * std::f64::consts::PI);
291                vec![w; *dim]
292            }
293            Self::Interval { lo, hi } => {
294                let scale = hi - lo;
295                assert!(
296                    scale.is_finite() && scale > 0.0,
297                    "LatentManifold::Interval requires finite lo < hi; got lo={lo}, hi={hi}"
298                );
299                vec![1.0 / (scale * scale)]
300            }
301            Self::Product(parts) => {
302                let mut out = Vec::with_capacity(self.ambient_dim(1));
303                for part in parts {
304                    out.extend(part.metric_weights());
305                }
306                out
307            }
308            Self::ProductWithMetric { manifolds, weights } => {
309                let expected: usize = manifolds.iter().map(|part| part.ambient_dim(1)).sum();
310                assert_eq!(
311                    weights.len(),
312                    expected,
313                    "LatentManifold::ProductWithMetric weights length must match ambient dimension"
314                );
315                weights.clone()
316            }
317        }
318    }
319
320    /// Per-ambient-axis periodicity: `Some(period)` for an axis that wraps
321    /// modulo a finite period (a `Circle` factor, including the longitude of
322    /// the lat/lon sphere chart), `None` for a non-periodic axis (Euclidean,
323    /// Interval, or an embedded `Sphere` axis whose retraction is smooth and
324    /// has no cut).
325    ///
326    /// Used by the SAE-manifold ARD prior to switch from the cut-discontinuous
327    /// Euclidean `½α t²` to a smooth von-Mises energy on periodic axes. The
328    /// embedded `Sphere` is deliberately reported as non-periodic: its
329    /// retraction `(t+ξ)/‖t+ξ‖` is globally smooth, so the ambient `½α‖t‖²`
330    /// prior has no discontinuity there.
331    pub fn axis_periods(&self) -> Vec<Option<f64>> {
332        match self {
333            Self::Euclidean => vec![None],
334            Self::Circle { period } => {
335                assert!(
336                    period.is_finite() && *period > 0.0,
337                    "LatentManifold::Circle requires a finite positive period; got {period}"
338                );
339                vec![Some(*period)]
340            }
341            Self::Sphere { dim } => vec![None; *dim],
342            Self::Interval { .. } => vec![None],
343            Self::Product(parts) => {
344                let mut out = Vec::with_capacity(self.ambient_dim(1));
345                for part in parts {
346                    out.extend(part.axis_periods());
347                }
348                out
349            }
350            Self::ProductWithMetric { manifolds, .. } => {
351                let mut out = Vec::with_capacity(self.ambient_dim(1));
352                for part in manifolds {
353                    out.extend(part.axis_periods());
354                }
355                out
356            }
357        }
358    }
359
360    /// Project an arbitrary ambient point back to the manifold.
361    pub fn project_point(&self, t: ArrayView1<'_, f64>) -> Array1<f64> {
362        match self {
363            Self::Euclidean => t.to_owned(),
364            Self::Circle { period } => {
365                let mut out = Array1::<f64>::zeros(1);
366                out[0] = wrap_to_period(t[0], *period);
367                out
368            }
369            Self::Sphere { dim } => {
370                assert_eq!(t.len(), *dim);
371                normalize_or_axis(t, *dim)
372            }
373            Self::Interval { lo, hi } => {
374                // Order the bounds defensively: `f64::clamp` panics if min > max,
375                // so a reversed `Interval { lo, hi }` would otherwise crash deep
376                // in projection rather than clamp into the intended range.
377                let (lo, hi) = if lo <= hi { (*lo, *hi) } else { (*hi, *lo) };
378                let mut out = Array1::<f64>::zeros(1);
379                out[0] = t[0].clamp(lo, hi);
380                out
381            }
382            Self::Product(parts)
383            | Self::ProductWithMetric {
384                manifolds: parts, ..
385            } => {
386                let mut out = Array1::<f64>::zeros(t.len());
387                let mut offset = 0_usize;
388                for part in parts {
389                    let dim = part.ambient_dim(1);
390                    let projected = part.project_point(t.slice(ndarray::s![offset..offset + dim]));
391                    for a in 0..dim {
392                        out[offset + a] = projected[a];
393                    }
394                    offset += dim;
395                }
396                assert_eq!(offset, t.len());
397                out
398            }
399        }
400    }
401
402    /// Retraction `R_t(ξ)`, using closed-form analytic maps for every variant.
403    pub fn retract(&self, t: ArrayView1<'_, f64>, xi: ArrayView1<'_, f64>) -> Array1<f64> {
404        assert_eq!(t.len(), xi.len());
405        match self {
406            Self::Euclidean => {
407                let mut out = t.to_owned();
408                for a in 0..out.len() {
409                    out[a] += xi[a];
410                }
411                out
412            }
413            Self::Circle { period } => {
414                let mut out = Array1::<f64>::zeros(1);
415                out[0] = wrap_to_period(t[0] + xi[0], *period);
416                out
417            }
418            Self::Sphere { dim } => {
419                assert_eq!(t.len(), *dim);
420                let mut y = Array1::<f64>::zeros(*dim);
421                for a in 0..*dim {
422                    y[a] = t[a] + xi[a];
423                }
424                normalize_or_axis(y.view(), *dim)
425            }
426            Self::Interval { lo, hi } => {
427                // Order the bounds defensively: `f64::clamp` panics if min > max,
428                // so a reversed `Interval { lo, hi }` would otherwise crash the
429                // retraction instead of clamping into the intended range.
430                let (lo, hi) = if lo <= hi { (*lo, *hi) } else { (*hi, *lo) };
431                let mut out = Array1::<f64>::zeros(1);
432                out[0] = (t[0] + xi[0]).clamp(lo, hi);
433                out
434            }
435            Self::Product(parts)
436            | Self::ProductWithMetric {
437                manifolds: parts, ..
438            } => {
439                let mut out = Array1::<f64>::zeros(t.len());
440                let mut offset = 0_usize;
441                for part in parts {
442                    let dim = part.ambient_dim(1);
443                    let next = part.retract(
444                        t.slice(ndarray::s![offset..offset + dim]),
445                        xi.slice(ndarray::s![offset..offset + dim]),
446                    );
447                    for a in 0..dim {
448                        out[offset + a] = next[a];
449                    }
450                    offset += dim;
451                }
452                assert_eq!(offset, t.len());
453                out
454            }
455        }
456    }
457
458    /// Orthogonal projection of an ambient vector onto `T_t M`.
459    pub fn project_to_tangent(
460        &self,
461        t: ArrayView1<'_, f64>,
462        v: ArrayView1<'_, f64>,
463    ) -> Array1<f64> {
464        assert_eq!(t.len(), v.len());
465        match self {
466            Self::Euclidean | Self::Circle { .. } => v.to_owned(),
467            Self::Sphere { dim } => {
468                assert_eq!(t.len(), *dim);
469                let tv = dot_views(t, v);
470                let mut out = v.to_owned();
471                for a in 0..*dim {
472                    out[a] -= tv * t[a];
473                }
474                out
475            }
476            Self::Interval { lo, hi } => {
477                let mut out = Array1::<f64>::zeros(1);
478                let at_lo = t[0] <= *lo && v[0] < 0.0;
479                let at_hi = t[0] >= *hi && v[0] > 0.0;
480                out[0] = if at_lo || at_hi { 0.0 } else { v[0] };
481                out
482            }
483            Self::Product(parts)
484            | Self::ProductWithMetric {
485                manifolds: parts, ..
486            } => {
487                let mut out = Array1::<f64>::zeros(v.len());
488                let mut offset = 0_usize;
489                for part in parts {
490                    let dim = part.ambient_dim(1);
491                    let projected = part.project_to_tangent(
492                        t.slice(ndarray::s![offset..offset + dim]),
493                        v.slice(ndarray::s![offset..offset + dim]),
494                    );
495                    for a in 0..dim {
496                        out[offset + a] = projected[a];
497                    }
498                    offset += dim;
499                }
500                assert_eq!(offset, v.len());
501                out
502            }
503        }
504    }
505
506    /// Project an objective gradient onto the linearized feasible update space.
507    ///
508    /// For smooth manifolds this is the usual tangent projection. For interval
509    /// endpoints the sign test is applied to the descent direction `-g`: at the
510    /// upper endpoint, a negative gradient would step outward, so the coordinate
511    /// is held fixed; at the lower endpoint, a positive gradient would step
512    /// outward. This is distinct from [`Self::project_to_tangent`], whose
513    /// interval branch projects update velocities.
514    pub fn project_gradient_to_tangent(
515        &self,
516        t: ArrayView1<'_, f64>,
517        g: ArrayView1<'_, f64>,
518    ) -> Array1<f64> {
519        assert_eq!(t.len(), g.len());
520        match self {
521            Self::Euclidean | Self::Circle { .. } | Self::Sphere { .. } => {
522                self.project_to_tangent(t, g)
523            }
524            Self::Interval { lo, hi } => {
525                let mut out = Array1::<f64>::zeros(1);
526                let descent_exits_lo = t[0] <= *lo && g[0] > 0.0;
527                let descent_exits_hi = t[0] >= *hi && g[0] < 0.0;
528                out[0] = if descent_exits_lo || descent_exits_hi {
529                    0.0
530                } else {
531                    g[0]
532                };
533                out
534            }
535            Self::Product(parts)
536            | Self::ProductWithMetric {
537                manifolds: parts, ..
538            } => {
539                let mut out = Array1::<f64>::zeros(g.len());
540                let mut offset = 0_usize;
541                for part in parts {
542                    let dim = part.ambient_dim(1);
543                    let projected = part.project_gradient_to_tangent(
544                        t.slice(ndarray::s![offset..offset + dim]),
545                        g.slice(ndarray::s![offset..offset + dim]),
546                    );
547                    for a in 0..dim {
548                        out[offset + a] = projected[a];
549                    }
550                    offset += dim;
551                }
552                assert_eq!(offset, g.len());
553                out
554            }
555        }
556    }
557
558    /// Project a coordinate-space Jacobian/cross-block column with the same
559    /// active interval coordinates selected by
560    /// [`Self::project_gradient_to_tangent`].
561    pub fn project_vector_to_gradient_tangent(
562        &self,
563        t: ArrayView1<'_, f64>,
564        g: ArrayView1<'_, f64>,
565        v: ArrayView1<'_, f64>,
566    ) -> Array1<f64> {
567        assert_eq!(t.len(), g.len());
568        assert_eq!(t.len(), v.len());
569        match self {
570            Self::Euclidean | Self::Circle { .. } | Self::Sphere { .. } => {
571                self.project_to_tangent(t, v)
572            }
573            Self::Interval { lo, hi } => {
574                let mut out = Array1::<f64>::zeros(1);
575                let descent_exits_lo = t[0] <= *lo && g[0] > 0.0;
576                let descent_exits_hi = t[0] >= *hi && g[0] < 0.0;
577                out[0] = if descent_exits_lo || descent_exits_hi {
578                    0.0
579                } else {
580                    v[0]
581                };
582                out
583            }
584            Self::Product(parts)
585            | Self::ProductWithMetric {
586                manifolds: parts, ..
587            } => {
588                let mut out = Array1::<f64>::zeros(v.len());
589                let mut offset = 0_usize;
590                for part in parts {
591                    let dim = part.ambient_dim(1);
592                    let projected = part.project_vector_to_gradient_tangent(
593                        t.slice(ndarray::s![offset..offset + dim]),
594                        g.slice(ndarray::s![offset..offset + dim]),
595                        v.slice(ndarray::s![offset..offset + dim]),
596                    );
597                    for a in 0..dim {
598                        out[offset + a] = projected[a];
599                    }
600                    offset += dim;
601                }
602                assert_eq!(offset, v.len());
603                out
604            }
605        }
606    }
607
608    /// Project every column of `matrix` with
609    /// [`Self::project_vector_to_gradient_tangent`].
610    pub fn project_matrix_columns_to_gradient_tangent(
611        &self,
612        t: ArrayView1<'_, f64>,
613        g: ArrayView1<'_, f64>,
614        matrix: ArrayView2<'_, f64>,
615    ) -> Array2<f64> {
616        let mut out = Array2::<f64>::zeros(matrix.dim());
617        assert_eq!(matrix.nrows(), t.len());
618        for col_idx in 0..matrix.ncols() {
619            let col = self.project_vector_to_gradient_tangent(t, g, matrix.column(col_idx));
620            for row_idx in 0..matrix.nrows() {
621                out[[row_idx, col_idx]] = col[row_idx];
622            }
623        }
624        out
625    }
626
627    /// Convert Euclidean Hessian action `eh · xi` to Riemannian Hessian action.
628    ///
629    /// For the sphere this is the Absil/Mahony/Sepulchre embedded-sphere
630    /// conversion: differentiate the projected gradient and project back to
631    /// the tangent space. The ambient derivative includes the normal
632    /// curvature term `-<grad_R, ξ> t`; the tangent action is equivalent to
633    /// `P_t(eh ξ) - <eg, t> ξ`.
634    pub fn euclidean_to_riemannian_hessian(
635        &self,
636        t: ArrayView1<'_, f64>,
637        eg: ArrayView1<'_, f64>,
638        eh: ArrayView2<'_, f64>,
639        xi: ArrayView1<'_, f64>,
640    ) -> Array1<f64> {
641        assert_eq!(t.len(), eg.len());
642        assert_eq!(t.len(), xi.len());
643        assert_eq!(eh.nrows(), t.len());
644        assert_eq!(eh.ncols(), t.len());
645        let eh_xi = matvec(eh, xi);
646        self.euclidean_hessian_action_to_riemannian(t, eg, xi, eh_xi.view())
647    }
648
649    fn euclidean_hessian_action_to_riemannian(
650        &self,
651        t: ArrayView1<'_, f64>,
652        eg: ArrayView1<'_, f64>,
653        xi: ArrayView1<'_, f64>,
654        eh_xi: ArrayView1<'_, f64>,
655    ) -> Array1<f64> {
656        assert_eq!(t.len(), eg.len());
657        assert_eq!(t.len(), xi.len());
658        assert_eq!(t.len(), eh_xi.len());
659        match self {
660            Self::Euclidean | Self::Circle { .. } => self.project_to_tangent(t, eh_xi),
661            Self::Interval { .. } => self.project_vector_to_gradient_tangent(t, eg, eh_xi),
662            Self::Sphere { dim } => {
663                assert_eq!(t.len(), *dim);
664                let grad_r = self.project_to_tangent(t, eg);
665                let mut ambient = self.project_to_tangent(t, eh_xi);
666                let eg_normal = dot_views(eg, t);
667                let normal_curve = dot_views(grad_r.view(), xi);
668                for a in 0..*dim {
669                    ambient[a] -= eg_normal * xi[a];
670                    ambient[a] -= normal_curve * t[a];
671                }
672                self.project_to_tangent(t, ambient.view())
673            }
674            Self::Product(parts)
675            | Self::ProductWithMetric {
676                manifolds: parts, ..
677            } => {
678                let mut out = Array1::<f64>::zeros(t.len());
679                let mut offset = 0_usize;
680                for part in parts {
681                    let dim = part.ambient_dim(1);
682                    let converted = part.euclidean_hessian_action_to_riemannian(
683                        t.slice(ndarray::s![offset..offset + dim]),
684                        eg.slice(ndarray::s![offset..offset + dim]),
685                        xi.slice(ndarray::s![offset..offset + dim]),
686                        eh_xi.slice(ndarray::s![offset..offset + dim]),
687                    );
688                    for a in 0..dim {
689                        out[offset + a] = converted[a];
690                    }
691                    offset += dim;
692                }
693                assert_eq!(offset, t.len());
694                out
695            }
696        }
697    }
698
699    /// Dense ambient matrix representation of the tangent Hessian action.
700    ///
701    /// Normal directions are pinned with an identity block for embedded
702    /// constrained factors so existing BA Cholesky code can factor the ambient
703    /// matrix while RHS/cross blocks stay tangent-projected.
704    pub fn riemannian_hessian_matrix(
705        &self,
706        t: ArrayView1<'_, f64>,
707        eg: ArrayView1<'_, f64>,
708        eh: ArrayView2<'_, f64>,
709    ) -> Array2<f64> {
710        let d = t.len();
711        let mut out = Array2::<f64>::zeros((d, d));
712        let mut xi = Array1::<f64>::zeros(d);
713        for a in 0..d {
714            xi.fill(0.0);
715            xi[a] = 1.0;
716            let tangent_xi = self.project_vector_to_gradient_tangent(t, eg, xi.view());
717            let col = self.euclidean_to_riemannian_hessian(t, eg, eh, tangent_xi.view());
718            for b in 0..d {
719                out[[b, a]] = col[b];
720            }
721        }
722        self.add_normal_pinning(t, &mut out);
723        symmetrize(&mut out);
724        out
725    }
726
727    /// Project every column of an ambient matrix into `T_t M`.
728    pub fn project_matrix_columns_to_tangent(
729        &self,
730        t: ArrayView1<'_, f64>,
731        matrix: ArrayView2<'_, f64>,
732    ) -> Array2<f64> {
733        let mut out = Array2::<f64>::zeros(matrix.dim());
734        self.project_matrix_columns_to_tangent_into(t, matrix, out.view_mut());
735        out
736    }
737
738    /// In-place column-wise tangent projection: writes the projection of every
739    /// column of `matrix` into the matching column of `out`. Both `matrix` and
740    /// `out` must have shape `(ambient_dim × ncols)`. Callers that project the
741    /// same `(q × p)` scratch every row hoist `out` outside the loop to avoid
742    /// reallocating an `Array2` per row; the projection itself reuses the
743    /// allocation-free [`Self::project_to_tangent`] per column.
744    pub fn project_matrix_columns_to_tangent_into(
745        &self,
746        t: ArrayView1<'_, f64>,
747        matrix: ArrayView2<'_, f64>,
748        mut out: ndarray::ArrayViewMut2<'_, f64>,
749    ) {
750        assert_eq!(
751            matrix.dim(),
752            out.dim(),
753            "project_matrix_columns_to_tangent_into: matrix {:?} != out {:?}",
754            matrix.dim(),
755            out.dim(),
756        );
757        for col_idx in 0..matrix.ncols() {
758            let col = self.project_to_tangent(t, matrix.column(col_idx));
759            for row_idx in 0..matrix.nrows() {
760                out[[row_idx, col_idx]] = col[row_idx];
761            }
762        }
763    }
764
765    fn add_normal_pinning(&self, t: ArrayView1<'_, f64>, matrix: &mut Array2<f64>) {
766        match self {
767            Self::Sphere { dim } => {
768                assert_eq!(t.len(), *dim);
769                for a in 0..*dim {
770                    for b in 0..*dim {
771                        matrix[[a, b]] += SPHERE_NORMAL_PIN * t[a] * t[b];
772                    }
773                }
774            }
775            Self::Product(parts)
776            | Self::ProductWithMetric {
777                manifolds: parts, ..
778            } => {
779                let mut offset = 0_usize;
780                for part in parts {
781                    let dim = part.ambient_dim(1);
782                    let mut block =
783                        matrix.slice_mut(ndarray::s![offset..offset + dim, offset..offset + dim]);
784                    let mut owned = block.to_owned();
785                    part.add_normal_pinning(t.slice(ndarray::s![offset..offset + dim]), &mut owned);
786                    block.assign(&owned);
787                    offset += dim;
788                }
789            }
790            Self::Euclidean | Self::Circle { .. } | Self::Interval { .. } => {}
791        }
792    }
793}
794
795impl LatentIdMode {
796    /// Fixes the audit finding that ARD/DimSelection alone is rotation
797    /// symmetric and therefore not a standalone identifiability mode.
798    pub fn is_identifiable(&self) -> bool {
799        match self {
800            Self::AuxPrior { .. } | Self::AuxPriorDimSelection { .. } => true,
801            // A fixed-reference anchor pins the full gauge on its own.
802            Self::IsometryToReference { .. } => true,
803            // The behavioral head anchors the gauge through the label channel
804            // and always composes with ARD axis-selection; it is a standalone
805            // identifiable mode provided the head actually carries labels (an
806            // empty head pins nothing, rejected by `validate`).
807            Self::AuxOutcome { head, .. } => head.effective_labeled_count() > 0.0,
808            Self::DimSelection { .. } | Self::None => false,
809        }
810    }
811
812    /// Validate the mode's identifiability composition (issue #912 step 2).
813    ///
814    /// `AuxOutcome` must carry a non-vacuous head (at least one labeled row)
815    /// and composes with ARD — a bare label channel with no axis-selection
816    /// under-pins the gauge. Returns the offending reason on failure so the
817    /// builder can reject before fitting. (The former `reject_dim_selection_alone`
818    /// guard was unified here into the Result path for a panic-free gate.)
819    pub fn validate(&self) -> Result<(), String> {
820        if matches!(self, Self::DimSelection { .. }) {
821            // `DimSelection` alone is rotation-symmetric — not a valid
822            // gauge fix; callers must pair ARD with `AuxPrior`/`Isometry`.
823            // Beautiful unification: return a proper error instead of a
824            // panic guard (removes the tracked ban stub while keeping the
825            // gate).
826            return Err("LatentIdMode::DimSelection is not a standalone gauge fix; \
827                 pair ARD with AuxPrior or Isometry"
828                .to_string());
829        }
830        if let Self::AuxOutcome { head, .. } = self
831            && head.effective_labeled_count() <= 0.0
832        {
833            return Err(
834                "LatentIdMode::AuxOutcome: the behavioral head has no labeled rows \
835                 (Σ row-weights = 0); a label-free head pins no gauge dimension. \
836                 Provide labels or use AuxPrior/DimSelection composition."
837                    .to_string(),
838            );
839        }
840        Ok(())
841    }
842}
843
844/// Carrier for the `∂Φ/∂t` chain-rule input, dispatched on basis kind by
845/// [`LatentCoordValues::design_gradient_wrt_t_dispatch`].
846///
847/// * [`InputLocationDerivative::Radial`] is the *radial-kernel* path: the
848///   caller supplies the radial kernel family together with the center
849///   coordinates, and the chain rule
850///   `∂Φ/∂t = q(r) · (t − c)` is applied internally. This covers every
851///   isotropic radial basis — Duchon (any nullspace order), Matérn (every
852///   supported half-integer ν), and anything else whose pointwise
853///   gradient is radial. Helpers:
854///   [`crate::basis::duchon_radial_first_derivative_nd`],
855///   [`crate::basis::matern_radial_first_derivative_nd`].
856/// * [`InputLocationDerivative::Jet`] is the *pre-computed jet* path: the
857///   caller has already assembled a closed-form `(N, K, d)` tensor for a
858///   basis whose chain rule is not a simple radial scalar times a unit
859///   vector. Sphere kernels carry the tangent-direction times `K'(cos γ)`;
860///   periodic-cyclic B-splines carry the closed-form cardinal derivative;
861///   tensor-product B-splines carry the product-rule mix. Helpers:
862///   [`crate::basis::sphere_first_derivative_nd`],
863///   [`crate::basis::periodic_bspline_first_derivative_nd`],
864///   [`crate::basis::bspline_tensor_first_derivative`].
865///
866/// The dispatch is an enum rather than a trait because each path's
867/// arguments differ structurally (radial bases reuse scalar radial kernels shared with
868/// the kernel-shape chain machinery; jet bases ship the full tensor). All chain rules
869/// are analytic and closed-form; no autodiff, no finite differences.
870pub enum InputLocationDerivative<'a> {
871    /// Radial-kernel chain rule. The chain rule `(t − c)/r` is reconstructed
872    /// internally from the finite `q = φ'(r)/r` scalar and the center coordinates.
873    Radial {
874        centers: ArrayView2<'a, f64>,
875        radial_kind: &'a RadialScalarKind,
876    },
877    /// Pre-computed analytic `(n_obs, n_centers, latent_dim)` jet.
878    Jet(ArrayView3<'a, f64>),
879}
880
881/// Per-row latent coordinates `t ∈ ℝ^{N × d}` stored as a flat
882/// row-major `Array1<f64>` of length `n_obs * latent_dim`.
883///
884/// The flat-`Array1` layout mirrors [`crate::smooth::SpatialLogKappaCoords`]
885/// so the same `HyperDesignDerivative::from_implicit` / `DirectionalHyperParam`
886/// outer plumbing can consume it without modification.
887#[derive(Debug, Clone)]
888pub struct LatentCoordValues {
889    /// Stable process-local identity for this latent-coordinate block.
890    id: u64,
891    /// Flattened (n_obs, latent_dim) latent matrix, row-major
892    /// (so `values[n * d + k] = t_n[k]`).
893    values: Array1<f64>,
894    /// Number of rows `N`.
895    n_obs: usize,
896    /// Number of latent dimensions `d`.
897    latent_dim: usize,
898    /// Identifiability / gauge-fix mode.
899    id_mode: LatentIdMode,
900    /// Manifold used for per-row Riemannian updates.
901    manifold: LatentManifold,
902    /// Explicit update-side retraction. The empty registry is Euclidean.
903    retraction_registry: LatentRetractionRegistry,
904}
905
906impl LatentCoordValues {
907    /// Construct from a dense `(n_obs, latent_dim)` matrix.
908    pub fn from_matrix(matrix: ArrayView2<'_, f64>, id_mode: LatentIdMode) -> Self {
909        Self::from_matrix_with_manifold(matrix, id_mode, LatentManifold::Euclidean)
910    }
911
912    /// Construct from a dense matrix and explicit latent manifold.
913    pub fn from_matrix_with_manifold(
914        matrix: ArrayView2<'_, f64>,
915        id_mode: LatentIdMode,
916        manifold: LatentManifold,
917    ) -> Self {
918        Self::from_matrix_with_manifold_and_retraction(
919            matrix,
920            id_mode,
921            manifold,
922            LatentRetractionRegistry::all_euclidean(),
923        )
924    }
925
926    pub fn from_matrix_with_manifold_and_retraction(
927        matrix: ArrayView2<'_, f64>,
928        id_mode: LatentIdMode,
929        manifold: LatentManifold,
930        retraction_registry: LatentRetractionRegistry,
931    ) -> Self {
932        id_mode
933            .validate()
934            .expect("invalid LatentIdMode for LatentCoordValues::from_matrix_with_manifold");
935        let n_obs = matrix.nrows();
936        let latent_dim = matrix.ncols();
937        retraction_registry
938            .validate_dim(latent_dim, "LatentCoordValues::from_matrix_with_manifold")
939            .expect("invalid latent retraction dimension");
940        let mut values = Array1::<f64>::zeros(n_obs * latent_dim);
941        for n in 0..n_obs {
942            for k in 0..latent_dim {
943                values[n * latent_dim + k] = matrix[[n, k]];
944            }
945        }
946        let mut out = Self {
947            id: next_latent_coord_id(),
948            values,
949            n_obs,
950            latent_dim,
951            id_mode,
952            manifold,
953            retraction_registry,
954        };
955        out.project_all_rows_to_manifold();
956        out
957    }
958
959    /// Construct directly from a flat (`n_obs * latent_dim`) array.
960    pub fn from_flat(
961        values: Array1<f64>,
962        n_obs: usize,
963        latent_dim: usize,
964        id_mode: LatentIdMode,
965    ) -> Self {
966        Self::from_flat_with_manifold(
967            values,
968            n_obs,
969            latent_dim,
970            id_mode,
971            LatentManifold::Euclidean,
972        )
973    }
974
975    /// Construct directly from a flat array and explicit latent manifold.
976    pub fn from_flat_with_manifold(
977        values: Array1<f64>,
978        n_obs: usize,
979        latent_dim: usize,
980        id_mode: LatentIdMode,
981        manifold: LatentManifold,
982    ) -> Self {
983        Self::from_flat_with_manifold_and_retraction_and_id(
984            values,
985            n_obs,
986            latent_dim,
987            id_mode,
988            manifold,
989            LatentRetractionRegistry::all_euclidean(),
990            next_latent_coord_id(),
991        )
992    }
993
994    pub fn from_flat_with_manifold_and_retraction_and_id(
995        values: Array1<f64>,
996        n_obs: usize,
997        latent_dim: usize,
998        id_mode: LatentIdMode,
999        manifold: LatentManifold,
1000        retraction_registry: LatentRetractionRegistry,
1001        id: u64,
1002    ) -> Self {
1003        id_mode
1004            .validate()
1005            .expect("invalid LatentIdMode for LatentCoordValues::from_flat");
1006        assert_eq!(
1007            values.len(),
1008            n_obs * latent_dim,
1009            "LatentCoordValues::from_flat: length {} != n_obs * latent_dim = {}",
1010            values.len(),
1011            n_obs * latent_dim
1012        );
1013        retraction_registry
1014            .validate_dim(latent_dim, "LatentCoordValues::from_flat_with_manifold")
1015            .expect("invalid latent retraction dimension");
1016        let mut out = Self {
1017            id,
1018            values,
1019            n_obs,
1020            latent_dim,
1021            id_mode,
1022            manifold,
1023            retraction_registry,
1024        };
1025        out.project_all_rows_to_manifold();
1026        out
1027    }
1028
1029    pub fn latent_id(&self) -> u64 {
1030        self.id
1031    }
1032
1033    pub fn n_obs(&self) -> usize {
1034        self.n_obs
1035    }
1036
1037    pub fn latent_dim(&self) -> usize {
1038        self.latent_dim
1039    }
1040
1041    /// Total length of the flat value array (= `n_obs * latent_dim`).
1042    pub fn len(&self) -> usize {
1043        self.values.len()
1044    }
1045
1046    pub fn is_empty(&self) -> bool {
1047        self.values.is_empty()
1048    }
1049
1050    pub fn id_mode(&self) -> &LatentIdMode {
1051        &self.id_mode
1052    }
1053
1054    pub fn manifold(&self) -> &LatentManifold {
1055        &self.manifold
1056    }
1057
1058    pub fn retraction_registry(&self) -> &LatentRetractionRegistry {
1059        &self.retraction_registry
1060    }
1061
1062    /// Effective "is all Euclidean" check used by the inner solver:
1063    /// returns `true` only when *both* the declared `LatentManifold` and the
1064    /// optional override retraction registry are Euclidean. The registry's
1065    /// own `is_all_euclidean` answers a strictly narrower question (was an
1066    /// explicit non-Euclidean override installed?) and would silently miss
1067    /// non-Euclidean manifolds installed via `from_matrix_with_manifold` /
1068    /// `with_manifold`, which left the registry at its `all_euclidean`
1069    /// default. See `retract_flat_delta` for the matching update path.
1070    pub fn effective_is_all_euclidean(&self) -> bool {
1071        self.manifold.is_euclidean() && self.retraction_registry.is_all_euclidean()
1072    }
1073
1074    /// Effective per-axis trust-region metric weights. When the manifold is
1075    /// non-Euclidean it is the authoritative geometric description (it
1076    /// covers `Interval` and `ProductWithMetric`, which the registry's
1077    /// `RetractionKind` cannot express), so we read weights from it. When
1078    /// the manifold is Euclidean but an explicit override retraction was
1079    /// supplied (e.g. via the JSON `retraction:` key) the registry's
1080    /// weights win.
1081    pub fn effective_metric_weights(&self) -> Vec<f64> {
1082        if self.manifold.is_euclidean() {
1083            self.retraction_registry.metric_weights(self.latent_dim)
1084        } else {
1085            self.manifold.metric_weights()
1086        }
1087    }
1088
1089    /// Effective per-axis periodicity (`Some(period)` on wrapped axes). When
1090    /// the declared manifold is non-Euclidean it is authoritative; when it is
1091    /// Euclidean, an explicit override retraction (if any) decides. Returns a
1092    /// `Vec` of length `latent_dim`.
1093    pub fn effective_axis_periods(&self) -> Vec<Option<f64>> {
1094        let periods = if self.manifold.is_euclidean() {
1095            self.retraction_registry.axis_periods(self.latent_dim)
1096        } else {
1097            self.manifold.axis_periods()
1098        };
1099        assert_eq!(
1100            periods.len(),
1101            self.latent_dim,
1102            "effective_axis_periods length {} != latent_dim {}",
1103            periods.len(),
1104            self.latent_dim
1105        );
1106        periods
1107    }
1108
1109    pub fn with_manifold(&self, manifold: LatentManifold) -> Self {
1110        Self::from_flat_with_manifold_and_retraction_and_id(
1111            self.values.clone(),
1112            self.n_obs,
1113            self.latent_dim,
1114            self.id_mode.clone(),
1115            manifold,
1116            self.retraction_registry.clone(),
1117            self.id,
1118        )
1119    }
1120
1121    /// View the flat value array.
1122    pub fn as_flat(&self) -> &Array1<f64> {
1123        &self.values
1124    }
1125
1126    /// View row `n` as a length-`d` slice.
1127    pub fn row(&self, n: usize) -> &[f64] {
1128        let start = n * self.latent_dim;
1129        let end = start + self.latent_dim;
1130        &self.values.as_slice().expect("contiguous")[start..end]
1131    }
1132
1133    /// Materialize as a dense `(n_obs, latent_dim)` matrix view.
1134    /// Useful when handing `t` to a row-major basis evaluator
1135    /// (e.g. `build_duchon_basis`).
1136    pub fn as_matrix(&self) -> Array2<f64> {
1137        let mut out = Array2::<f64>::zeros((self.n_obs, self.latent_dim));
1138        for n in 0..self.n_obs {
1139            for k in 0..self.latent_dim {
1140                out[[n, k]] = self.values[n * self.latent_dim + k];
1141            }
1142        }
1143        out
1144    }
1145
1146    /// Mutable write back of the flat value array, e.g. after a Newton step.
1147    pub fn set_flat(&mut self, flat: ArrayView1<'_, f64>) {
1148        assert_eq!(flat.len(), self.values.len());
1149        self.values.assign(&flat);
1150        self.project_all_rows_to_manifold();
1151    }
1152
1153    /// Apply a flat tangent update row-by-row through the manifold retraction.
1154    pub fn retract_flat_delta(&mut self, delta: ArrayView1<'_, f64>) {
1155        assert_eq!(delta.len(), self.values.len());
1156        if self.retraction_registry.is_all_euclidean() {
1157            if self.manifold.is_euclidean() {
1158                for (t, dt) in self.values.iter_mut().zip(delta.iter()) {
1159                    *t += *dt;
1160                }
1161                return;
1162            }
1163            assert_eq!(
1164                self.manifold.ambient_dim(self.latent_dim),
1165                self.latent_dim,
1166                "LatentCoordValues::retract_flat_delta: manifold ambient dim does not match latent_dim",
1167            );
1168            for n in 0..self.n_obs {
1169                let start = n * self.latent_dim;
1170                let end = start + self.latent_dim;
1171                let next = self.manifold.retract(
1172                    self.values.slice(ndarray::s![start..end]),
1173                    delta.slice(ndarray::s![start..end]),
1174                );
1175                for a in 0..self.latent_dim {
1176                    self.values[start + a] = next[a];
1177                }
1178            }
1179            return;
1180        }
1181        for n in 0..self.n_obs {
1182            let start = n * self.latent_dim;
1183            let end = start + self.latent_dim;
1184            let mut current = self.values.slice_mut(ndarray::s![start..end]);
1185            let xi = delta.slice(ndarray::s![start..end]);
1186            self.retraction_registry.retract(&mut current, xi);
1187        }
1188    }
1189
1190    fn project_all_rows_to_manifold(&mut self) {
1191        if self.manifold.is_euclidean() {
1192            return;
1193        }
1194        assert_eq!(self.manifold.ambient_dim(self.latent_dim), self.latent_dim);
1195        for n in 0..self.n_obs {
1196            let start = n * self.latent_dim;
1197            let end = start + self.latent_dim;
1198            let projected = self
1199                .manifold
1200                .project_point(self.values.slice(ndarray::s![start..end]));
1201            for a in 0..self.latent_dim {
1202                self.values[start + a] = projected[a];
1203            }
1204        }
1205    }
1206
1207    /// Apply this latent block back to a `TermCollectionSpec`-style covariate
1208    /// table: returns the `(N, d)` materialized matrix that downstream basis
1209    /// evaluators (Duchon, Matérn, ...) take as their feature input.
1210    ///
1211    /// This mirrors [`crate::smooth::SpatialLogKappaCoords::apply_tospec`],
1212    /// but the carrier on the spec side is the data-row covariate block rather
1213    /// than the per-term `length_scale`. The spec-mutation is handled at the
1214    /// call site (the consuming term needs to know which columns of its
1215    /// feature view to overwrite).
1216    pub fn apply_tospec(&self) -> Array2<f64> {
1217        self.as_matrix()
1218    }
1219
1220    /// Compute `∂Φ/∂t` for a radial-kernel design Φ — the original
1221    /// Duchon/Matérn path. See [`Self::design_gradient_wrt_t_dispatch`] for
1222    /// the basis-agnostic dispatch entry point.
1223    ///
1224    /// `centers` is `(n_centers, d)`.
1225    /// Returns a `(n_obs, n_centers, d)` jet whose `(n, k, a)` entry is
1226    /// `∂Φ_{n,k} / ∂t_{n,a} = q(r_{n,k}) · (t_{n,a} − c_{k,a})`.
1227    ///
1228    /// At `r = 0` the unit vector `(t − c)/r` is undefined; the radial scalar
1229    /// path therefore asks the kernel for the finite `q` limit and surfaces
1230    /// `BasisError::DegenerateAtCollision` when that limit does not exist.
1231    pub(crate) fn design_gradient_wrt_t(
1232        &self,
1233        centers: ArrayView2<'_, f64>,
1234        radial_kind: &RadialScalarKind,
1235    ) -> Result<Array3<f64>, BasisError> {
1236        let n_obs = self.n_obs;
1237        let d = self.latent_dim;
1238        let n_centers = centers.nrows();
1239        if centers.ncols() != d {
1240            crate::bail_dim_basis!(
1241                "LatentCoordValues::design_gradient_wrt_t center dimension mismatch: centers have {} cols but latent_dim is {}",
1242                centers.ncols(),
1243                d
1244            );
1245        }
1246        let mut jet = Array3::<f64>::zeros((n_obs, n_centers, d));
1247        for n in 0..n_obs {
1248            let t_n = self.row(n);
1249            for k in 0..n_centers {
1250                let mut r2 = 0.0_f64;
1251                for a in 0..d {
1252                    let delta = t_n[a] - centers[[k, a]];
1253                    r2 += delta * delta;
1254                }
1255                let r = r2.sqrt();
1256                let (_, q, _) = radial_kind.eval_design_triplet(r)?;
1257                if q == 0.0 {
1258                    continue;
1259                }
1260                for a in 0..d {
1261                    jet[[n, k, a]] = q * (t_n[a] - centers[[k, a]]);
1262                }
1263            }
1264        }
1265        Ok(jet)
1266    }
1267
1268    /// Compute `∂Φ/∂t` for an arbitrary supported basis kind, by dispatching
1269    /// to the right closed-form chain rule.
1270    ///
1271    /// All radial-kernel bases (Duchon, Matérn) reduce to the same
1272    /// `q(r) · (t − c)` chain that `design_gradient_wrt_t` already implements.
1273    /// Non-radial bases (sphere, periodic-cyclic B-spline, tensor
1274    /// B-spline) carry their own analytic `(N, K, d)` jet — the caller
1275    /// pre-builds that jet using the matching `*_first_derivative_nd` helper
1276    /// in [`crate::basis`] and passes it in via
1277    /// [`InputLocationDerivative::Jet`].
1278    ///
1279    /// This is the single entry point the outer optimizer should call; it
1280    /// stays in lock-step with the kernel-parameter chain rule that
1281    /// `SpatialLogKappaCoords` uses (re-pointed at the first kernel argument
1282    /// rather than at kernel anisotropy).
1283    pub(crate) fn design_gradient_wrt_t_dispatch(
1284        &self,
1285        input: InputLocationDerivative<'_>,
1286    ) -> Result<Array3<f64>, BasisError> {
1287        match input {
1288            InputLocationDerivative::Radial {
1289                centers,
1290                radial_kind,
1291            } => self.design_gradient_wrt_t(centers, radial_kind),
1292            InputLocationDerivative::Jet(jet) => {
1293                if jet.shape() != [self.n_obs, jet.shape()[1], self.latent_dim] {
1294                    crate::bail_dim_basis!(
1295                        "LatentCoordValues::design_gradient_wrt_t_dispatch jet shape {:?} does not match latent shape ({}, {}, {})",
1296                        jet.shape(),
1297                        self.n_obs,
1298                        jet.shape()[1],
1299                        self.latent_dim
1300                    );
1301                }
1302                // The non-radial helpers already produce a (N, K, d) tensor
1303                // in the layout downstream contraction consumes. Return a copy
1304                // so the caller owns the data and is decoupled from the source
1305                // array's lifetime.
1306                Ok(jet.to_owned())
1307            }
1308        }
1309    }
1310}
1311
1312/// Minimum total active assignment mass a per-row atom code must retain after a
1313/// Whether the active assignment mass of a per-row code is degenerate
1314/// (non-finite), i.e. a NaN/Inf assignment.
1315///
1316/// #1074: the magic `LATENT_ACTIVE_MASS_FLOOR = 1e-6` collapse-detect-and-reseed
1317/// floor was DELETED. It masked the real defect — rows being driven dark by the
1318/// assignment optimizer — with a detect-then-reseed bandaid rather than
1319/// preventing the collapse. Only the genuine NaN/Inf guard remains. The proper
1320/// fix (prevent the active-set collapse in the assignment step) is tracked
1321/// separately.
1322///
1323/// `active_weights` is the slice of per-atom assignment weights on the row's
1324/// active support. Returns `true` only when the summed magnitude is non-finite.
1325pub fn active_mass_breached(active_weights: &[f64]) -> bool {
1326    let mut mass = 0.0_f64;
1327    for &w in active_weights {
1328        mass += w.abs();
1329    }
1330    !mass.is_finite()
1331}
1332
1333fn wrap_to_period(x: f64, period: f64) -> f64 {
1334    assert!(
1335        period.is_finite() && period > 0.0,
1336        "wrap_to_period requires a finite positive period; got {period}"
1337    );
1338    let y = x.rem_euclid(period);
1339    if y == period { 0.0 } else { y }
1340}
1341
1342/// Normalize `v[0..dim]` to a unit vector (for `LatentManifold::Sphere`
1343/// projection and retraction).
1344///
1345/// "Or axis": if the input is zero or non-finite (degenerate or numerical
1346/// mishap in caller), gracefully fall back to the canonical first axis
1347/// unit vector `[1, 0, …, 0]`. This removes a hard panic while preserving
1348/// the sphere contract that every returned point has unit Euclidean norm.
1349/// Callers (project_point / retract on Sphere) already ensure dim matches
1350/// the view length for the manifold component.
1351fn normalize_or_axis(v: ArrayView1<'_, f64>, dim: usize) -> Array1<f64> {
1352    let mut norm_sq = 0.0_f64;
1353    for a in 0..dim {
1354        norm_sq += v[a] * v[a];
1355    }
1356    const EPS: f64 = 1e-300; // protect against underflow/denorm that would give Inf
1357    if norm_sq > EPS && norm_sq.is_finite() {
1358        let inv = 1.0 / norm_sq.sqrt();
1359        let mut out = Array1::<f64>::zeros(dim);
1360        for a in 0..dim {
1361            out[a] = v[a] * inv;
1362        }
1363        out
1364    } else {
1365        // "or axis" fallback — beautiful, non-panicking resolution for
1366        // degenerate ambient vector on the sphere.
1367        let mut out = Array1::<f64>::zeros(dim);
1368        if dim > 0 {
1369            out[0] = 1.0;
1370        }
1371        out
1372    }
1373}
1374
1375fn dot_views(a: ArrayView1<'_, f64>, b: ArrayView1<'_, f64>) -> f64 {
1376    assert_eq!(a.len(), b.len());
1377    let mut acc = 0.0_f64;
1378    for i in 0..a.len() {
1379        acc += a[i] * b[i];
1380    }
1381    acc
1382}
1383
1384fn matvec(a: ArrayView2<'_, f64>, x: ArrayView1<'_, f64>) -> Array1<f64> {
1385    assert_eq!(a.ncols(), x.len());
1386    let mut out = Array1::<f64>::zeros(a.nrows());
1387    for i in 0..a.nrows() {
1388        let mut acc = 0.0_f64;
1389        for j in 0..a.ncols() {
1390            acc += a[[i, j]] * x[j];
1391        }
1392        out[i] = acc;
1393    }
1394    out
1395}
1396
1397#[inline]
1398fn symmetrize(a: &mut Array2<f64>) {
1399    // Callers in this module always pass square (d, d) matrices; delegate to
1400    // the canonical helper in `linalg::utils`.
1401    gam_linalg::matrix::symmetrize_in_place(a)
1402}
1403
1404/// Auxiliary-prior penalty contribution: returns the per-row reference
1405/// coordinates `ĥ(u_n)` shape `(n_obs, d)` and the effective strength `μ`.
1406///
1407/// `t_target` is broadcast across the inner ridge of `½ μ · ‖t − t_target‖²`,
1408/// which the call site folds into the Y-stack via a virtual-row augmentation
1409/// (`y' = [y; √μ · t_target]`, `X' = [X; √μ · I_d ⊗ row-block]`). This
1410/// keeps the inner solver Gaussian-closed-form.
1411///
1412/// For `AuxPriorFamily::Ridge` the conditional mean is the closed-form ridge
1413/// regression `(UᵀU + ε I)⁻¹ UᵀT` evaluated at each row's `u_n`. For
1414/// `Linear` the ridge is zero (which raises if `UᵀU` is singular).
1415/// Closed-form auxiliary-prior REML statistics at a fixed outer coordinate `t`.
1416pub struct AuxPriorRemlStats {
1417    pub residual_sq: f64,
1418    pub log_mu: f64,
1419    pub mu: f64,
1420    pub auto: bool,
1421    pub score: f64,
1422}
1423
1424/// Auxiliary-prior REML statistics for a fixed outer coordinate `t`, given the
1425/// precomputed `targets` (see [`aux_prior_targets`]). Returns the residual sum of
1426/// squares, the precision `mu` (the supplied `aux_strength` when `Some`, else the
1427/// closed-form REML optimum `mu = K / Σr²`), whether it was auto-selected, and
1428/// the prior score `0.5·mu·Σr² − 0.5·K·ln(mu)`. The `log_mu` coordinate has this
1429/// closed-form optimum at fixed `t` because only the normalized auxiliary prior
1430/// depends on it.
1431///
1432/// `K = n_obs · latent_dim` is the number of scalar latent coordinates the single
1433/// shared precision `mu` governs. The normalizer term `−0.5·K·ln(mu)` is the prior
1434/// log-determinant `−0.5·log det₊(mu · I_K)`, so it counts every governed
1435/// coordinate. Counting only `n_obs` undercounts a `latent_dim`-dimensional latent
1436/// by exactly `latent_dim`, which biases the REML precision toward under-shrinkage
1437/// (the per-axis ARD path emits `−0.5·n_obs·ln(α)` for each of `latent_dim` axes;
1438/// a single shared `mu` must match that sum).
1439pub fn aux_prior_reml_stats(
1440    t_mat: ArrayView2<'_, f64>,
1441    targets: ArrayView2<'_, f64>,
1442    aux_strength: Option<f64>,
1443) -> Result<AuxPriorRemlStats, String> {
1444    let n_obs = t_mat.nrows();
1445    let latent_dim = t_mat.ncols();
1446    let mut residual_sq = 0.0_f64;
1447    for n in 0..n_obs {
1448        for a in 0..latent_dim {
1449            let diff = t_mat[[n, a]] - targets[[n, a]];
1450            residual_sq += diff * diff;
1451        }
1452    }
1453    if !residual_sq.is_finite() {
1454        return Err("auxiliary prior residual norm must be finite".to_string());
1455    }
1456    let (log_mu, mu, auto) = match aux_strength {
1457        Some(mu) => {
1458            if !(mu.is_finite() && mu > 0.0) {
1459                return Err(format!(
1460                    "aux_strength must be finite and positive; got {mu}"
1461                ));
1462            }
1463            (mu.ln(), mu, false)
1464        }
1465        None => {
1466            if residual_sq <= 0.0 {
1467                return Err(
1468                    "aux_strength='auto' has no finite REML optimum when the auxiliary residual is zero"
1469                        .to_string(),
1470                );
1471            }
1472            let mu = ((n_obs * latent_dim) as f64) / residual_sq;
1473            if !(mu.is_finite() && mu > 0.0) {
1474                return Err(format!(
1475                    "auto aux_strength selected a non-finite precision: {mu}"
1476                ));
1477            }
1478            (mu.ln(), mu, true)
1479        }
1480    };
1481    let score = 0.5 * mu * residual_sq - 0.5 * ((n_obs * latent_dim) as f64) * log_mu;
1482    Ok(AuxPriorRemlStats {
1483        residual_sq,
1484        log_mu,
1485        mu,
1486        auto,
1487        score,
1488    })
1489}
1490
1491pub fn aux_prior_targets(
1492    t: ArrayView2<'_, f64>,
1493    u: ArrayView2<'_, f64>,
1494    family: AuxPriorFamily,
1495) -> Result<Array2<f64>, String> {
1496    let n_obs = t.nrows();
1497    let d = t.ncols();
1498    if u.nrows() != n_obs {
1499        return Err(format!(
1500            "aux_prior_targets: u has {} rows but t has {}",
1501            u.nrows(),
1502            n_obs
1503        ));
1504    }
1505    let p = u.ncols();
1506    if p == 0 {
1507        return Err("aux_prior_targets: auxiliary u must have at least one column".into());
1508    }
1509    // gram = UᵀU  (p × p)
1510    let mut gram = Array2::<f64>::zeros((p, p));
1511    for n in 0..n_obs {
1512        for i in 0..p {
1513            for j in 0..p {
1514                gram[[i, j]] += u[[n, i]] * u[[n, j]];
1515            }
1516        }
1517    }
1518    let ridge_eps = match family {
1519        AuxPriorFamily::Ridge => {
1520            let trace: f64 = (0..p).map(|i| gram[[i, i]]).sum();
1521            (1e-6 * trace / p as f64).max(1e-12)
1522        }
1523        AuxPriorFamily::Linear => 0.0,
1524    };
1525    for i in 0..p {
1526        gram[[i, i]] += ridge_eps;
1527    }
1528    // rhs = UᵀT  (p × d)
1529    let mut rhs = Array2::<f64>::zeros((p, d));
1530    for n in 0..n_obs {
1531        for i in 0..p {
1532            for k in 0..d {
1533                rhs[[i, k]] += u[[n, i]] * t[[n, k]];
1534            }
1535        }
1536    }
1537    let coeffs = solve_spd(gram.view(), rhs.view())?;
1538    // targets = U · coeffs  (n_obs × d)
1539    let mut targets = Array2::<f64>::zeros((n_obs, d));
1540    for n in 0..n_obs {
1541        for k in 0..d {
1542            let mut acc = 0.0_f64;
1543            for i in 0..p {
1544                acc += u[[n, i]] * coeffs[[i, k]];
1545            }
1546            targets[[n, k]] = acc;
1547        }
1548    }
1549    Ok(targets)
1550}
1551
1552/// Lightweight Cholesky-based SPD solve. Keeps this module dependency-free
1553/// from the broader faer-wrapping surface; matrices here are tiny
1554/// (`p × p` with p = aux-feature count, typically O(10)).
1555fn solve_spd(a: ArrayView2<'_, f64>, b: ArrayView2<'_, f64>) -> Result<Array2<f64>, String> {
1556    let n = a.nrows();
1557    if a.ncols() != n {
1558        return Err("solve_spd: A must be square".into());
1559    }
1560    if b.nrows() != n {
1561        return Err("solve_spd: RHS row count mismatch".into());
1562    }
1563    // In-place Cholesky factorization. We pay the O(n³) copy + O(n³) factor
1564    // up front; n is tiny in the auxiliary-prior path.
1565    let mut l = Array2::<f64>::zeros((n, n));
1566    for i in 0..n {
1567        for j in 0..=i {
1568            let mut sum = a[[i, j]];
1569            for k in 0..j {
1570                sum -= l[[i, k]] * l[[j, k]];
1571            }
1572            if i == j {
1573                if sum <= 0.0 {
1574                    return Err(format!(
1575                        "solve_spd: non-positive pivot {sum} at index {i} \
1576                         (matrix is not positive definite)"
1577                    ));
1578                }
1579                l[[i, j]] = sum.sqrt();
1580            } else {
1581                l[[i, j]] = sum / l[[j, j]];
1582            }
1583        }
1584    }
1585    // Solve L y = b, then Lᵀ x = y, column by column.
1586    let d = b.ncols();
1587    let mut out = Array2::<f64>::zeros((n, d));
1588    for col in 0..d {
1589        let mut y = Array1::<f64>::zeros(n);
1590        for i in 0..n {
1591            let mut sum = b[[i, col]];
1592            for k in 0..i {
1593                sum -= l[[i, k]] * y[k];
1594            }
1595            y[i] = sum / l[[i, i]];
1596        }
1597        for i in (0..n).rev() {
1598            let mut sum = y[i];
1599            for k in (i + 1)..n {
1600                sum -= l[[k, i]] * out[[k, col]];
1601            }
1602            out[[i, col]] = sum / l[[i, i]];
1603        }
1604    }
1605    Ok(out)
1606}
1607
1608#[cfg(test)]
1609mod tests {
1610    use super::*;
1611    use ndarray::array;
1612
1613    #[test]
1614    fn from_matrix_roundtrip() {
1615        let m = array![[1.0_f64, 2.0], [3.0, 4.0], [5.0, 6.0]];
1616        let lc = LatentCoordValues::from_matrix(m.view(), LatentIdMode::None);
1617        assert_eq!(lc.n_obs(), 3);
1618        assert_eq!(lc.latent_dim(), 2);
1619        let back = lc.as_matrix();
1620        assert_eq!(back, m);
1621    }
1622
1623    #[test]
1624    fn row_access() {
1625        let m = array![[1.0_f64, 2.0], [3.0, 4.0]];
1626        let lc = LatentCoordValues::from_matrix(m.view(), LatentIdMode::None);
1627        assert_eq!(lc.row(0), &[1.0, 2.0]);
1628        assert_eq!(lc.row(1), &[3.0, 4.0]);
1629    }
1630
1631    /// `preserves_isometry_cross_block_coherence` must report exactly the
1632    /// charts whose Euclidean→Riemannian geometry transform is the identity on
1633    /// the per-row gradient / `H_tt` blocks. Keying the SAE isometry
1634    /// cross-block coupling decision on `is_euclidean()` instead of this
1635    /// predicate dropped the cross-block on the flat `Circle` chart, leaving a
1636    /// block-diagonal Hessian whose joint Newton step never reached KKT
1637    /// stationarity — the arrow-Schur proximal ridge then saturated at 1e15
1638    /// (issue #795, regression of #681). We pin the predicate AND its grounding
1639    /// invariant: on `Circle` the geometry transform really is the identity, so
1640    /// coherence is preserved; on `Sphere` / `Interval` it is not.
1641    #[test]
1642    fn isometry_cross_block_coherence_tracks_identity_geometry_transform() {
1643        assert!(LatentManifold::Euclidean.preserves_isometry_cross_block_coherence());
1644        assert!(
1645            LatentManifold::Circle {
1646                period: std::f64::consts::TAU
1647            }
1648            .preserves_isometry_cross_block_coherence()
1649        );
1650        assert!(!LatentManifold::Sphere { dim: 3 }.preserves_isometry_cross_block_coherence());
1651        assert!(
1652            !LatentManifold::Interval { lo: -1.0, hi: 1.0 }
1653                .preserves_isometry_cross_block_coherence()
1654        );
1655        // A Product is coherent iff every factor is.
1656        assert!(
1657            LatentManifold::Product(vec![
1658                LatentManifold::Euclidean,
1659                LatentManifold::Circle {
1660                    period: std::f64::consts::TAU
1661                },
1662            ])
1663            .preserves_isometry_cross_block_coherence()
1664        );
1665        assert!(
1666            !LatentManifold::Product(vec![
1667                LatentManifold::Circle {
1668                    period: std::f64::consts::TAU
1669                },
1670                LatentManifold::Sphere { dim: 3 },
1671            ])
1672            .preserves_isometry_cross_block_coherence()
1673        );
1674
1675        // Grounding invariant: on the Circle chart the geometry transform that
1676        // `apply_riemannian_latent_geometry` applies — gradient projection and
1677        // the Euclidean→Riemannian Hessian conversion — is the EXACT identity,
1678        // so the coupled `μ AᵀA` block survives intact and the cross-block must
1679        // be kept.
1680        let circle = LatentManifold::Circle {
1681            period: std::f64::consts::TAU,
1682        };
1683        let t = array![0.73_f64];
1684        let eg = array![2.4_f64];
1685        let eh = array![[1.7_f64]];
1686        let projected_g = circle.project_gradient_to_tangent(t.view(), eg.view());
1687        assert_eq!(
1688            projected_g, eg,
1689            "Circle gradient projection must be identity"
1690        );
1691        let rhess = circle.riemannian_hessian_matrix(t.view(), eg.view(), eh.view());
1692        assert_eq!(
1693            rhess, eh,
1694            "Circle Riemannian Hessian must equal the Euclidean Hessian"
1695        );
1696    }
1697
1698    /// `project_matrix_columns_to_tangent_into` (the hoisted, allocation-reuse
1699    /// projection used by the SAE arrow-Schur assembler) must match the
1700    /// per-column ground truth `project_to_tangent`, and must agree exactly
1701    /// with the allocating `project_matrix_columns_to_tangent` it backs, on a
1702    /// non-Euclidean (Sphere) manifold where the tangent projection is
1703    /// non-trivial. This pins the in-place projection introduced for the SAE
1704    /// hot-path scratch hoist.
1705    #[test]
1706    fn project_matrix_columns_to_tangent_into_matches_columnwise() {
1707        let manifold = LatentManifold::Sphere { dim: 3 };
1708        // Unit base point on S².
1709        let norm = (1.0_f64 + 4.0 + 4.0).sqrt();
1710        let t = array![1.0 / norm, 2.0 / norm, 2.0 / norm];
1711        let matrix = array![
1712            [0.3_f64, -1.1, 0.7, 2.0],
1713            [1.5, 0.2, -0.4, 0.9],
1714            [-0.6, 0.8, 1.3, -1.7],
1715        ];
1716        let mut into = Array2::<f64>::zeros(matrix.dim());
1717        manifold.project_matrix_columns_to_tangent_into(t.view(), matrix.view(), into.view_mut());
1718        let allocating = manifold.project_matrix_columns_to_tangent(t.view(), matrix.view());
1719        for col_idx in 0..matrix.ncols() {
1720            let expected = manifold.project_to_tangent(t.view(), matrix.column(col_idx));
1721            for row_idx in 0..matrix.nrows() {
1722                assert!(
1723                    (into[[row_idx, col_idx]] - expected[row_idx]).abs() < 1e-12,
1724                    "in-place projection deviates from columnwise truth at ({row_idx},{col_idx})"
1725                );
1726                assert_eq!(
1727                    into[[row_idx, col_idx]],
1728                    allocating[[row_idx, col_idx]],
1729                    "in-place and allocating projection differ at ({row_idx},{col_idx})"
1730                );
1731            }
1732        }
1733    }
1734
1735    /// Regression for issue #191 (and the K=2 periodic case of #174):
1736    /// `from_matrix_with_manifold(Circle)` must produce a value whose
1737    /// update path wraps into `[0, 2π)` even though the override
1738    /// `LatentRetractionRegistry` is left at its `all_euclidean` default.
1739    /// Before the fix, the retraction silently decayed to Euclidean and
1740    /// values drifted outside the circle on every Newton step.
1741    #[test]
1742    fn circle_manifold_update_wraps_into_canonical_interval() {
1743        let two_pi = std::f64::consts::TAU;
1744        let near_top = 6.2_f64;
1745        let m = array![[near_top]];
1746        let mut lc = LatentCoordValues::from_matrix_with_manifold(
1747            m.view(),
1748            LatentIdMode::None,
1749            LatentManifold::Circle { period: two_pi },
1750        );
1751        let delta = Array1::from(vec![1.5_f64]);
1752        lc.retract_flat_delta(delta.view());
1753        let updated = lc.row(0)[0];
1754        let expected = (near_top + 1.5).rem_euclid(two_pi);
1755        assert!(
1756            (0.0..two_pi).contains(&updated),
1757            "Circle retraction did not wrap into [0, 2π): got {updated}",
1758        );
1759        assert!(
1760            (updated - expected).abs() < 1e-12,
1761            "Circle retraction value mismatch: got {updated}, expected {expected}",
1762        );
1763
1764        let large_delta = Array1::from(vec![10.0 * two_pi + 0.25_f64]);
1765        lc.retract_flat_delta(large_delta.view());
1766        let after_big = lc.row(0)[0];
1767        assert!(
1768            (0.0..two_pi).contains(&after_big),
1769            "Circle retraction did not wrap a large delta: got {after_big}",
1770        );
1771    }
1772
1773    /// Mirror of the Circle regression for `LatentManifold::Sphere`: the
1774    /// per-row update must preserve unit norm. Before the fix the registry
1775    /// stayed Euclidean and the additive update broke the constraint.
1776    #[test]
1777    fn sphere_manifold_update_preserves_unit_norm() {
1778        let m = array![[1.0_f64, 0.0, 0.0]];
1779        let mut lc = LatentCoordValues::from_matrix_with_manifold(
1780            m.view(),
1781            LatentIdMode::None,
1782            LatentManifold::Sphere { dim: 3 },
1783        );
1784        let delta = Array1::from(vec![0.3_f64, 0.7, -0.2]);
1785        lc.retract_flat_delta(delta.view());
1786        let row = lc.row(0);
1787        let norm_sq: f64 = row.iter().map(|x| x * x).sum();
1788        assert!(
1789            (norm_sq.sqrt() - 1.0).abs() < 1e-12,
1790            "Sphere retraction did not preserve unit norm: ||t|| = {}",
1791            norm_sq.sqrt(),
1792        );
1793
1794        let big_delta = Array1::from(vec![50.0_f64, -25.0, 13.0]);
1795        lc.retract_flat_delta(big_delta.view());
1796        let row2 = lc.row(0);
1797        let norm_sq2: f64 = row2.iter().map(|x| x * x).sum();
1798        assert!(
1799            (norm_sq2.sqrt() - 1.0).abs() < 1e-12,
1800            "Sphere retraction failed to renormalize after large delta: ||t|| = {}",
1801            norm_sq2.sqrt(),
1802        );
1803    }
1804}