gam_geometry/manifold.rs
1use std::fmt;
2
3use ndarray::{Array1, Array2, ArrayView1, ArrayView2};
4
5pub const GEOMETRY_EPS: f64 = 1.0e-12;
6
7#[derive(Debug, Clone, PartialEq)]
8pub enum GeometryError {
9 DimensionMismatch {
10 context: &'static str,
11 expected: usize,
12 got: usize,
13 },
14 InvalidPoint(&'static str),
15 Singular(&'static str),
16 /// A manifold primitive has no implementation for this manifold and must
17 /// not silently fall back to a wrong default (e.g. a curved-manifold VJP
18 /// for which no closed form is wired up yet).
19 Unsupported(&'static str),
20}
21
22impl fmt::Display for GeometryError {
23 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
24 match self {
25 Self::DimensionMismatch {
26 context,
27 expected,
28 got,
29 } => write!(f, "{context} expected length {expected}, got {got}"),
30 Self::InvalidPoint(message) => write!(f, "invalid manifold point: {message}"),
31 Self::Singular(message) => write!(f, "singular geometry operation: {message}"),
32 Self::Unsupported(message) => write!(f, "unsupported geometry operation: {message}"),
33 }
34 }
35}
36
37impl std::error::Error for GeometryError {}
38
39pub type GeometryResult<T> = Result<T, GeometryError>;
40
41pub trait RiemannianManifold: Send + Sync {
42 fn dim(&self) -> usize;
43
44 fn ambient_dim(&self) -> usize {
45 self.dim()
46 }
47
48 fn tangent_basis(&self, point: ArrayView1<'_, f64>) -> GeometryResult<Array2<f64>>;
49
50 fn exp_map(
51 &self,
52 point: ArrayView1<'_, f64>,
53 tangent_vec: ArrayView1<'_, f64>,
54 ) -> GeometryResult<Array1<f64>>;
55
56 fn log_map(
57 &self,
58 p_from: ArrayView1<'_, f64>,
59 p_to: ArrayView1<'_, f64>,
60 ) -> GeometryResult<Array1<f64>>;
61
62 fn parallel_transport(
63 &self,
64 point_along: ArrayView2<'_, f64>,
65 vec: ArrayView1<'_, f64>,
66 ) -> GeometryResult<Array1<f64>>;
67
68 fn metric_tensor(&self, point: ArrayView1<'_, f64>) -> GeometryResult<Array2<f64>>;
69
70 fn christoffel_symbols(&self, point: ArrayView1<'_, f64>) -> GeometryResult<Vec<Array2<f64>>> {
71 check_len("Christoffel point", point.len(), self.ambient_dim())?;
72 Err(GeometryError::Unsupported(
73 "Christoffel symbols require a manifold-specific local chart",
74 ))
75 }
76
77 fn sectional_curvature(
78 &self,
79 point: ArrayView1<'_, f64>,
80 tangent_pair: (ArrayView1<'_, f64>, ArrayView1<'_, f64>),
81 ) -> GeometryResult<f64>;
82
83 fn project_tangent(
84 &self,
85 point: ArrayView1<'_, f64>,
86 vec: ArrayView1<'_, f64>,
87 ) -> GeometryResult<Array1<f64>> {
88 // Default projection is the identity (Euclidean-flat tangent space).
89 // Validate that BOTH the base point and the tangent vector live in the
90 // ambient space so a caller passing a wrong-length vector fails fast
91 // here rather than producing a silently mis-shaped tangent vector. The
92 // tangent of `T_pM` is represented in the same ambient coordinates as
93 // the point, so its length must equal `ambient_dim()` too.
94 let expected = self.ambient_dim();
95 if point.len() != expected {
96 return Err(GeometryError::DimensionMismatch {
97 context: "project_tangent point",
98 expected,
99 got: point.len(),
100 });
101 }
102 if vec.len() != expected {
103 return Err(GeometryError::DimensionMismatch {
104 context: "project_tangent vector",
105 expected,
106 got: vec.len(),
107 });
108 }
109 Ok(vec.to_owned())
110 }
111
112 /// Riemannian gradient of a scalar `f` raised from its **ambient Euclidean
113 /// differential** `e` — the vector `∂f/∂x` in ambient coordinates that an
114 /// objective returns from its `value_gradient`.
115 ///
116 /// The Riemannian gradient is the Riesz representative of the differential
117 /// under the manifold metric `g`: the unique tangent vector `v` satisfying
118 ///
119 /// ```text
120 /// g_x(v, ξ) = Df_x[ξ] = ⟨e, ξ⟩ for every tangent ξ.
121 /// ```
122 ///
123 /// Orthogonally projecting `e` onto the tangent space ([`project_tangent`])
124 /// produces `v` **only** for the embedded/identity metric. For a genuine
125 /// Riemannian metric (affine-invariant SPD, canonical Stiefel, …) the
126 /// differential must be *raised through the metric* — projecting alone gives
127 /// the wrong direction and the wrong slope, so any model linear term or
128 /// Armijo slope built from it is not even first-order accurate (issue #955).
129 ///
130 /// The default raises `e` in a tangent basis `B = tangent_basis(x)` against
131 /// the metric `G = metric_tensor(x)`:
132 ///
133 /// ```text
134 /// v = B (Bᵀ G B)⁻¹ Bᵀ e.
135 /// ```
136 ///
137 /// This is the Riesz representative for ANY basis `B` of `T_xM` (proof: for
138 /// `ξ = B c`, `g_x(v, ξ) = eᵀ B (Bᵀ G B)⁻¹ (Bᵀ G B) c = eᵀ B c = ⟨e, ξ⟩`),
139 /// and it collapses to the orthogonal tangent projection `B Bᵀ e` exactly
140 /// when `B` is metric-orthonormal / the metric is the embedded one. It is the
141 /// mathematically correct fallback, so a future non-identity-metric manifold
142 /// is never silently first-order wrong.
143 ///
144 /// Manifolds whose tangent projection already coincides with this (every
145 /// *embedded* manifold carrying the induced metric — Euclidean, Sphere,
146 /// Circle, Torus, Grassmann) override with the O(m) `project_tangent`;
147 /// manifolds with a slick closed form (SPD: `P·sym(E)·P`; Stiefel:
148 /// `E − Y Eᵀ Y`) override with that, avoiding the dense `m×m` metric tensor.
149 fn riemannian_gradient(
150 &self,
151 point: ArrayView1<'_, f64>,
152 euclidean_grad: ArrayView1<'_, f64>,
153 ) -> GeometryResult<Array1<f64>> {
154 let m = self.ambient_dim();
155 check_len("riemannian_gradient point", point.len(), m)?;
156 check_len(
157 "riemannian_gradient euclidean_grad",
158 euclidean_grad.len(),
159 m,
160 )?;
161 let b = self.tangent_basis(point)?; // m × d
162 let g = self.metric_tensor(point)?; // m × m
163 // Bᵀ e (length d) and the Gram matrix Bᵀ G B (d × d).
164 let bt = b.t();
165 let bte = bt.dot(&euclidean_grad.to_owned());
166 let gb = g.dot(&b);
167 let btgb = bt.dot(&gb);
168 if btgb.nrows() == 0 {
169 // A zero-dimensional tangent space (no degrees of freedom): the only
170 // tangent vector is 0.
171 return Ok(Array1::<f64>::zeros(m));
172 }
173 // Solve (BᵀGB) c = Bᵀ e for the basis coordinates of v, then v = B c.
174 let c = inverse(&btgb)?.dot(&bte);
175 Ok(b.dot(&c))
176 }
177
178 fn retract(
179 &self,
180 point: ArrayView1<'_, f64>,
181 tangent_vec: ArrayView1<'_, f64>,
182 ) -> GeometryResult<Array1<f64>> {
183 self.exp_map(point, tangent_vec)
184 }
185
186 /// Whether [`retract`](Self::retract) is at least a SECOND-ORDER retraction,
187 /// i.e. `D²(f∘R_x)(0) = Hess f(x)` for all `f`, so the trust-region quadratic
188 /// model built from the Riemannian Hessian is a valid second-order model of
189 /// `f` along the retraction (issue #956).
190 ///
191 /// Manifolds whose `retract` is the exponential map or another second-order
192 /// retraction return `true` (the default — the default `retract` *is*
193 /// `exp_map`, which is second-order). A manifold exposing only a FIRST-ORDER
194 /// retraction (e.g. the Stiefel/Grassmann QR retraction `qf(Y + Δ)`, whose
195 /// acceleration at `0` is not normal to the manifold) must override this to
196 /// `false`: the linear model term `Df_x[η]` is retraction-independent and
197 /// stays correct, but the Riemannian-Hessian quadratic term is *not* the
198 /// second derivative of `f∘R_x` and would corrupt the predicted-vs-actual
199 /// reduction ratio `ρ` and hence the trust-region radius control. The trust
200 /// region falls back to the first-order-correct Cauchy model in that case.
201 fn retraction_is_second_order(&self) -> bool {
202 true
203 }
204
205 /// Vector–Jacobian product of the ambient map `exp_p(v)`.
206 ///
207 /// Given a cotangent `grad_output` w.r.t. the ambient output of
208 /// [`exp_map`](Self::exp_map), return `(grad_point, grad_tangent)`, the
209 /// pullbacks w.r.t. the base point `p` and the (raw, unprojected) tangent
210 /// input `v`. This is the analytic backward used by reverse-mode autodiff
211 /// wrappers (e.g. the Python `torch.autograd.Function` around
212 /// `manifold_exp_map`); it must never be the silent straight-through
213 /// identity for a curved manifold.
214 ///
215 /// The default is the exact VJP for *flat* manifolds, where
216 /// `exp_p(v) = p + v` in ambient coordinates and so both Jacobians are the
217 /// identity (Euclidean, Circle, Torus, and products thereof). Curved
218 /// manifolds **must** override this with their analytic Jacobi-field VJP;
219 /// a manifold without a closed form must override it to return an error
220 /// rather than inherit the wrong identity default.
221 fn exp_map_vjp(
222 &self,
223 point: ArrayView1<'_, f64>,
224 tangent_vec: ArrayView1<'_, f64>,
225 grad_output: ArrayView1<'_, f64>,
226 ) -> GeometryResult<(Array1<f64>, Array1<f64>)> {
227 let m = self.ambient_dim();
228 check_len("exp_map_vjp point", point.len(), m)?;
229 check_len("exp_map_vjp tangent", tangent_vec.len(), m)?;
230 check_len("exp_map_vjp grad_output", grad_output.len(), m)?;
231 Ok((grad_output.to_owned(), grad_output.to_owned()))
232 }
233}
234
235#[derive(Debug, Clone, PartialEq)]
236pub enum ManifoldSpec {
237 Euclidean(usize),
238 Circle,
239 Sphere { intrinsic_dim: usize },
240 Torus { dim: usize },
241 Grassmann { k: usize, n: usize },
242 Stiefel { k: usize, n: usize },
243 Spd { n: usize },
244 Product(Vec<ManifoldSpec>),
245}
246
247impl ManifoldSpec {
248 /// Instantiate the concrete [`RiemannianManifold`] for this descriptor.
249 ///
250 /// Fallible because the constrained-frame families have nonempty domains:
251 /// `Gr(k, n)` and `St(n, k)` exist only for `1 ≤ k ≤ n`. An out-of-domain
252 /// descriptor is rejected here (and recursively for [`Product`] parts)
253 /// before any dimension, projection, exponential, or curvature computation
254 /// can run on a nonexistent manifold.
255 ///
256 /// [`Product`]: Self::Product
257 pub fn build(&self) -> GeometryResult<Box<dyn RiemannianManifold>> {
258 match self {
259 Self::Euclidean(dim) => Ok(Box::new(crate::EuclideanManifold::new(*dim))),
260 Self::Circle => Ok(Box::new(crate::CircleManifold::new())),
261 Self::Sphere { intrinsic_dim } => Ok(Box::new(crate::SphereManifold::new(
262 *intrinsic_dim,
263 ))),
264 Self::Torus { dim } => Ok(Box::new(crate::TorusManifold::new(*dim))),
265 Self::Grassmann { k, n } => {
266 Ok(Box::new(crate::GrassmannManifold::new(*k, *n)?))
267 }
268 Self::Stiefel { k, n } => Ok(Box::new(crate::StiefelManifold::new(*k, *n)?)),
269 Self::Spd { n } => Ok(Box::new(crate::SpdManifold::new(*n))),
270 Self::Product(parts) => {
271 let mut built = Vec::with_capacity(parts.len());
272 for part in parts {
273 built.push(part.build()?);
274 }
275 Ok(Box::new(crate::ProductManifold::new(built)))
276 }
277 }
278 }
279}
280
281pub(crate) const fn check_len(
282 context: &'static str,
283 got: usize,
284 expected: usize,
285) -> GeometryResult<()> {
286 if got == expected {
287 Ok(())
288 } else {
289 Err(GeometryError::DimensionMismatch {
290 context,
291 expected,
292 got,
293 })
294 }
295}
296
297pub(crate) fn dot(a: ArrayView1<'_, f64>, b: ArrayView1<'_, f64>) -> f64 {
298 assert_eq!(a.len(), b.len());
299 let mut out = 0.0;
300 for i in 0..a.len() {
301 out += a[i] * b[i];
302 }
303 out
304}
305
306/// Multi-GPU row-tiled matrix product `A·B`, fanned across **all** usable
307/// devices.
308///
309/// `A` is `m×k` and `B` is `k×n`; the result is `m×n`. The single-device
310/// `fast_ab` shim already offloads this GEMM, but it pins the launch to the
311/// primary device. For a tall `A` (many independent output rows — the common
312/// case when a manifold operation is applied to a large batch of points/atoms),
313/// the rows split cleanly across the pool: we reshape `A` into a
314/// `tiles × rows_per_tile × k` batch and call the broadcast-`B` strided-batched
315/// GEMM, which [`crate::gpu::pool::scatter_batched`]es one cuBLAS call per device
316/// on its own bound context (`b` is shared across every tile). The output tiles
317/// are stitched back into the `m×n` result. Any leftover rows that don't fill a
318/// whole tile, and the entire batch when the pool has one device / the workload
319/// is below the multi-GPU floor / the runtime is unavailable, fall through to the
320/// auto-dispatch `fast_ab` (single-device GPU or faer). f64 throughout, so the
321/// result is identical regardless of which path produced it.
322///
323/// Choosing the tiling: we target as many equal tiles as there are output rows
324/// can support while keeping each tile a non-trivial GEMM, so the batch axis is
325/// long enough to cross `crate::gpu::linalg_dispatch`'s multi-GPU batch floor and spread
326/// across every device.
327pub(crate) fn fast_ab_rows_multi_gpu(
328 a: ArrayView2<'_, f64>,
329 b: ArrayView2<'_, f64>,
330) -> Array2<f64> {
331 use gam_linalg::faer_ndarray::fast_ab;
332 let (m, k) = a.dim();
333 let (kb, n) = b.dim();
334 assert_eq!(k, kb, "fast_ab_rows_multi_gpu inner dimension mismatch");
335
336 // Only worth the reshape/stitch overhead when the pool actually has more than
337 // one device and there are enough rows to tile across it; otherwise the plain
338 // single-device shim is strictly better.
339 let multi_gpu = gam_linalg::gpu_hook::gpu_dispatch().is_some_and(|d| d.device_count() > 1);
340 // The batch axis must clear the multi-GPU floor used inside the dispatch
341 // layer (64) for the split to engage, so we need at least that many tiles.
342 const MIN_TILES: usize = 64;
343 const MIN_TILE_ROWS: usize = 4;
344 if multi_gpu && m >= MIN_TILES * MIN_TILE_ROWS && n > 0 {
345 let rows_per_tile = (m / MIN_TILES).max(MIN_TILE_ROWS);
346 let tiles = m / rows_per_tile;
347 let covered = tiles * rows_per_tile;
348 // Reshape the first `covered` rows into a tiles×rows_per_tile×k batch
349 // (row-major reshape is exactly the row-block tiling we want).
350 let a3 = a
351 .slice(ndarray::s![0..covered, ..])
352 .to_owned()
353 .into_shape_with_order((tiles, rows_per_tile, k));
354 if let Ok(a3) = a3 {
355 if let Some(result3) = gam_linalg::gpu_hook::gpu_dispatch()
356 .and_then(|d| d.try_fast_ab_broadcast_b_batched(a3.view(), b.view()))
357 {
358 let mut out = Array2::<f64>::zeros((m, n));
359 for t in 0..tiles {
360 let block = result3.index_axis(ndarray::Axis(0), t);
361 out.slice_mut(ndarray::s![t * rows_per_tile..(t + 1) * rows_per_tile, ..])
362 .assign(&block);
363 }
364 // Tail rows that didn't fill a whole tile finish on the
365 // single-device shim; the result is bit-identical f64.
366 if covered < m {
367 let tail = fast_ab(&a.slice(ndarray::s![covered..m, ..]), &b);
368 out.slice_mut(ndarray::s![covered..m, ..]).assign(&tail);
369 }
370 return out;
371 }
372 }
373 }
374 // Single device / small batch / no runtime: plain auto-dispatch GEMM.
375 fast_ab(&a, &b)
376}
377
378pub(crate) fn norm(a: ArrayView1<'_, f64>) -> f64 {
379 dot(a, a).sqrt()
380}
381
382/// Metric inner product `aᵀ G b` for a (symmetric) metric tensor `G`.
383///
384/// For a manifold whose `metric_tensor` is the ambient identity this reduces
385/// to the Euclidean `dot`; for one with a genuine Riemannian metric (e.g. the
386/// affine-invariant SPD metric) it evaluates the correct geometric inner
387/// product on the tangent space.
388pub(crate) fn quad_form(
389 g: ArrayView2<'_, f64>,
390 a: ArrayView1<'_, f64>,
391 b: ArrayView1<'_, f64>,
392) -> f64 {
393 let n = a.len();
394 assert_eq!(g.nrows(), n);
395 assert_eq!(g.ncols(), b.len());
396 // aᵀ G b: the inner matrix–vector product G·b is the O(n²) cost and is the
397 // hot kernel of every metric inner product (g_inner / g_norm) and of the
398 // metric Gram–Schmidt tangent basis. Route it through the GPU-dispatched
399 // fast_av shim so large-ambient metrics (SPD/Stiefel/Grassmann n²×n²) offload
400 // to the GPU; the trailing a·(Gb) is an O(n) dot.
401 let gb = gam_linalg::faer_ndarray::fast_av(&g, &b);
402 dot(a, gb.view())
403}
404
405pub(crate) fn identity(n: usize) -> Array2<f64> {
406 let mut out = Array2::<f64>::zeros((n, n));
407 for i in 0..n {
408 out[[i, i]] = 1.0;
409 }
410 out
411}
412
413pub(crate) fn zero_christoffel(dim: usize) -> Vec<Array2<f64>> {
414 (0..dim).map(|_| Array2::<f64>::zeros((dim, dim))).collect()
415}
416
417pub(crate) fn wrap_angle(theta: f64) -> f64 {
418 let two_pi = std::f64::consts::PI * 2.0;
419 (theta + std::f64::consts::PI).rem_euclid(two_pi) - std::f64::consts::PI
420}
421
422pub(crate) fn sym(a: &Array2<f64>) -> Array2<f64> {
423 let mut out = a.clone();
424 for i in 0..a.nrows() {
425 for j in 0..a.ncols() {
426 out[[i, j]] = 0.5 * (a[[i, j]] + a[[j, i]]);
427 }
428 }
429 out
430}
431
432pub(crate) fn from_flat(
433 v: ArrayView1<'_, f64>,
434 rows: usize,
435 cols: usize,
436) -> GeometryResult<Array2<f64>> {
437 check_len("flat matrix", v.len(), rows * cols)?;
438 let mut out = Array2::<f64>::zeros((rows, cols));
439 for i in 0..rows {
440 for j in 0..cols {
441 out[[i, j]] = v[i * cols + j];
442 }
443 }
444 Ok(out)
445}
446
447pub(crate) fn flatten(a: &Array2<f64>) -> Array1<f64> {
448 let mut out = Array1::<f64>::zeros(a.nrows() * a.ncols());
449 for i in 0..a.nrows() {
450 for j in 0..a.ncols() {
451 out[i * a.ncols() + j] = a[[i, j]];
452 }
453 }
454 out
455}
456
457/// Build a **Euclidean-orthonormal** basis of the tangent space at `point` by
458/// modified Gram–Schmidt over the projected ambient standard basis.
459///
460/// The returned columns satisfy `Qᵀ Q = I` under the *ambient Euclidean* inner
461/// product (the plain `dot`). This is the correct, intended basis for a
462/// manifold whose Riemannian metric *is* the embedded Euclidean metric on its
463/// horizontal tangent space — notably the **Grassmann** manifold, where the
464/// tangent inner product is `tr(Δ₁ᵀΔ₂)`.
465///
466/// It is **not** metric-orthonormal for a manifold with a non-Euclidean metric
467/// (Stiefel's canonical metric `⟨Δ₁,Δ₂⟩ = tr(Δ₁ᵀ(I−½YYᵀ)Δ₂)`, or SPD's
468/// affine-invariant metric): for those, use
469/// [`tangent_basis_metric_orthonormal`], which Gram–Schmidts under the
470/// manifold's own `metric_tensor`.
471///
472/// This is the shared engine behind [`tangent_basis`](RiemannianManifold::tangent_basis)
473/// for the matrix manifolds whose tangent space has no closed-form basis. It
474/// walks the `n × k` standard basis in column-major order (outer `col`, inner
475/// `row`), projects each `e_{row,col}` onto the tangent space via
476/// `m.project_tangent`, re-orthogonalizes against the columns accepted so far,
477/// and keeps it iff its residual norm exceeds the `1e-10` drop tolerance,
478/// stopping the moment `m.dim()` independent directions have been collected.
479/// Each caller keeps its own input validation and then delegates here, so the
480/// numerically delicate orthogonalization order, drop tolerance, and early-exit
481/// logic live in exactly one place.
482pub(crate) fn projected_standard_basis_tangent<M: RiemannianManifold + ?Sized>(
483 m: &M,
484 point: ArrayView1<'_, f64>,
485 n: usize,
486 k: usize,
487) -> GeometryResult<Array2<f64>> {
488 let mut columns: Vec<Array1<f64>> = Vec::with_capacity(m.dim());
489 for col in 0..k {
490 for row in 0..n {
491 let mut e = Array2::<f64>::zeros((n, k));
492 e[[row, col]] = 1.0;
493 let mut v = m.project_tangent(point, flatten(&e).view())?;
494 for q in &columns {
495 let proj = dot(q.view(), v.view());
496 v -= &(q * proj);
497 }
498 let nrm = dot(v.view(), v.view()).sqrt();
499 if nrm > 1.0e-10 {
500 columns.push(v / nrm);
501 }
502 if columns.len() == m.dim() {
503 let mut out = Array2::<f64>::zeros((m.ambient_dim(), m.dim()));
504 for j in 0..columns.len() {
505 for i in 0..m.ambient_dim() {
506 out[[i, j]] = columns[j][i];
507 }
508 }
509 return Ok(out);
510 }
511 }
512 }
513 Ok(Array2::<f64>::zeros((m.ambient_dim(), columns.len())))
514}
515
516/// Build a **metric-orthonormal** basis of the tangent space at `point`, i.e. a
517/// set of columns `Q` satisfying `Qᵀ W Q = I` where `W = m.metric_tensor(point)`
518/// is the manifold's Riemannian metric in flattened ambient coordinates.
519///
520/// This is the correct tangent basis for a manifold whose metric is **not** the
521/// embedded Euclidean inner product — Stiefel's canonical metric
522/// `⟨Δ₁,Δ₂⟩ = tr(Δ₁ᵀ(I−½YYᵀ)Δ₂)` and SPD's affine-invariant metric. (For a
523/// Euclidean-metric manifold like Grassmann, `W = I` and this coincides with
524/// [`projected_standard_basis_tangent`].)
525///
526/// Same projected-standard-basis walk as the Euclidean routine, but every inner
527/// product is the metric inner product `⟨u,v⟩_W = uᵀ W v` (via
528/// [`quad_form`]): Gram–Schmidt projections subtract `⟨q,v⟩_W · q` and the
529/// retained columns are normalized by `‖v‖_W = sqrt(⟨v,v⟩_W)`, so the resulting
530/// `Q` is orthonormal *in the manifold's metric*.
531///
532/// Concretely on `St(3, 2)` at `Y = [e₁, e₂]`, the vertical tangent
533/// `Δ = Y·[[0,−1],[1,0]]` has Euclidean norm² 2 but canonical-metric norm² 1, so
534/// a metric-orthonormal basis must reflect that — the Euclidean routine would
535/// mis-scale it.
536pub(crate) fn tangent_basis_metric_orthonormal<M: RiemannianManifold + ?Sized>(
537 m: &M,
538 point: ArrayView1<'_, f64>,
539 n: usize,
540 k: usize,
541) -> GeometryResult<Array2<f64>> {
542 let w = m.metric_tensor(point)?;
543 let mut columns: Vec<Array1<f64>> = Vec::with_capacity(m.dim());
544 for col in 0..k {
545 for row in 0..n {
546 let mut e = Array2::<f64>::zeros((n, k));
547 e[[row, col]] = 1.0;
548 let mut v = m.project_tangent(point, flatten(&e).view())?;
549 for q in &columns {
550 let proj = quad_form(w.view(), q.view(), v.view());
551 v -= &(q * proj);
552 }
553 let nrm = quad_form(w.view(), v.view(), v.view()).max(0.0).sqrt();
554 if nrm > 1.0e-10 {
555 columns.push(v / nrm);
556 }
557 if columns.len() == m.dim() {
558 let mut out = Array2::<f64>::zeros((m.ambient_dim(), m.dim()));
559 for j in 0..columns.len() {
560 for i in 0..m.ambient_dim() {
561 out[[i, j]] = columns[j][i];
562 }
563 }
564 return Ok(out);
565 }
566 }
567 }
568 Ok(Array2::<f64>::zeros((m.ambient_dim(), columns.len())))
569}
570
571/// Thin/compact Gram–Schmidt QR factorization `A = Q·R` for an `n×k` input
572/// (`n ≥ k`). The returned `Q` is `n×k` with **orthonormal columns**
573/// (`QᵀQ = I`) and `R` is `k×k` upper-triangular.
574///
575/// On a rank-deficient column (residual ≈ 0 after orthogonalizing against the
576/// previously accepted columns) the diagonal `R[j, j]` is set to 0 and a
577/// *fallback* unit column is synthesized so the column count stays `k` and `Q`
578/// remains a valid orthonormal frame. The fallback is a standard axis `e_a`
579/// Gram–Schmidted against ALL previously accepted columns and renormalized; if
580/// that residual also vanishes (the axis lies in the accepted span) the next
581/// axis is tried, until an axis with a nonzero orthogonal residual is found.
582/// Simply planting `e_j` (the old behavior) breaks orthonormality — e.g. two
583/// identical columns `(1,1)/√2` would yield a fallback `e₂` with
584/// `q₁·q₂ = 1/√2 ≠ 0`.
585pub(crate) fn qr_thin(a: &Array2<f64>) -> (Array2<f64>, Array2<f64>) {
586 let n = a.nrows();
587 let k = a.ncols();
588 let mut q = Array2::<f64>::zeros((n, k));
589 let mut r = Array2::<f64>::zeros((k, k));
590 for j in 0..k {
591 let mut v = a.column(j).to_owned();
592 for i in 0..j {
593 let qi = q.column(i);
594 let rij = dot(qi, v.view());
595 r[[i, j]] = rij;
596 for row in 0..n {
597 v[row] -= rij * q[[row, i]];
598 }
599 }
600 let nrm = norm(v.view());
601 if nrm > GEOMETRY_EPS {
602 r[[j, j]] = nrm;
603 for row in 0..n {
604 q[[row, j]] = v[row] / nrm;
605 }
606 } else {
607 // Rank-deficient column: `R[j, j] = 0`. Synthesize a fallback unit
608 // column orthogonal to ALL accepted columns 0..j by Gram–Schmidting
609 // a standard axis against them; try successive axes until one has a
610 // nonzero orthogonal residual (always succeeds for j < n since the
611 // accepted columns span a j-dimensional subspace of ℝⁿ, leaving an
612 // (n−j)-dimensional orthogonal complement that at least one axis
613 // touches).
614 r[[j, j]] = 0.0;
615 for axis in 0..n {
616 let mut f = Array1::<f64>::zeros(n);
617 f[axis] = 1.0;
618 for i in 0..j {
619 let qi = q.column(i);
620 let proj = dot(qi, f.view());
621 for row in 0..n {
622 f[row] -= proj * q[[row, i]];
623 }
624 }
625 let fnrm = norm(f.view());
626 if fnrm > GEOMETRY_EPS {
627 for row in 0..n {
628 q[[row, j]] = f[row] / fnrm;
629 }
630 break;
631 }
632 }
633 }
634 }
635 (q, r)
636}
637
638pub(crate) fn inverse(a: &Array2<f64>) -> GeometryResult<Array2<f64>> {
639 let n = a.nrows();
640 if n != a.ncols() {
641 return Err(GeometryError::Singular("inverse requires a square matrix"));
642 }
643 let mut aug = Array2::<f64>::zeros((n, 2 * n));
644 for i in 0..n {
645 for j in 0..n {
646 aug[[i, j]] = a[[i, j]];
647 }
648 aug[[i, n + i]] = 1.0;
649 }
650 for col in 0..n {
651 let mut pivot = col;
652 let mut best = aug[[col, col]].abs();
653 for row in col + 1..n {
654 let val = aug[[row, col]].abs();
655 if val > best {
656 best = val;
657 pivot = row;
658 }
659 }
660 if best < GEOMETRY_EPS {
661 return Err(GeometryError::Singular("matrix inverse pivot underflow"));
662 }
663 if pivot != col {
664 for j in 0..2 * n {
665 let tmp = aug[[col, j]];
666 aug[[col, j]] = aug[[pivot, j]];
667 aug[[pivot, j]] = tmp;
668 }
669 }
670 let scale = aug[[col, col]];
671 for j in 0..2 * n {
672 aug[[col, j]] /= scale;
673 }
674 for row in 0..n {
675 if row == col {
676 continue;
677 }
678 let factor = aug[[row, col]];
679 for j in 0..2 * n {
680 aug[[row, j]] -= factor * aug[[col, j]];
681 }
682 }
683 }
684 let mut out = Array2::<f64>::zeros((n, n));
685 for i in 0..n {
686 for j in 0..n {
687 out[[i, j]] = aug[[i, n + j]];
688 }
689 }
690 Ok(out)
691}
692
693/// Sweep budget multiplier for the classical Jacobi eigensolver: the iteration
694/// cap is `JACOBI_SWEEP_BUDGET · n²`. Classical (largest-off-diagonal) Jacobi
695/// converges quadratically once the off-diagonals are small, needing only a
696/// handful of full `O(n²)` sweeps; this generous multiple lets even clustered
697/// spectra finish while still failing loudly on a genuinely stalled matrix.
698const JACOBI_SWEEP_BUDGET: usize = 64;
699
700/// Relative off-diagonal convergence threshold for [`jacobi_symmetric`]: the
701/// largest off-diagonal magnitude must fall below `JACOBI_REL_TOL · ‖A‖_F`. Near
702/// `f64` precision so the diagonalization is accurate to working precision.
703const JACOBI_REL_TOL: f64 = 1.0e-13;
704
705pub(crate) fn jacobi_symmetric(a: &Array2<f64>) -> GeometryResult<(Array1<f64>, Array2<f64>)> {
706 let n = a.nrows();
707 if n != a.ncols() {
708 return Err(GeometryError::InvalidPoint(
709 "Jacobi eigensolver requires square input",
710 ));
711 }
712 let mut d = sym(a);
713 let mut v = identity(n);
714 let max_iter = JACOBI_SWEEP_BUDGET * n.max(1) * n.max(1);
715 // Relative convergence threshold: the largest off-diagonal magnitude must
716 // fall to `1e-13 * ||A||_F`. A fixed absolute `1e-13` is meaningless for
717 // matrices whose scale is far from unity (a well-scaled large-norm matrix
718 // could never reach it; a tiny-norm matrix would "converge" trivially),
719 // and silently returning the partially-diagonalized state after exhausting
720 // `max_iter` hides genuine non-convergence (e.g. clustered/degenerate
721 // spectra that stall the classical sweep). The Frobenius norm is invariant
722 // under the orthogonal Jacobi rotations, so it is computed once from the
723 // symmetrized input.
724 let frob_norm = {
725 let mut acc = 0.0;
726 for i in 0..n {
727 for j in 0..n {
728 acc += d[[i, j]] * d[[i, j]];
729 }
730 }
731 acc.sqrt()
732 };
733 let threshold = JACOBI_REL_TOL * frob_norm;
734 let mut converged = false;
735 for _ in 0..max_iter {
736 let mut p = 0usize;
737 let mut q = 0usize;
738 let mut best = 0.0;
739 for i in 0..n {
740 for j in i + 1..n {
741 let val = d[[i, j]].abs();
742 if val > best {
743 best = val;
744 p = i;
745 q = j;
746 }
747 }
748 }
749 // `best <= threshold` (rather than `<`) makes the exactly-diagonal and
750 // zero-norm cases (`best == threshold == 0`) converge immediately.
751 if best <= threshold {
752 converged = true;
753 break;
754 }
755 let tau = (d[[q, q]] - d[[p, p]]) / (2.0 * d[[p, q]]);
756 let t = tau.signum() / (tau.abs() + (1.0 + tau * tau).sqrt());
757 let c = 1.0 / (1.0 + t * t).sqrt();
758 let s = t * c;
759 for k in 0..n {
760 let dpk = d[[p, k]];
761 let dqk = d[[q, k]];
762 d[[p, k]] = c * dpk - s * dqk;
763 d[[q, k]] = s * dpk + c * dqk;
764 }
765 for k in 0..n {
766 let dkp = d[[k, p]];
767 let dkq = d[[k, q]];
768 d[[k, p]] = c * dkp - s * dkq;
769 d[[k, q]] = s * dkp + c * dkq;
770 }
771 for k in 0..n {
772 let vkp = v[[k, p]];
773 let vkq = v[[k, q]];
774 v[[k, p]] = c * vkp - s * vkq;
775 v[[k, q]] = s * vkp + c * vkq;
776 }
777 }
778 if !converged {
779 return Err(GeometryError::Singular(
780 "Jacobi eigensolver did not converge within max_iter (off-diagonal mass above 1e-13 * Frobenius norm)",
781 ));
782 }
783 let mut evals = Array1::<f64>::zeros(n);
784 for i in 0..n {
785 evals[i] = d[[i, i]];
786 }
787 Ok((evals, v))
788}
789
790pub(crate) fn spectral_map_spd(
791 a: &Array2<f64>,
792 f: impl Fn(f64) -> GeometryResult<f64>,
793) -> GeometryResult<Array2<f64>> {
794 let (evals, evecs) = jacobi_symmetric(a)?;
795 let n = a.nrows();
796 let mut diag = Array2::<f64>::zeros((n, n));
797 for i in 0..n {
798 if evals[i] <= 0.0 || !evals[i].is_finite() {
799 return Err(GeometryError::InvalidPoint(
800 "SPD eigenvalue is not positive",
801 ));
802 }
803 diag[[i, i]] = f(evals[i])?;
804 }
805 // Reconstruction V·f(Λ)·Vᵀ: two dense n×n products GPU-dispatched via
806 // fast_ab/fast_abt for large ambient dimension.
807 use gam_linalg::faer_ndarray::{fast_ab, fast_abt};
808 Ok(fast_abt(&fast_ab(&evecs, &diag), &evecs))
809}
810
811pub(crate) fn spectral_map_symmetric(
812 a: &Array2<f64>,
813 f: impl Fn(f64) -> GeometryResult<f64>,
814) -> GeometryResult<Array2<f64>> {
815 let (evals, evecs) = jacobi_symmetric(a)?;
816 let n = a.nrows();
817 let mut diag = Array2::<f64>::zeros((n, n));
818 for i in 0..n {
819 diag[[i, i]] = f(evals[i])?;
820 }
821 // Reconstruction V·f(Λ)·Vᵀ, GPU-dispatched via fast_ab/fast_abt.
822 use gam_linalg::faer_ndarray::{fast_ab, fast_abt};
823 Ok(fast_abt(&fast_ab(&evecs, &diag), &evecs))
824}
825
826/// Thin singular value decomposition of a tall matrix `Y` (`n × k`, `n ≥ k`)
827/// via the symmetric eigendecomposition of the small `k × k` Gram matrix
828/// `YᵀY = V Σ² Vᵀ`: returns `(U, σ, V)` with `Y = U diag(σ) Vᵀ`, where `U` is
829/// `n × k` with orthonormal columns spanning `range(Y)`, `σ` holds the singular
830/// values, and `V` is `k × k` orthogonal. Forming the Gram keeps the
831/// eigenproblem at the small dimension `k`; the two products that carry the
832/// large ambient dimension `n` (`YᵀY` and `U = Y V Σ⁻¹`) are GPU-dispatched.
833///
834/// A numerically-zero singular value (`σ ≤ GEOMETRY_EPS`) leaves the
835/// corresponding `U` column zero rather than dividing through, which is what the
836/// Grassmann/Stiefel geodesic needs (a zero singular value is a vanishing
837/// principal angle); a caller requiring full rank inspects `σ` itself.
838pub(crate) fn thin_svd_gram(
839 y: &Array2<f64>,
840) -> GeometryResult<(Array2<f64>, Array1<f64>, Array2<f64>)> {
841 use gam_linalg::faer_ndarray::{fast_ab, fast_atb};
842 let (n, k) = y.dim();
843 let gram = fast_atb(y, y);
844 let (evals, v) = jacobi_symmetric(&gram)?;
845 let yv = fast_ab(y, &v);
846 let mut sigma = Array1::<f64>::zeros(k);
847 let mut u = Array2::<f64>::zeros((n, k));
848 for j in 0..k {
849 sigma[j] = evals[j].max(0.0).sqrt();
850 if sigma[j] > GEOMETRY_EPS {
851 let inv_sigma = 1.0 / sigma[j];
852 for i in 0..n {
853 u[[i, j]] = yv[[i, j]] * inv_sigma;
854 }
855 }
856 }
857 Ok((u, sigma, v))
858}
859
860/// Dense matrix exponential `exp(A)` via scaling-and-squaring with a truncated
861/// Taylor series. The Frobenius norm of `A` is driven below 1/4 by repeated
862/// halving (`A → A / 2^s`), where Taylor converges rapidly and stably; the
863/// result is then squared `s` times. With the scaled norm `θ < 1/4`, the
864/// degree-12 Taylor tail is bounded by `θ^{13} / 13! · 1/(1 - θ)`; since `13! ≈
865/// 6.23e9`, this is below `4·0.25^{13}/6.23e9 ≈ 3.8e-18`, i.e. under one f64 ulp,
866/// so the fixed degree truly reaches full f64 precision (the `< 1/2` threshold
867/// previously used left a ~2e-14 tail, two orders above an ulp). This is the
868/// standard, exact algorithm; no eigendecomposition is assumed (the inputs here
869/// are the non-normal canonical-metric block matrices on Stiefel, which are
870/// skew-like but not symmetric, so `spectral_map_*` does not apply).
871pub(crate) fn matrix_exp(a: &Array2<f64>) -> GeometryResult<Array2<f64>> {
872 let n = a.nrows();
873 if n != a.ncols() {
874 return Err(GeometryError::InvalidPoint(
875 "matrix exponential requires square input",
876 ));
877 }
878 if !a.iter().all(|v| v.is_finite()) {
879 return Err(GeometryError::InvalidPoint(
880 "matrix exponential requires finite entries",
881 ));
882 }
883 // Frobenius norm; choose the squaring count so the scaled matrix has norm
884 // below 1/4, which keeps the degree-12 Taylor truncation under one f64 ulp.
885 let mut frob = 0.0;
886 for v in a.iter() {
887 frob += v * v;
888 }
889 let frob = frob.sqrt();
890 let squarings = if frob > 0.25 {
891 (frob / 0.25).log2().ceil() as i32
892 } else {
893 0
894 };
895 let scale = 2.0_f64.powi(squarings);
896 let a_scaled = a / scale;
897
898 // exp(A_scaled) = sum_{k>=0} A_scaled^k / k! by term recurrence:
899 // term_k = term_{k-1} · A_scaled / k.
900 // Both the Taylor term recurrence and the scaling-and-squaring use dense
901 // n×n products; GPU-dispatch them via fast_ab for large blocks.
902 use gam_linalg::faer_ndarray::fast_ab;
903 let mut result = identity(n);
904 let mut term = identity(n);
905 for k in 1..=12 {
906 term = fast_ab(&term, &a_scaled) / (k as f64);
907 result = result + &term;
908 }
909 // exp(A) = exp(A_scaled)^{2^squarings}.
910 for _ in 0..squarings {
911 result = fast_ab(&result, &result);
912 }
913 Ok(result)
914}
915
916/// Principal real logarithm of a real **orthogonal** matrix `V`, returned as
917/// the skew-symmetric `S` with `exp(S) = V`.
918///
919/// Rather than reach for a general (Schur-based) matrix logarithm — which the
920/// linear-algebra backend does not expose — this exploits the structure of an
921/// orthogonal matrix. Split `V = M + K` into its symmetric and skew parts
922///
923/// ```text
924/// M = ½(V + Vᵀ) (symmetric, eigenvalues cos θⱼ ∈ [−1, 1])
925/// K = ½(V − Vᵀ) (skew)
926/// ```
927///
928/// For an orthogonal (hence normal) `V`, `M` and `K` are both polynomials in
929/// `V`, so they **commute** and are simultaneously block-diagonalizable. In an
930/// eigenbasis `Q` of the symmetric `M` (which the self-adjoint eigensolver
931/// returns), `K̃ = QᵀKQ` is block-diagonal across distinct eigenvalues of `M`.
932/// On each 2-D rotation plane `M` has the degenerate eigenvalue `cos θ` and `K`
933/// acts as a skew `[[0,−sin θ],[sin θ,0]]`, whose principal logarithm is the
934/// same skew matrix scaled by `θ / sin θ`. Because `cos θ ↦ θ = arccos(cos θ)`
935/// is single-valued on `(0, π)`, the scale `c(λ) = arccos(λ)/√(1−λ²)` is a
936/// well-defined function of the eigenvalue `λ` of `M`, independent of the
937/// arbitrary in-plane basis the eigensolver picks. The whole logarithm is then
938///
939/// ```text
940/// S = Q · (c(λ̄ᵢⱼ) ⊙ K̃) · Qᵀ , λ̄ᵢⱼ = ½(λᵢ + λⱼ),
941/// ```
942///
943/// the element-wise scaling being exact on-block (where `λᵢ = λⱼ`) and
944/// multiplying a numerically-zero entry off-block (where `K̃ᵢⱼ ≈ 0` because the
945/// blocks are decoupled). The scaling is symmetric in `(i, j)`, so `S` stays
946/// skew.
947///
948/// An eigenvalue `λ → −1` is a rotation by `π`: the geodesic to that point is
949/// not unique (it is the cut locus / beyond the injectivity radius), so the
950/// principal logarithm does not exist. We refuse rather than return a value
951/// that silently picks one of the two equal-length geodesics.
952pub(crate) fn skew_log_orthogonal(v: &Array2<f64>) -> GeometryResult<Array2<f64>> {
953 use faer::Side;
954 use gam_linalg::faer_ndarray::{FaerEigh, fast_ab, fast_abt, fast_atb};
955
956 let n = v.nrows();
957 if v.ncols() != n {
958 return Err(GeometryError::InvalidPoint(
959 "matrix logarithm requires a square matrix",
960 ));
961 }
962 if !v.iter().all(|x| x.is_finite()) {
963 return Err(GeometryError::InvalidPoint(
964 "matrix logarithm requires finite entries",
965 ));
966 }
967 let mut m = Array2::<f64>::zeros((n, n));
968 let mut k = Array2::<f64>::zeros((n, n));
969 for i in 0..n {
970 for j in 0..n {
971 m[[i, j]] = 0.5 * (v[[i, j]] + v[[j, i]]);
972 k[[i, j]] = 0.5 * (v[[i, j]] - v[[j, i]]);
973 }
974 }
975 let (evals, q) = m.eigh(Side::Lower).map_err(|_| {
976 GeometryError::Singular("matrix logarithm: symmetric eigendecomposition failed")
977 })?;
978 // A rotation by π (eigenvalue −1 of V) is the cut locus: the logarithm is
979 // not single-valued there. Detect it from M's spectrum directly — on such a
980 // plane sin θ = 0 so K carries no signal and an element-wise scaling would
981 // silently drop the π rotation.
982 const CUT_LOCUS_EPS: f64 = 1.0e-7;
983 if evals.iter().any(|&lam| lam <= -1.0 + CUT_LOCUS_EPS) {
984 return Err(GeometryError::Unsupported(
985 "matrix logarithm undefined: rotation angle at π (beyond the injectivity radius)",
986 ));
987 }
988 let kt = fast_ab(&fast_atb(&q, &k), &q); // K̃ = Qᵀ K Q
989 let mut st = Array2::<f64>::zeros((n, n));
990 for i in 0..n {
991 for j in 0..n {
992 let lam = (0.5 * (evals[i] + evals[j])).clamp(-1.0, 1.0);
993 let sin_theta = (1.0 - lam * lam).max(0.0).sqrt();
994 // c(λ) = θ / sin θ, with the removable singularity at θ = 0
995 // (λ = 1) taken in the limit c → 1.
996 let scale = if sin_theta <= 1.0e-9 {
997 1.0
998 } else {
999 lam.acos() / sin_theta
1000 };
1001 st[[i, j]] = scale * kt[[i, j]];
1002 }
1003 }
1004 let s = fast_abt(&fast_ab(&q, &st), &q); // Q S̃ Qᵀ
1005 // Project out the rounding-level symmetric part so the result is exactly
1006 // skew, as the logarithm of an orthogonal matrix must be.
1007 let mut out = Array2::<f64>::zeros((n, n));
1008 for i in 0..n {
1009 for j in 0..n {
1010 out[[i, j]] = 0.5 * (s[[i, j]] - s[[j, i]]);
1011 }
1012 }
1013 Ok(out)
1014}
1015
1016/// Complete the `m × p` matrix `cols` (assumed to have orthonormal columns) to
1017/// a full `m × m` orthogonal matrix `[cols | C]`, returning the completion in
1018/// place: the first `p` columns are `cols`, the remaining `m − p` are an
1019/// orthonormal basis of the orthogonal complement of `cols`'s column space.
1020///
1021/// The complement is built by Gram–Schmidt-ing the standard axes `e₀ … e_{m−1}`
1022/// (in order) against the accumulated columns, with one reorthogonalization
1023/// pass for numerical safety. Taking the axes in order means that when `cols`
1024/// is `[Iₚ; 0]` the completion is exactly `[0; I_{m−p}]`, so the assembled
1025/// matrix is the identity — the property the Stiefel logarithm relies on to
1026/// start its iteration near `I₂ₚ` for nearby frames. The result is forced into
1027/// `SO(m)` (determinant `+1`) by flipping the sign of the last completion
1028/// column when needed, so its principal logarithm is skew-symmetric.
1029pub(crate) fn orthonormal_completion(cols: &Array2<f64>) -> Array2<f64> {
1030 let m = cols.nrows();
1031 let p = cols.ncols();
1032 let mut basis = Array2::<f64>::zeros((m, m));
1033 for j in 0..p {
1034 for i in 0..m {
1035 basis[[i, j]] = cols[[i, j]];
1036 }
1037 }
1038 let mut filled = p;
1039 let mut axis = 0usize;
1040 while filled < m && axis < m {
1041 let mut f = Array1::<f64>::zeros(m);
1042 f[axis] = 1.0;
1043 // Two Gram–Schmidt passes against the columns accepted so far.
1044 for _ in 0..2 {
1045 for c in 0..filled {
1046 let col = basis.column(c);
1047 let proj = dot(col, f.view());
1048 for i in 0..m {
1049 f[i] -= proj * basis[[i, c]];
1050 }
1051 }
1052 }
1053 let nrm = norm(f.view());
1054 if nrm > GEOMETRY_EPS {
1055 for i in 0..m {
1056 basis[[i, filled]] = f[i] / nrm;
1057 }
1058 filled += 1;
1059 }
1060 axis += 1;
1061 }
1062 // Force det = +1 so the completion lies in SO(m) and its principal log is
1063 // skew. det of an orthogonal matrix is ±1; flip the last *appended* column
1064 // if −1. When nothing was appended (`p == m`, e.g. a square input) the
1065 // input columns are returned untouched — flipping one would corrupt the
1066 // caller's frame, and a square input's orientation is the caller's to own.
1067 if filled == m && m > p && matrix_det(&basis) < 0.0 {
1068 for i in 0..m {
1069 basis[[i, m - 1]] = -basis[[i, m - 1]];
1070 }
1071 }
1072 basis
1073}
1074
1075/// Determinant via Gaussian elimination with partial pivoting. Used only for
1076/// small orientation checks (e.g. forcing a completion into `SO(n)`); not a
1077/// hot path.
1078pub(crate) fn matrix_det(a: &Array2<f64>) -> f64 {
1079 let n = a.nrows();
1080 if n == 0 || a.ncols() != n {
1081 return 1.0;
1082 }
1083 let mut lu = a.clone();
1084 let mut det = 1.0_f64;
1085 for col in 0..n {
1086 // Partial pivot.
1087 let mut pivot = col;
1088 let mut best = lu[[col, col]].abs();
1089 for r in (col + 1)..n {
1090 let v = lu[[r, col]].abs();
1091 if v > best {
1092 best = v;
1093 pivot = r;
1094 }
1095 }
1096 if best == 0.0 {
1097 return 0.0;
1098 }
1099 if pivot != col {
1100 for c in 0..n {
1101 lu.swap([col, c], [pivot, c]);
1102 }
1103 det = -det;
1104 }
1105 det *= lu[[col, col]];
1106 for r in (col + 1)..n {
1107 let factor = lu[[r, col]] / lu[[col, col]];
1108 for c in col..n {
1109 lu[[r, c]] -= factor * lu[[col, c]];
1110 }
1111 }
1112 }
1113 det
1114}
1115
1116/// Cholesky factor `L` of a symmetric positive-definite matrix (`A = L Lᵀ`).
1117///
1118/// This is a *positive-definiteness* test, not a conditioning test: a genuine
1119/// SPD matrix with tiny eigenvalues (e.g. `[[1e-16]]`) must factor
1120/// successfully. A pivot is rejected only when it is non-finite or fails to be
1121/// strictly positive *relative to the matrix scale*. The floor
1122/// `GEOMETRY_EPS · max(1, trace(A)/n)` is the ambient scale of the matrix
1123/// multiplied by the relative machine-noise tolerance, so a positive pivot that
1124/// is merely small in absolute terms (but large relative to nothing — the whole
1125/// matrix is small) passes, while a zero, negative, or numerically-noise pivot
1126/// (indefinite / singular directions) is rejected.
1127///
1128/// Callers needing a *conditioning* margin (a lower bound on the smallest
1129/// eigenvalue) must check that separately; overloading this PD test with an
1130/// absolute `GEOMETRY_EPS` floor wrongly rejected well-formed small-scale SPD
1131/// points. No current caller (only `SpdManifold::matrix`, which validates SPD
1132/// membership) depends on a conditioning margin here.
1133pub(crate) fn cholesky_spd(a: &Array2<f64>) -> GeometryResult<Array2<f64>> {
1134 let n = a.nrows();
1135 if n != a.ncols() {
1136 return Err(GeometryError::InvalidPoint(
1137 "Cholesky requires square input",
1138 ));
1139 }
1140 // Scale-relative positive-definiteness floor. `trace(A)/n` is the mean
1141 // diagonal, which equals `mean(eigenvalues)` and is therefore the natural
1142 // scale of an SPD matrix's spectrum. The acceptance floor scales WITH the
1143 // matrix (it shrinks for tiny matrices), so a uniformly small but genuine
1144 // SPD matrix like `[[1e-16]]` — scale 1e-16, floor GEOMETRY_EPS·1e-16 =
1145 // 1e-28 — passes, while a pivot that has collapsed to numerical noise
1146 // relative to the matrix's own scale (the indefinite/singular directions)
1147 // is rejected. An absolute `GEOMETRY_EPS` floor would have wrongly rejected
1148 // such tiny SPD matrices; clamping the floor up to a constant would do the
1149 // same, so we deliberately let it shrink with the spectrum.
1150 let mut trace = 0.0_f64;
1151 for i in 0..n {
1152 trace += a[[i, i]];
1153 }
1154 if !trace.is_finite() {
1155 return Err(GeometryError::InvalidPoint(
1156 "matrix is not positive definite",
1157 ));
1158 }
1159 // Reference scale of the matrix's spectrum. The acceptance floor is this
1160 // scale times the relative tolerance, so a uniformly-tiny SPD matrix (small
1161 // scale) has a correspondingly tiny floor and still factors, while a pivot
1162 // that has collapsed to noise *relative to the matrix's own scale* (the
1163 // indefinite/singular case) is rejected.
1164 let scale = (trace / n as f64).abs().max(f64::MIN_POSITIVE);
1165 let scale_eps = GEOMETRY_EPS * scale;
1166 let mut l = Array2::<f64>::zeros((n, n));
1167 for i in 0..n {
1168 for j in 0..=i {
1169 let mut sum = a[[i, j]];
1170 for k in 0..j {
1171 sum -= l[[i, k]] * l[[j, k]];
1172 }
1173 if i == j {
1174 if !sum.is_finite() || sum <= scale_eps {
1175 return Err(GeometryError::InvalidPoint(
1176 "matrix is not positive definite",
1177 ));
1178 }
1179 l[[i, j]] = sum.sqrt();
1180 } else {
1181 l[[i, j]] = sum / l[[j, j]];
1182 }
1183 }
1184 }
1185 Ok(l)
1186}
1187
1188#[cfg(test)]
1189mod cholesky_tests {
1190 use super::{GeometryError, cholesky_spd};
1191 use ndarray::Array2;
1192
1193 /// A genuine SPD matrix with a uniformly tiny spectrum (`[[1e-16]]`) must
1194 /// factor: the issue is positive-definiteness, not absolute scale. The old
1195 /// absolute `GEOMETRY_EPS` floor wrongly rejected it.
1196 #[test]
1197 fn cholesky_accepts_tiny_spd() {
1198 let mut a = Array2::<f64>::zeros((1, 1));
1199 a[[0, 0]] = 1.0e-16;
1200 let l = cholesky_spd(&a).expect("tiny positive 1x1 must be SPD");
1201 assert!((l[[0, 0]] - 1.0e-8).abs() <= 1.0e-16);
1202 }
1203
1204 /// A well-scaled SPD matrix factors and reproduces `L Lᵀ = A`.
1205 #[test]
1206 fn cholesky_accepts_well_scaled_spd() {
1207 // [[4, 2], [2, 3]] is SPD (eigenvalues ≈ 5.56, 1.44).
1208 let mut a = Array2::<f64>::zeros((2, 2));
1209 a[[0, 0]] = 4.0;
1210 a[[0, 1]] = 2.0;
1211 a[[1, 0]] = 2.0;
1212 a[[1, 1]] = 3.0;
1213 let l = cholesky_spd(&a).expect("well-scaled SPD must factor");
1214 let recon = l.dot(&l.t());
1215 for i in 0..2 {
1216 for j in 0..2 {
1217 assert!(
1218 (recon[[i, j]] - a[[i, j]]).abs() <= 1.0e-12,
1219 "L Lᵀ != A at ({i},{j})"
1220 );
1221 }
1222 }
1223 }
1224
1225 /// A zero pivot (singular) and an indefinite matrix must be rejected as not
1226 /// positive definite — the scale-relative floor still catches the genuine
1227 /// non-PD case.
1228 #[test]
1229 fn cholesky_rejects_zero_and_indefinite() {
1230 let zero = Array2::<f64>::zeros((1, 1));
1231 match cholesky_spd(&zero) {
1232 Err(GeometryError::InvalidPoint(_)) => {}
1233 other => panic!("expected non-PD rejection of zero pivot, got {other:?}"),
1234 }
1235 // [[1, 2], [2, 1]] has eigenvalues 3 and −1 (indefinite): the Schur
1236 // complement pivot 1 − 4 = −3 is negative.
1237 let mut indef = Array2::<f64>::zeros((2, 2));
1238 indef[[0, 0]] = 1.0;
1239 indef[[0, 1]] = 2.0;
1240 indef[[1, 0]] = 2.0;
1241 indef[[1, 1]] = 1.0;
1242 match cholesky_spd(&indef) {
1243 Err(GeometryError::InvalidPoint(_)) => {}
1244 other => panic!("expected non-PD rejection of indefinite matrix, got {other:?}"),
1245 }
1246 }
1247}
1248
1249#[cfg(test)]
1250mod qr_thin_tests {
1251 use super::qr_thin;
1252 use ndarray::Array2;
1253
1254 /// Two identical columns make the second residual vanish; the fallback axis
1255 /// must be Gram–Schmidted against the first accepted column so `QᵀQ = I`.
1256 /// The old behavior planted `e₂` directly, giving `q₁·q₂ = 1/√2`.
1257 #[test]
1258 fn qr_thin_duplicated_columns_orthonormal() {
1259 let mut a = Array2::<f64>::zeros((2, 2));
1260 // Both columns = (1, 1).
1261 a[[0, 0]] = 1.0;
1262 a[[1, 0]] = 1.0;
1263 a[[0, 1]] = 1.0;
1264 a[[1, 1]] = 1.0;
1265 let (q, r) = qr_thin(&a);
1266 // Deficient second column ⇒ R[1,1] = 0.
1267 assert!(
1268 r[[1, 1]].abs() <= 1.0e-14,
1269 "deficient column must set R[1,1]=0"
1270 );
1271 let gram = q.t().dot(&q);
1272 for i in 0..2 {
1273 for j in 0..2 {
1274 let want = if i == j { 1.0 } else { 0.0 };
1275 assert!(
1276 (gram[[i, j]] - want).abs() <= 1.0e-12,
1277 "QᵀQ != I at ({i},{j}): got {}",
1278 gram[[i, j]]
1279 );
1280 }
1281 }
1282 }
1283
1284 /// A full-rank input still gives `QᵀQ = I` and reconstructs `A = QR`.
1285 #[test]
1286 fn qr_thin_full_rank_reconstructs() {
1287 let mut a = Array2::<f64>::zeros((3, 2));
1288 a[[0, 0]] = 1.0;
1289 a[[1, 0]] = 1.0;
1290 a[[2, 0]] = 0.0;
1291 a[[0, 1]] = 1.0;
1292 a[[1, 1]] = 0.0;
1293 a[[2, 1]] = 1.0;
1294 let (q, r) = qr_thin(&a);
1295 let gram = q.t().dot(&q);
1296 for i in 0..2 {
1297 for j in 0..2 {
1298 let want = if i == j { 1.0 } else { 0.0 };
1299 assert!(
1300 (gram[[i, j]] - want).abs() <= 1.0e-12,
1301 "QᵀQ != I at ({i},{j})"
1302 );
1303 }
1304 }
1305 let recon = q.dot(&r);
1306 for i in 0..3 {
1307 for j in 0..2 {
1308 assert!(
1309 (recon[[i, j]] - a[[i, j]]).abs() <= 1.0e-12,
1310 "QR != A at ({i},{j})"
1311 );
1312 }
1313 }
1314 }
1315}
1316
1317#[cfg(test)]
1318mod matrix_log_tests {
1319 use super::{matrix_exp, orthonormal_completion, skew_log_orthogonal};
1320 use ndarray::Array2;
1321
1322 /// `exp(skew_log_orthogonal(V)) = V` for a block-diagonal rotation built
1323 /// from two planes — including one with angle θ > π/2, which an
1324 /// `arcsin`-only scheme (no `cos θ` disambiguation) would get wrong.
1325 #[test]
1326 fn log_then_exp_recovers_rotation() {
1327 // 5×5 orthogonal: rotation by 2.3 rad in (0,1), by 0.4 rad in (2,3),
1328 // identity on axis 4.
1329 let mut v = Array2::<f64>::zeros((5, 5));
1330 let (c0, s0) = (2.3_f64.cos(), 2.3_f64.sin());
1331 let (c1, s1) = (0.4_f64.cos(), 0.4_f64.sin());
1332 v[[0, 0]] = c0;
1333 v[[0, 1]] = -s0;
1334 v[[1, 0]] = s0;
1335 v[[1, 1]] = c0;
1336 v[[2, 2]] = c1;
1337 v[[2, 3]] = -s1;
1338 v[[3, 2]] = s1;
1339 v[[3, 3]] = c1;
1340 v[[4, 4]] = 1.0;
1341 let s = skew_log_orthogonal(&v).expect("log of rotation");
1342 // S must be skew.
1343 for i in 0..5 {
1344 for j in 0..5 {
1345 assert!(
1346 (s[[i, j]] + s[[j, i]]).abs() < 1e-12,
1347 "log not skew at ({i},{j})"
1348 );
1349 }
1350 }
1351 let back = matrix_exp(&s).expect("exp of skew");
1352 let mut worst = 0.0_f64;
1353 for i in 0..5 {
1354 for j in 0..5 {
1355 worst = worst.max((back[[i, j]] - v[[i, j]]).abs());
1356 }
1357 }
1358 assert!(worst < 1e-10, "exp∘log != id for rotation: {worst:.3e}");
1359 }
1360
1361 /// A rotation by exactly π (eigenvalue −1) is the cut locus: the logarithm
1362 /// is not single-valued and must be refused, not silently dropped.
1363 #[test]
1364 fn log_refuses_pi_rotation() {
1365 // Rotation by π in the (0,1) plane: diag block [[-1,0],[0,-1]].
1366 let mut v = Array2::<f64>::zeros((3, 3));
1367 v[[0, 0]] = -1.0;
1368 v[[1, 1]] = -1.0;
1369 v[[2, 2]] = 1.0;
1370 assert!(
1371 skew_log_orthogonal(&v).is_err(),
1372 "π rotation must be refused as the cut locus"
1373 );
1374 }
1375
1376 /// Completing an `m×p` orthonormal block must yield an `SO(m)` matrix whose
1377 /// first `p` columns are the input, and a square (`p==m`) input must be
1378 /// returned untouched (never sign-flipped).
1379 #[test]
1380 fn completion_is_orthogonal_and_preserves_input() {
1381 // 4×2 orthonormal block.
1382 let mut cols = Array2::<f64>::zeros((4, 2));
1383 cols[[0, 0]] = 1.0;
1384 cols[[1, 1]] = 1.0;
1385 let full = orthonormal_completion(&cols);
1386 let gram = full.t().dot(&full);
1387 for i in 0..4 {
1388 for j in 0..4 {
1389 let want = if i == j { 1.0 } else { 0.0 };
1390 assert!((gram[[i, j]] - want).abs() < 1e-12, "not orthogonal");
1391 }
1392 }
1393 for j in 0..2 {
1394 for i in 0..4 {
1395 assert!((full[[i, j]] - cols[[i, j]]).abs() < 1e-14, "input column changed");
1396 }
1397 }
1398 // Square input with det −1 must be returned verbatim (no flip).
1399 let mut sq = Array2::<f64>::zeros((2, 2));
1400 sq[[0, 0]] = 1.0;
1401 sq[[1, 1]] = -1.0; // det = −1
1402 let out = orthonormal_completion(&sq);
1403 assert!((out[[1, 1]] + 1.0).abs() < 1e-14, "square input was modified");
1404 }
1405}
1406
1407#[cfg(test)]
1408mod jacobi_tests {
1409 use super::{GeometryError, jacobi_symmetric};
1410 use ndarray::Array2;
1411
1412 /// A large-norm SPD matrix has off-diagonal residuals after
1413 /// diagonalization that scale with `||A||_F`, so they sit far above the
1414 /// old *absolute* `1e-13` cutoff even when the decomposition is, in fact,
1415 /// fully converged. The relative threshold (`1e-13 * ||A||_F`) recognizes
1416 /// convergence here and returns the correct spectrum instead of grinding
1417 /// through `max_iter` sweeps and silently returning a partial diagonal.
1418 #[test]
1419 fn jacobi_converges_on_large_norm_spd() {
1420 // Q diag(1e8, 2e8, 3e8) Qᵀ for an orthogonal Q built from a planar
1421 // rotation in the (0,1) plane; eigenvalues are huge so the matrix
1422 // norm is ~1e8 and any absolute 1e-13 off-diagonal test is hopeless.
1423 let theta = 0.7_f64;
1424 let (c, s) = (theta.cos(), theta.sin());
1425 let mut q = Array2::<f64>::eye(3);
1426 q[[0, 0]] = c;
1427 q[[0, 1]] = -s;
1428 q[[1, 0]] = s;
1429 q[[1, 1]] = c;
1430 let lambda = [1.0e8_f64, 2.0e8, 3.0e8];
1431 let mut diag = Array2::<f64>::zeros((3, 3));
1432 for i in 0..3 {
1433 diag[[i, i]] = lambda[i];
1434 }
1435 let a = q.dot(&diag).dot(&q.t());
1436
1437 let (evals, evecs) = jacobi_symmetric(&a).expect("large-norm SPD must converge");
1438 let mut sorted: Vec<f64> = evals.to_vec();
1439 sorted.sort_by(|x, y| x.partial_cmp(y).unwrap());
1440 for (got, want) in sorted.iter().zip(lambda.iter()) {
1441 assert!(
1442 (got - want).abs() <= 1.0e-6 * want,
1443 "eigenvalue mismatch: got {got}, want {want}"
1444 );
1445 }
1446 // V diag(evals) Vᵀ must reconstruct A (relative to its scale).
1447 let mut diag_e = Array2::<f64>::zeros((3, 3));
1448 for i in 0..3 {
1449 diag_e[[i, i]] = evals[i];
1450 }
1451 let recon = evecs.dot(&diag_e).dot(&evecs.t());
1452 for i in 0..3 {
1453 for j in 0..3 {
1454 assert!(
1455 (recon[[i, j]] - a[[i, j]]).abs() <= 1.0e-6 * 3.0e8,
1456 "reconstruction mismatch at ({i},{j})"
1457 );
1458 }
1459 }
1460 }
1461
1462 /// A clustered/degenerate spectrum (two coincident eigenvalues) must still
1463 /// converge and reproduce the multiplicity. This guards against the
1464 /// relative threshold being so tight that ordinary near-degenerate SPD
1465 /// inputs trip the new non-convergence error.
1466 #[test]
1467 fn jacobi_handles_clustered_spectrum() {
1468 // diag(5, 5, 1) rotated in the (0,2) plane; the degenerate pair stays
1469 // degenerate under rotation.
1470 let theta = 0.4_f64;
1471 let (c, s) = (theta.cos(), theta.sin());
1472 let mut q = Array2::<f64>::eye(3);
1473 q[[0, 0]] = c;
1474 q[[0, 2]] = -s;
1475 q[[2, 0]] = s;
1476 q[[2, 2]] = c;
1477 let lambda = [5.0_f64, 5.0, 1.0];
1478 let mut diag = Array2::<f64>::zeros((3, 3));
1479 for i in 0..3 {
1480 diag[[i, i]] = lambda[i];
1481 }
1482 let a = q.dot(&diag).dot(&q.t());
1483
1484 let (evals, evecs) = jacobi_symmetric(&a).expect("clustered SPD must converge");
1485 let mut sorted: Vec<f64> = evals.to_vec();
1486 sorted.sort_by(|x, y| x.partial_cmp(y).unwrap());
1487 assert!((sorted[0] - 1.0).abs() <= 1.0e-12);
1488 assert!((sorted[1] - 5.0).abs() <= 1.0e-12);
1489 assert!((sorted[2] - 5.0).abs() <= 1.0e-12);
1490 // Eigenvectors must remain orthonormal even across the degenerate pair.
1491 let gram = evecs.t().dot(&evecs);
1492 for i in 0..3 {
1493 for j in 0..3 {
1494 let want = if i == j { 1.0 } else { 0.0 };
1495 assert!(
1496 (gram[[i, j]] - want).abs() <= 1.0e-12,
1497 "eigenvectors not orthonormal at ({i},{j})"
1498 );
1499 }
1500 }
1501 }
1502
1503 /// Non-convergence must now surface as `GeometryError::Singular` instead
1504 /// of a silently-returned partial diagonal. A symmetric input carrying a
1505 /// non-finite off-diagonal can never drive the largest off-diagonal
1506 /// magnitude below `1e-13 * ||A||_F` (the norm itself is non-finite), so
1507 /// the sweep exhausts `max_iter` and the solver must error rather than
1508 /// hand back the un-diagonalized matrix's diagonal.
1509 #[test]
1510 fn jacobi_errors_on_non_convergence() {
1511 let mut a = Array2::<f64>::eye(3);
1512 a[[0, 1]] = f64::NAN;
1513 a[[1, 0]] = f64::NAN;
1514 match jacobi_symmetric(&a) {
1515 Err(GeometryError::Singular(_)) => {}
1516 other => panic!("expected Singular non-convergence error, got {other:?}"),
1517 }
1518 }
1519}