Skip to main content

gam_solve/
spline_scan.rs

1//! Exact O(n) state-space polynomial smoothing spline ("the scan").
2//!
3//! The order-`m` intrinsic Gaussian prior whose penalized posterior mean is the
4//! degree-`(2m−1)` smoothing spline (penalty `λ∫(f^{(m)})²`) is a Markov process
5//! in the state `α(x) = (f, f′, …, f^{(m−1)})`: an `m`-fold integrated Wiener
6//! process. The Kalman filter + RTS smoother over the x-sorted observations
7//! therefore computes the EXACT smoothing-spline posterior — mean, derivatives,
8//! pointwise variance — and the diffuse innovations decomposition computes the
9//! EXACT restricted (REML) likelihood, all in O(n) work per smoothing-parameter
10//! trial instead of the dense O(n·k²) design/Gram + O(k³) solve per trial
11//! (Wahba 1978; Kohn & Ansley 1987; Durbin & Koopman exact diffuse init).
12//!
13//! Supported orders are `m ∈ {1, 2, 3}` (`MAX_ORDER`): `m = 1` is the
14//! random-walk / linear smoother (penalty `λ∫f′²`), `m = 2` the cubic smoother
15//! (`λ∫f″²`), `m = 3` the quintic smoother (`λ∫(f‴)²`, natural spline degree
16//! `2m−1 = 5`). The diffuse prior carries `m` improper dimensions consumed by
17//! the first `m` distinct abscissae, leaving `m − 1` *partially-diffuse leading
18//! nodes* whose smoothed moments the ordinary RTS recursion cannot reach (its
19//! predicted covariance is rank-deficient there). For `m = 2` that is the
20//! single node 0; for `m = 3` the pair {0, 1}. These are recovered exactly by a
21//! joint Gaussian conditioning of the whole leading block on the first proper
22//! smoothed node (see the smoother pass) — the exact diffuse analog of RTS, and
23//! the multi-node generalization of the `m = 2` reverse-Markov closure.
24//!
25//! Model, after sorting and pooling tied abscissae (precision-weighted):
26//!   α_{t+1} = F_t α_t + η_t,   η_t ~ N(0, q·Q(δ_t)),   q = σ_w²/σ² = 1/λ,
27//!   y_t     = H α_t + ε_t,     ε_t ~ N(0, σ²/w_t),     H = [1 0 … 0],
28//!   F(δ) = exp(δA) (nilpotent shift A),   Q(δ) the m-fold IWP noise,
29//! with a diffuse (improper, flat) prior on the first `m` states carrying the
30//! unpenalized degree-`<m` polynomial null space the spline leaves unshrunk.
31//! (`m = 2`: `F = [[1,δ],[0,1]]`, `Q = [[δ³/3,δ²/2],[δ²/2,δ]]`.)
32//!
33//! Exactness boundaries, by construction:
34//! - the diffuse dimension is `m` and is consumed by the first `m` distinct
35//!   abscissae, after which the filter is an ordinary proper Kalman filter;
36//! - the `m − 1` partially-diffuse leading nodes are recovered by exact Markov
37//!   conditioning of the whole leading block on the first proper smoothed node,
38//!   `p(α_{0..m−2} | y) = ∫ p(α_{0..m−2} | α_{m−1}, y_{0..m−2}) p(α_{m−1} | y)`
39//!   — an affine `((m−1)m)×m` Bayes update built from the flat leading prior,
40//!   the Markov increments, and the leading observations; it reduces to the
41//!   single-node reverse-Markov closure at `m = 2` and needs no diffuse RTS
42//!   recursion;
43//! - off-knot prediction is the Gaussian bridge conditional on the two
44//!   flanking smoothed states (using the exact lag-one smoothed
45//!   cross-covariance `G_t · P^s_{t+1}`), or boundary extrapolation from the
46//!   end states, which reproduces the spline's polynomial extrapolation with
47//!   growing variance — bridge-don't-sag is a theorem here.
48//!
49//! The smoothing parameter is selected by maximizing the concentrated diffuse
50//! (restricted) log-likelihood over log λ with a deterministic coarse-grid +
51//! golden-section refinement; σ² is profiled in closed form from the proper
52//! innovations plus the within-tie residual sum.
53
54/// One pooled (distinct-abscissa) observation node.
55#[derive(Clone, Copy, Debug)]
56struct PooledNode {
57    x: f64,
58    /// Precision-weighted mean of the tied responses.
59    y: f64,
60    /// Total weight of the pooled ties (observation variance is `σ²/w`).
61    w: f64,
62}
63
64/// Deterministic coarse-grid width for the log-λ search.
65const LOG_LAMBDA_GRID: usize = 25;
66/// Search interval for log λ (natural log), generous on both sides.
67const LOG_LAMBDA_LO: f64 = -18.0;
68const LOG_LAMBDA_HI: f64 = 18.0;
69/// Golden-section refinement tolerance on log λ.
70const LOG_LAMBDA_TOL: f64 = 1e-7;
71/// Numerical floor treating a predicted innovation variance as singular.
72const INNOVATION_VAR_FLOOR: f64 = 1e-300;
73
74/// Maximum supported smoothing-spline order handled by the fixed-capacity
75/// small-matrix layer. Order `m` penalizes `∫(f^{(m)})²`; the state dimension
76/// is `m`. The exact diffuse leading-block smoother (see the smoother pass)
77/// recovers the `m − 1` partially-diffuse leading nodes for any `m`: `m = 1`
78/// has none, `m = 2` has node 0, `m = 3` has {0, 1}. Order 3 (the quintic
79/// smoothing spline, #1044) is the current cap; bumping it further only needs a
80/// wider `mat_inv` branch and the (already order-general) leading-block solve.
81const MAX_ORDER: usize = 3;
82
83/// Row-major `m × m` matrix stored in a fixed `MAX_ORDER`-capacity buffer; only
84/// the top-left `m × m` block is meaningful. Generalizing the order-2 cubic
85/// scan to order `m ∈ {1, 2, 3}` (#1034 item 2, #1044) keeps the
86/// allocation-free fixed storage of the hot filter loop while letting `m` vary
87/// at runtime.
88type Mat2 = [[f64; MAX_ORDER]; MAX_ORDER];
89type Vec2 = [f64; MAX_ORDER];
90
91#[inline]
92fn mat_mul(a: &Mat2, b: &Mat2, m: usize) -> Mat2 {
93    let mut c = [[0.0; MAX_ORDER]; MAX_ORDER];
94    for i in 0..m {
95        for j in 0..m {
96            let mut acc = 0.0;
97            for k in 0..m {
98                acc += a[i][k] * b[k][j];
99            }
100            c[i][j] = acc;
101        }
102    }
103    c
104}
105
106#[inline]
107fn mat_t(a: &Mat2, m: usize) -> Mat2 {
108    let mut c = [[0.0; MAX_ORDER]; MAX_ORDER];
109    for i in 0..m {
110        for j in 0..m {
111            c[i][j] = a[j][i];
112        }
113    }
114    c
115}
116
117#[inline]
118fn mat_vec(a: &Mat2, v: &Vec2, m: usize) -> Vec2 {
119    let mut out = [0.0; MAX_ORDER];
120    for i in 0..m {
121        let mut acc = 0.0;
122        for j in 0..m {
123            acc += a[i][j] * v[j];
124        }
125        out[i] = acc;
126    }
127    out
128}
129
130#[inline]
131fn mat_add(a: &Mat2, b: &Mat2, m: usize) -> Mat2 {
132    let mut c = [[0.0; MAX_ORDER]; MAX_ORDER];
133    for i in 0..m {
134        for j in 0..m {
135            c[i][j] = a[i][j] + b[i][j];
136        }
137    }
138    c
139}
140
141#[inline]
142fn mat_sub(a: &Mat2, b: &Mat2, m: usize) -> Mat2 {
143    let mut c = [[0.0; MAX_ORDER]; MAX_ORDER];
144    for i in 0..m {
145        for j in 0..m {
146            c[i][j] = a[i][j] - b[i][j];
147        }
148    }
149    c
150}
151
152/// Inverse of an `m × m` (`m ∈ {1, 2, 3}`) with a hard singularity error.
153/// Closed-form cofactor inverses keep the hot-loop arithmetic exact and
154/// branch-free; order 3 is the quintic smoother's state dimension (#1044).
155fn mat_inv(a: &Mat2, m: usize, what: &str) -> Result<Mat2, String> {
156    let mut out = [[0.0; MAX_ORDER]; MAX_ORDER];
157    match m {
158        1 => {
159            let d = a[0][0];
160            if !(d.is_finite() && d.abs() > 0.0) {
161                return Err(format!("spline scan: singular 1x1 in {what} (a00={d})"));
162            }
163            out[0][0] = 1.0 / d;
164        }
165        2 => {
166            let det = a[0][0] * a[1][1] - a[0][1] * a[1][0];
167            if !(det.is_finite() && det.abs() > 0.0) {
168                return Err(format!("spline scan: singular 2x2 in {what} (det={det})"));
169            }
170            out[0][0] = a[1][1] / det;
171            out[0][1] = -a[0][1] / det;
172            out[1][0] = -a[1][0] / det;
173            out[1][1] = a[0][0] / det;
174        }
175        3 => {
176            // Cofactor / adjugate inverse. Cofactors of the 2×2 minors:
177            let c00 = a[1][1] * a[2][2] - a[1][2] * a[2][1];
178            let c01 = a[1][2] * a[2][0] - a[1][0] * a[2][2];
179            let c02 = a[1][0] * a[2][1] - a[1][1] * a[2][0];
180            let det = a[0][0] * c00 + a[0][1] * c01 + a[0][2] * c02;
181            if !(det.is_finite() && det.abs() > 0.0) {
182                return Err(format!("spline scan: singular 3x3 in {what} (det={det})"));
183            }
184            let inv_det = 1.0 / det;
185            // inv = adj/det = (cofactor matrix)ᵀ / det.
186            out[0][0] = c00 * inv_det;
187            out[0][1] = (a[0][2] * a[2][1] - a[0][1] * a[2][2]) * inv_det;
188            out[0][2] = (a[0][1] * a[1][2] - a[0][2] * a[1][1]) * inv_det;
189            out[1][0] = c01 * inv_det;
190            out[1][1] = (a[0][0] * a[2][2] - a[0][2] * a[2][0]) * inv_det;
191            out[1][2] = (a[0][2] * a[1][0] - a[0][0] * a[1][2]) * inv_det;
192            out[2][0] = c02 * inv_det;
193            out[2][1] = (a[0][1] * a[2][0] - a[0][0] * a[2][1]) * inv_det;
194            out[2][2] = (a[0][0] * a[1][1] - a[0][1] * a[1][0]) * inv_det;
195        }
196        _ => return Err(format!("spline scan: unsupported order {m} in {what}")),
197    }
198    Ok(out)
199}
200
201/// Inverse of a general dense `d × d` SPD matrix via Gauss–Jordan elimination
202/// with partial pivoting, symmetric diagonal (Jacobi) equilibration, and one
203/// iterative-refinement step. Used once per fit by the leading-block diffuse
204/// smoother (dimension `(order−1)·order ≤ 6`), so clarity over speed — it is
205/// NOT on the hot REML grid path (that runs only `run_filter`).
206///
207/// Equilibration matters at order `m ≥ 3`: the IWP process noise `Q(δ)` scales
208/// the `f^{(k)}` state components by `δ^{2m−1}` down to `δ`, so its inverse
209/// `(qQ)⁻¹` — and hence the leading-block precision `Λ` — spans many orders of
210/// magnitude (the f-component carries the `O(w)` observation term, the
211/// high-derivative components carry `O(1/(qδ^{2m−1}))` penalty mass). A bare
212/// Gauss–Jordan inverse of such a `Λ` loses `≈ ε·κ(Λ)` digits, which at heavy
213/// smoothing (small `q`) would corrupt the quintic's leading smoothed nodes.
214/// Rescaling to unit diagonal (`Λ̃ = SΛS`, `s_i = 1/√Λ_ii`) collapses that
215/// scale disparity before the elimination, then `Λ⁻¹ = S Λ̃⁻¹ S`.
216fn dense_spd_inverse(a: &[Vec<f64>], what: &str) -> Result<Vec<Vec<f64>>, String> {
217    let d = a.len();
218    // Jacobi equilibration scale s_i = 1/√Λ_ii (Λ SPD ⇒ Λ_ii > 0).
219    let s: Vec<f64> = (0..d)
220        .map(|i| {
221            let dii = a[i][i];
222            if dii.is_finite() && dii > 0.0 {
223                1.0 / dii.sqrt()
224            } else {
225                1.0
226            }
227        })
228        .collect();
229    let a_s: Vec<Vec<f64>> = (0..d)
230        .map(|i| (0..d).map(|j| s[i] * a[i][j] * s[j]).collect())
231        .collect();
232    // Gauss–Jordan inverse of the equilibrated matrix.
233    let mut inv_s = gauss_jordan_inverse(&a_s, what)?;
234    // One iterative-refinement step against the equilibrated system:
235    // X ← X + X·(I − Λ̃·X), reducing the residual to near machine precision.
236    let mut resid = vec![vec![0.0_f64; d]; d]; // R = I − Λ̃·X
237    for i in 0..d {
238        for j in 0..d {
239            let mut ax = 0.0;
240            for k in 0..d {
241                ax += a_s[i][k] * inv_s[k][j];
242            }
243            resid[i][j] = f64::from(u8::from(i == j)) - ax;
244        }
245    }
246    let mut delta = vec![vec![0.0_f64; d]; d]; // ΔX = X·R
247    for i in 0..d {
248        for j in 0..d {
249            let mut acc = 0.0;
250            for k in 0..d {
251                acc += inv_s[i][k] * resid[k][j];
252            }
253            delta[i][j] = acc;
254        }
255    }
256    for i in 0..d {
257        for j in 0..d {
258            inv_s[i][j] += delta[i][j];
259        }
260    }
261    // Un-equilibrate: Λ⁻¹ = S·Λ̃⁻¹·S.
262    Ok((0..d)
263        .map(|i| (0..d).map(|j| s[i] * inv_s[i][j] * s[j]).collect())
264        .collect())
265}
266
267/// Gauss–Jordan inverse with partial pivoting (helper for `dense_spd_inverse`).
268fn gauss_jordan_inverse(a: &[Vec<f64>], what: &str) -> Result<Vec<Vec<f64>>, String> {
269    let d = a.len();
270    let mut aug = a.to_vec();
271    let mut inv = vec![vec![0.0_f64; d]; d];
272    for i in 0..d {
273        inv[i][i] = 1.0;
274    }
275    for col in 0..d {
276        let piv = (col..d)
277            .max_by(|&i, &j| aug[i][col].abs().total_cmp(&aug[j][col].abs()))
278            .unwrap();
279        let p = aug[piv][col];
280        if !(p.is_finite() && p.abs() > 0.0) {
281            return Err(format!(
282                "spline scan: singular {d}x{d} in {what} (pivot={p})"
283            ));
284        }
285        aug.swap(col, piv);
286        inv.swap(col, piv);
287        let d_piv = aug[col][col];
288        for k in 0..d {
289            aug[col][k] /= d_piv;
290            inv[col][k] /= d_piv;
291        }
292        for r in 0..d {
293            if r == col {
294                continue;
295            }
296            let f = aug[r][col];
297            if f == 0.0 {
298                continue;
299            }
300            for k in 0..d {
301                aug[r][k] -= f * aug[col][k];
302                inv[r][k] -= f * inv[col][k];
303            }
304        }
305    }
306    Ok(inv)
307}
308
309/// Factorials `k!` for `k ≤ 2·MAX_ORDER` — the only ones the order-`m`
310/// transition and process-noise formulas reference.
311#[inline]
312fn factorial(k: usize) -> f64 {
313    (1..=k).map(|v| v as f64).product::<f64>().max(1.0)
314}
315
316/// Transition `F(δ) = exp(δ·A)` of the `m`-th order integrated Wiener process,
317/// `A` the nilpotent shift: `F[i][j] = δ^{j−i}/(j−i)!` for `j ≥ i`, else 0.
318/// `m = 1 ⇒ [[1]]`; `m = 2 ⇒ [[1, δ], [0, 1]]` (the cubic case, unchanged).
319#[inline]
320fn transition(delta: f64, m: usize) -> Mat2 {
321    let mut f = [[0.0; MAX_ORDER]; MAX_ORDER];
322    for i in 0..m {
323        for j in i..m {
324            f[i][j] = delta.powi((j - i) as i32) / factorial(j - i);
325        }
326    }
327    f
328}
329
330/// Process noise `Q(δ) = ∫₀^δ e^{As} b bᵀ e^{Aᵀs} ds` (`b = e_{m−1}`) of the
331/// `m`-th order IWP at unit `q`, scaled by `q`:
332/// `Q[i][j] = q · δ^{2m−1−i−j} / ((m−1−i)! (m−1−j)! (2m−1−i−j))`.
333/// `m = 1 ⇒ [[q·δ]]`; `m = 2 ⇒ [[q·δ³/3, q·δ²/2], [q·δ²/2, q·δ]]` (unchanged).
334#[inline]
335fn process_noise(delta: f64, q: f64, m: usize) -> Mat2 {
336    let mut out = [[0.0; MAX_ORDER]; MAX_ORDER];
337    for i in 0..m {
338        for j in 0..m {
339            let p = 2 * m - 1 - i - j;
340            out[i][j] = q * delta.powi(p as i32)
341                / (factorial(m - 1 - i) * factorial(m - 1 - j) * (p as f64));
342        }
343    }
344    out
345}
346
347/// Symmetrize in place against drift from the rank-one update arithmetic.
348#[inline]
349fn symmetrize(a: &mut Mat2, m: usize) {
350    for i in 0..m {
351        for j in (i + 1)..m {
352            let off = 0.5 * (a[i][j] + a[j][i]);
353            a[i][j] = off;
354            a[j][i] = off;
355        }
356    }
357}
358
359/// Per-node filter storage needed by the RTS backward pass.
360struct FilterStep {
361    /// Filtered mean `a_{t|t}` and proper covariance `P*_{t|t}`.
362    a_filt: Vec2,
363    p_filt: Mat2,
364    /// One-step prediction `a_{t|t-1}`, proper covariance `P*_{t|t-1}` (for t ≥ 1).
365    a_pred: Vec2,
366    p_pred: Mat2,
367}
368
369/// Output of one full filter pass at a fixed `q = 1/λ` (run at unit σ²).
370struct FilterPass {
371    steps: Vec<FilterStep>,
372    /// Σ over proper steps of `log F̃_t` (innovation variances at σ²=1).
373    sum_log_f: f64,
374    /// Σ over proper steps of `v_t² / F̃_t`.
375    sum_v2_over_f: f64,
376    /// Number of proper (non-diffuse) innovations.
377    n_proper: usize,
378}
379
380fn run_filter(nodes: &[PooledNode], q: f64, order: usize) -> Result<FilterPass, String> {
381    let n = nodes.len();
382    let mut steps = Vec::with_capacity(n);
383    // Exact diffuse initialization (Durbin–Koopman): P = P* + κ·P_∞, κ → ∞.
384    // The order-`m` polynomial null space (degree < m) is fully diffuse: the
385    // diffuse rank starts at `order`, consumed by the first `order` distinct
386    // abscissae.
387    let mut a: Vec2 = [0.0; MAX_ORDER];
388    let mut p_star: Mat2 = [[0.0; MAX_ORDER]; MAX_ORDER];
389    let mut p_inf: Mat2 = [[0.0; MAX_ORDER]; MAX_ORDER];
390    for i in 0..order {
391        p_inf[i][i] = 1.0;
392    }
393    let mut diffuse_rank = order;
394    let mut sum_log_f = 0.0;
395    let mut sum_v2_over_f = 0.0;
396    let mut n_proper = 0usize;
397    for t in 0..n {
398        let a_pred = a;
399        let p_pred = p_star;
400        let r = 1.0 / nodes[t].w;
401        let v = nodes[t].y - a[0];
402        // H = [1 0 … 0] ⇒ M = P·H' is the first column, F = M[0] (+ r).
403        let mut m_star: Vec2 = [0.0; MAX_ORDER];
404        for i in 0..order {
405            m_star[i] = p_star[i][0];
406        }
407        let f_star = m_star[0] + r;
408        if diffuse_rank > 0 {
409            let mut m_inf: Vec2 = [0.0; MAX_ORDER];
410            for i in 0..order {
411                m_inf[i] = p_inf[i][0];
412            }
413            let f_inf = m_inf[0];
414            if f_inf > INNOVATION_VAR_FLOOR {
415                // Exact diffuse update (Koopman 1997): the κ→∞ limit of the
416                // standard update; the diffuse step contributes −½·log F_∞ to
417                // the restricted likelihood and consumes one diffuse dimension.
418                for i in 0..order {
419                    a[i] += (m_inf[i] / f_inf) * v;
420                }
421                let mut p_new = p_star;
422                for i in 0..order {
423                    for j in 0..order {
424                        p_new[i][j] += -m_inf[i] * m_star[j] / f_inf - m_star[i] * m_inf[j] / f_inf
425                            + m_inf[i] * m_inf[j] * f_star / (f_inf * f_inf);
426                    }
427                }
428                p_star = p_new;
429                symmetrize(&mut p_star, order);
430                for i in 0..order {
431                    for j in 0..order {
432                        p_inf[i][j] -= m_inf[i] * m_inf[j] / f_inf;
433                    }
434                }
435                symmetrize(&mut p_inf, order);
436                diffuse_rank -= 1;
437                if diffuse_rank == 0 {
438                    p_inf = [[0.0; MAX_ORDER]; MAX_ORDER];
439                }
440            } else {
441                // Diffuse direction orthogonal to H at this node: ordinary
442                // proper update with P* (F_∞ = 0 ⇒ standard Kalman step).
443                if f_star <= INNOVATION_VAR_FLOOR {
444                    return Err("spline scan: non-positive innovation variance".to_string());
445                }
446                for i in 0..order {
447                    a[i] += (m_star[i] / f_star) * v;
448                }
449                for i in 0..order {
450                    for j in 0..order {
451                        p_star[i][j] -= m_star[i] * m_star[j] / f_star;
452                    }
453                }
454                symmetrize(&mut p_star, order);
455                sum_log_f += f_star.ln();
456                sum_v2_over_f += v * v / f_star;
457                n_proper += 1;
458            }
459        } else {
460            if f_star <= INNOVATION_VAR_FLOOR {
461                return Err("spline scan: non-positive innovation variance".to_string());
462            }
463            for i in 0..order {
464                a[i] += (m_star[i] / f_star) * v;
465            }
466            for i in 0..order {
467                for j in 0..order {
468                    p_star[i][j] -= m_star[i] * m_star[j] / f_star;
469                }
470            }
471            symmetrize(&mut p_star, order);
472            sum_log_f += f_star.ln();
473            sum_v2_over_f += v * v / f_star;
474            n_proper += 1;
475        }
476        steps.push(FilterStep {
477            a_filt: a,
478            p_filt: p_star,
479            a_pred,
480            p_pred,
481        });
482        // Predict to the next node.
483        if t + 1 < n {
484            let delta = nodes[t + 1].x - nodes[t].x;
485            let f_t = transition(delta, order);
486            a = mat_vec(&f_t, &a, order);
487            let mut p_next = mat_add(
488                &mat_mul(&mat_mul(&f_t, &p_star, order), &mat_t(&f_t, order), order),
489                &process_noise(delta, q, order),
490                order,
491            );
492            symmetrize(&mut p_next, order);
493            p_star = p_next;
494            if diffuse_rank > 0 {
495                let mut pi_next =
496                    mat_mul(&mat_mul(&f_t, &p_inf, order), &mat_t(&f_t, order), order);
497                symmetrize(&mut pi_next, order);
498                p_inf = pi_next;
499            }
500        }
501    }
502    Ok(FilterPass {
503        steps,
504        sum_log_f,
505        sum_v2_over_f,
506        n_proper,
507    })
508}
509
510/// Fitted exact smoothing-spline posterior on the pooled knots.
511#[derive(Clone, Debug)]
512pub struct SplineScanFit {
513    /// Smoothing-spline order `m` (penalize `∫(f^{(m)})²`); state dimension.
514    /// `m = 1` is the random-walk/linear smoother, `m = 2` the cubic smoother,
515    /// `m = 3` the quintic smoother.
516    pub order: usize,
517    /// Distinct sorted abscissae (pooled knots).
518    pub knots: Vec<f64>,
519    /// Smoothed posterior mean of `f` at each knot.
520    pub mean: Vec<f64>,
521    /// Smoothed posterior mean of `f′` at each knot (`0` for order `m = 1`,
522    /// which carries no derivative state).
523    pub deriv: Vec<f64>,
524    /// Posterior variance of `f` at each knot (scaled by `sigma2`).
525    pub var: Vec<f64>,
526    /// Selected (or supplied) log smoothing parameter `log λ`.
527    pub log_lambda: f64,
528    /// Profiled (or supplied) observation variance σ².
529    pub sigma2: f64,
530    /// Concentrated diffuse restricted log-likelihood at the optimum, up to a
531    /// λ- and data-independent additive constant. Differences across λ are
532    /// exact REML criterion differences.
533    pub restricted_loglik: f64,
534    /// Raw observation count `n` (pre-pooling; ties collapse to fewer knots).
535    /// The profiled σ² used the restricted residual d.o.f. `n − order`, so this
536    /// is exactly the count needed to recover the Gaussian deviance
537    /// (`σ²·(n − order)`) and the residual d.o.f. for introspection (#1046).
538    pub n_obs: usize,
539    /// Smoothed full states `(f, f′)` per knot.
540    smoothed_state: Vec<Vec2>,
541    /// Smoothed full state covariances per knot (unit-σ² scale).
542    smoothed_cov: Vec<Mat2>,
543    /// RTS backward gains `G_t` (lag-one cross-covariance is `G_t · P^s_{t+1}`).
544    rts_gain: Vec<Mat2>,
545    /// q = 1/λ used by the pass (unit-σ² scale).
546    q: f64,
547    /// Pooled observation weight per knot (sum of tied raw weights).
548    node_weight: Vec<f64>,
549}
550
551/// Pool tied abscissae and validate inputs. Returns nodes plus the within-tie
552/// weighted residual sum and the raw observation count.
553fn pool_nodes(
554    x: &[f64],
555    y: &[f64],
556    w: &[f64],
557    order: usize,
558) -> Result<(Vec<PooledNode>, f64, usize), String> {
559    let n = x.len();
560    if y.len() != n || w.len() != n {
561        return Err(format!(
562            "spline scan: length mismatch x={n}, y={}, w={}",
563            y.len(),
564            w.len()
565        ));
566    }
567    for i in 0..n {
568        if !(x[i].is_finite() && y[i].is_finite() && w[i].is_finite() && w[i] > 0.0) {
569            return Err(format!(
570                "spline scan: non-finite or non-positive input at row {i} (x={}, y={}, w={})",
571                x[i], y[i], w[i]
572            ));
573        }
574    }
575    let mut perm: Vec<usize> = (0..n).collect();
576    perm.sort_by(|&i, &j| x[i].total_cmp(&x[j]));
577    let mut nodes: Vec<PooledNode> = Vec::new();
578    for &i in &perm {
579        match nodes.last_mut() {
580            Some(last) if last.x == x[i] => {
581                let w_new = last.w + w[i];
582                last.y = (last.y * last.w + y[i] * w[i]) / w_new;
583                last.w = w_new;
584            }
585            _ => nodes.push(PooledNode {
586                x: x[i],
587                y: y[i],
588                w: w[i],
589            }),
590        }
591    }
592    // Need the `order` diffuse dimensions plus at least one proper innovation.
593    if nodes.len() < order + 1 {
594        return Err(format!(
595            "spline scan: order {order} needs at least {} distinct abscissae, got {}",
596            order + 1,
597            nodes.len()
598        ));
599    }
600    // Within-tie residual sum Σ w_i (y_i − ȳ_group)², part of the profiled σ².
601    let mut ssr_within = 0.0;
602    let mut k = 0usize;
603    for &i in &perm {
604        while nodes[k].x != x[i] {
605            k += 1;
606        }
607        let d = y[i] - nodes[k].y;
608        ssr_within += w[i] * d * d;
609    }
610    Ok((nodes, ssr_within, n))
611}
612
613/// Concentrated diffuse restricted log-likelihood at `log λ` (σ² profiled).
614fn concentrated_criterion(
615    nodes: &[PooledNode],
616    ssr_within: f64,
617    n_obs: usize,
618    log_lambda: f64,
619    order: usize,
620) -> Result<f64, String> {
621    let pass = run_filter(nodes, (-log_lambda).exp(), order)?;
622    // Profiled σ̂² over the proper innovations plus within-tie residuals;
623    // the restricted degrees of freedom subtract the diffuse dimension `order`.
624    let dof = (n_obs - order) as f64;
625    let rss = pass.sum_v2_over_f + ssr_within;
626    if rss <= 0.0 {
627        return Err("spline scan: degenerate zero residual sum".to_string());
628    }
629    let sigma2 = rss / dof;
630    if pass.n_proper != nodes.len() - order {
631        return Err(format!(
632            "spline scan: expected {} proper innovations, got {} (diffuse rank not consumed)",
633            nodes.len() - order,
634            pass.n_proper
635        ));
636    }
637    Ok(-0.5 * (pass.sum_log_f + dof * sigma2.ln()))
638}
639
640/// Exact diffuse smoother for the `order−1` partially-diffuse leading nodes
641/// (#1044 — the multi-node generalization of the `m = 2` reverse-Markov
642/// closure).
643///
644/// Ordinary RTS recovers every node `t ≥ order−1` (where the filtered
645/// distribution is proper). The first `order−1` nodes are partially diffuse:
646/// their filtered covariance still carries unresolved diffuse mass, so RTS —
647/// which needs the predicted covariance `P_{t+1|t}` to be invertible — cannot
648/// reach them. By the Markov property the leading block depends on all future
649/// data ONLY through the first proper smoothed node `α_{order−1}`:
650///
651///   p(α_{0..order−2} | y) = ∫ p(α_{0..order−2} | α_{order−1}, y_{0..order−2})
652///                             · p(α_{order−1} | y) dα_{order−1}.
653///
654/// The inner conditional is a proper Gaussian: it is the flat (improper)
655/// leading prior tightened by the Markov increments `(α_{t+1} − Fα_t)ᵀ(qQ)⁻¹(·)`
656/// and the leading observations `w_t (y_t − f_t)²`, with `α_{order−1}` entering
657/// linearly through the last increment. Writing `u = (α_0, …, α_{order−2})`,
658///
659///   u | α_{order−1} ~ N(C·α_{order−1} + d,  Σ),   Σ = Λ⁻¹,
660///   Λ  = increments(F'(qQ)⁻¹F …) + leading obs,
661///   d  = Σ·b_const,   C = Σ·B   (B = the pinned-node coupling F'(qQ)⁻¹),
662///
663/// and pushing the smoothed `α_{order−1} ~ N(α̂_p, V_p)` through the affine map
664/// gives the EXACT smoothed leading block, its covariances, and the lag-one
665/// cross-covariances `Cov(α_j, α_{j+1} | y)` the bridge `predict` needs:
666///
667///   mean(u) = C·α̂_p + d,   Cov(u) = C V_p Cᵀ + Σ,   Cov(u, α_p) = C V_p.
668///
669/// This is exact Gaussian conditioning — no diffuse RTS recursion, no
670/// sign-convention-laden `r/N` adjoint. At `order = 2` (one leading node) it is
671/// algebraically the existing single-node closure.
672fn leading_block_smooth(
673    sm_state: &mut [Vec2],
674    sm_cov: &mut [Mat2],
675    gains: &mut [Mat2],
676    nodes: &[PooledNode],
677    q: f64,
678    order: usize,
679) -> Result<(), String> {
680    let nb = order - 1; // leading nodes 0..nb-1 (the partially-diffuse ones)
681    let pin = order - 1; // first proper smoothed node (conditioning anchor)
682    let d = nb * order; // joint dimension of the leading block
683    let mut lambda = vec![vec![0.0_f64; d]; d];
684    let mut b_const = vec![0.0_f64; d];
685    let mut bmat = vec![vec![0.0_f64; order]; d]; // coupling to the pinned node
686
687    // Markov increments t = 0..order-2, each connecting node t and node t+1.
688    for t in 0..order - 1 {
689        let delta = nodes[t + 1].x - nodes[t].x;
690        let f = transition(delta, order);
691        let qn = process_noise(delta, q, order);
692        let a = mat_inv(&qn, order, "leading-block increment noise")?; // (qQ)⁻¹ (symmetric)
693        let ft = mat_t(&f, order);
694        let fta = mat_mul(&ft, &a, order); // F'A
695        let ftaf = mat_mul(&fta, &f, order); // F'A F
696        let af = mat_mul(&a, &f, order); // A F = (F'A)'
697        // Node t diagonal block (node t is always in the block): += F'A F.
698        for i in 0..order {
699            for j in 0..order {
700                lambda[t * order + i][t * order + j] += ftaf[i][j];
701            }
702        }
703        if t + 1 <= nb - 1 {
704            // Both nodes are in the block: fill node t+1's diagonal and the
705            // symmetric cross blocks.
706            for i in 0..order {
707                for j in 0..order {
708                    lambda[(t + 1) * order + i][(t + 1) * order + j] += a[i][j];
709                    lambda[t * order + i][(t + 1) * order + j] -= fta[i][j];
710                    lambda[(t + 1) * order + i][t * order + j] -= af[i][j];
711                }
712            }
713        } else {
714            // t+1 is the pinned node: it enters the conditional only linearly,
715            // through B (its coupling into node t's score is F'A·α_pin).
716            for i in 0..order {
717                for j in 0..order {
718                    bmat[t * order + i][j] += fta[i][j];
719                }
720            }
721        }
722    }
723    // Leading observations: y_t informs the f-component (local index 0) of node t.
724    for t in 0..nb {
725        let w = nodes[t].w;
726        lambda[t * order][t * order] += w;
727        b_const[t * order] += w * nodes[t].y;
728    }
729
730    // Conditional covariance Σ = Λ⁻¹, intercept d = Σ·b_const, coupling C = Σ·B.
731    let sigma = dense_spd_inverse(&lambda, "leading-block precision")?;
732    let dvec: Vec<f64> = (0..d)
733        .map(|i| (0..d).map(|k| sigma[i][k] * b_const[k]).sum())
734        .collect();
735    let cmat: Vec<Vec<f64>> = (0..d)
736        .map(|i| {
737            (0..order)
738                .map(|j| (0..d).map(|k| sigma[i][k] * bmat[k][j]).sum())
739                .collect()
740        })
741        .collect();
742
743    // Pinned smoothed moments (from the ordinary RTS pass).
744    let ahat_p = sm_state[pin];
745    let vp = sm_cov[pin];
746    // cvp = C·V_p  (= Cov(u, α_pin)), D×order.
747    let cvp: Vec<Vec<f64>> = (0..d)
748        .map(|i| {
749            (0..order)
750                .map(|j| (0..order).map(|k| cmat[i][k] * vp[k][j]).sum())
751                .collect()
752        })
753        .collect();
754    // mean(u) = C·α̂_p + d.
755    let mean_u: Vec<f64> = (0..d)
756        .map(|i| (0..order).map(|j| cmat[i][j] * ahat_p[j]).sum::<f64>() + dvec[i])
757        .collect();
758    // Cov(u) = cvp·Cᵀ + Σ.
759    let cov_u: Vec<Vec<f64>> = (0..d)
760        .map(|i| {
761            (0..d)
762                .map(|k| (0..order).map(|j| cvp[i][j] * cmat[k][j]).sum::<f64>() + sigma[i][k])
763                .collect()
764        })
765        .collect();
766
767    // Scatter the smoothed leading states and covariances.
768    for j in 0..nb {
769        for i in 0..order {
770            sm_state[j][i] = mean_u[j * order + i];
771        }
772        let mut cov = [[0.0_f64; MAX_ORDER]; MAX_ORDER];
773        for i in 0..order {
774            for k in 0..order {
775                cov[i][k] = cov_u[j * order + i][j * order + k];
776            }
777        }
778        symmetrize(&mut cov, order);
779        sm_cov[j] = cov;
780    }
781    // Lag-one bridge gains for the leading intervals [j, j+1], j = 0..order-2.
782    // gain_j = Cov(α_j, α_{j+1} | y) · Cov(α_{j+1} | y)⁻¹, so that the bridge's
783    // `gain_j · P^s_{j+1}` reproduces the exact lag-one smoothed cross-cov.
784    for j in 0..nb {
785        let mut cross = [[0.0_f64; MAX_ORDER]; MAX_ORDER];
786        if j + 1 <= nb - 1 {
787            // Both in the block: read the (j, j+1) sub-block of Cov(u).
788            for i in 0..order {
789                for k in 0..order {
790                    cross[i][k] = cov_u[j * order + i][(j + 1) * order + k];
791                }
792            }
793        } else {
794            // j+1 is the pinned node: read node j's rows of Cov(u, α_pin) = cvp.
795            for i in 0..order {
796                for k in 0..order {
797                    cross[i][k] = cvp[j * order + i][k];
798                }
799            }
800        }
801        let denom_inv = mat_inv(&sm_cov[j + 1], order, "leading-block gain denominator")?;
802        gains[j] = mat_mul(&cross, &denom_inv, order);
803    }
804    Ok(())
805}
806
807/// Fit at a FIXED `log λ` and order `m ∈ {1, 2, 3}`, σ² either supplied or
808/// profiled.
809pub fn fit_spline_scan_at(
810    x: &[f64],
811    y: &[f64],
812    w: &[f64],
813    log_lambda: f64,
814    sigma2: Option<f64>,
815    order: usize,
816) -> Result<SplineScanFit, String> {
817    if order == 0 || order > MAX_ORDER {
818        return Err(format!(
819            "spline scan: order must be in 1..={MAX_ORDER}, got {order}"
820        ));
821    }
822    let (nodes, ssr_within, n_obs) = pool_nodes(x, y, w, order)?;
823    let q = (-log_lambda).exp();
824    let pass = run_filter(&nodes, q, order)?;
825    let n = nodes.len();
826    let dof = (n_obs - order) as f64;
827    let sigma2 = match sigma2 {
828        Some(s) => {
829            if !(s.is_finite() && s > 0.0) {
830                return Err(format!("spline scan: invalid sigma2 {s}"));
831            }
832            s
833        }
834        None => (pass.sum_v2_over_f + ssr_within) / dof,
835    };
836    // Full diffuse restricted log-likelihood at this (λ, σ²), up to λ- and
837    // σ-free additive constants: −½[Σ log F̃ + dof·ln σ² + RSS/σ²]. At the
838    // profiled σ̂² the quadratic term collapses to the λ-free constant `dof`,
839    // matching `concentrated_criterion` up to that constant.
840    let rss = pass.sum_v2_over_f + ssr_within;
841    let restricted_loglik = -0.5 * (pass.sum_log_f + dof * sigma2.ln() + rss / sigma2);
842
843    // ── Smoother: ordinary RTS for the proper nodes (t ≥ order−1) plus an
844    // exact diffuse conditioning of the `order−1` leading nodes. ──
845    // The filtered distribution is fully proper from node order−1 onward (the
846    // diffuse rank, = order, is consumed by node order−1), so ordinary RTS is
847    // valid for t ≥ order−1. The first order−1 nodes are partially diffuse —
848    // their filtered covariance still carries unresolved diffuse mass and the
849    // RTS predicted-covariance inverse is singular there — and are recovered
850    // exactly, jointly, by `leading_block_smooth` (conditioning the whole
851    // leading block on the first proper smoothed node). For order = 1 there is
852    // no leading node and RTS covers every node down to t = 0.
853    let mut sm_state = vec![[0.0_f64; MAX_ORDER]; n];
854    let mut sm_cov = vec![[[0.0_f64; MAX_ORDER]; MAX_ORDER]; n];
855    let mut gains = vec![[[0.0_f64; MAX_ORDER]; MAX_ORDER]; n];
856    sm_state[n - 1] = pass.steps[n - 1].a_filt;
857    sm_cov[n - 1] = pass.steps[n - 1].p_filt;
858    for t in (order - 1..n - 1).rev() {
859        let p_next_pred = &pass.steps[t + 1].p_pred;
860        let delta = nodes[t + 1].x - nodes[t].x;
861        let f_t = transition(delta, order);
862        let p_inv = mat_inv(p_next_pred, order, "RTS predicted covariance")?;
863        let g = mat_mul(
864            &mat_mul(&pass.steps[t].p_filt, &mat_t(&f_t, order), order),
865            &p_inv,
866            order,
867        );
868        let mut dm: Vec2 = [0.0; MAX_ORDER];
869        for i in 0..order {
870            dm[i] = sm_state[t + 1][i] - pass.steps[t + 1].a_pred[i];
871        }
872        let corr = mat_vec(&g, &dm, order);
873        for i in 0..order {
874            sm_state[t][i] = pass.steps[t].a_filt[i] + corr[i];
875        }
876        let dp = mat_sub(&sm_cov[t + 1], p_next_pred, order);
877        let mut cov = mat_add(
878            &pass.steps[t].p_filt,
879            &mat_mul(&mat_mul(&g, &dp, order), &mat_t(&g, order), order),
880            order,
881        );
882        symmetrize(&mut cov, order);
883        sm_cov[t] = cov;
884        gains[t] = g;
885    }
886    // The order−1 partially-diffuse leading nodes by exact joint conditioning
887    // (the multi-node generalization of the m=2 reverse-Markov closure).
888    if order >= 2 {
889        leading_block_smooth(&mut sm_state, &mut sm_cov, &mut gains, &nodes, q, order)?;
890    }
891
892    let knots: Vec<f64> = nodes.iter().map(|n| n.x).collect();
893    let mean: Vec<f64> = sm_state.iter().map(|s| s[0]).collect();
894    // f′ lives at state index 1 — present for order ≥ 2, structurally 0 at m = 1.
895    let deriv: Vec<f64> = sm_state
896        .iter()
897        .map(|s| if order >= 2 { s[1] } else { 0.0 })
898        .collect();
899    let var: Vec<f64> = sm_cov.iter().map(|p| p[0][0] * sigma2).collect();
900    Ok(SplineScanFit {
901        order,
902        knots,
903        mean,
904        deriv,
905        var,
906        log_lambda,
907        sigma2,
908        restricted_loglik,
909        n_obs,
910        smoothed_state: sm_state,
911        smoothed_cov: sm_cov,
912        rts_gain: gains,
913        q,
914        node_weight: nodes.iter().map(|n| n.w).collect(),
915    })
916}
917
918/// Fit with `log λ` selected by the concentrated diffuse REML criterion:
919/// deterministic coarse grid then golden-section refinement (no RNG, no
920/// iteration-count sensitivity — same data ⇒ same fit).
921pub fn fit_spline_scan(
922    x: &[f64],
923    y: &[f64],
924    w: &[f64],
925    order: usize,
926) -> Result<SplineScanFit, String> {
927    if order == 0 || order > MAX_ORDER {
928        return Err(format!(
929            "spline scan: order must be in 1..={MAX_ORDER}, got {order}"
930        ));
931    }
932    let (nodes, ssr_within, n_obs) = pool_nodes(x, y, w, order)?;
933    // Covariate-rescaling equivariance (#1214). The order-`m` IWP process noise
934    // is `Q(δ) ∝ q · δ^{2m−1}`, so under an affine covariate rescale `x → a·x`
935    // (all abscissa gaps `δ → a·δ`) the posterior `f(x)` is *exactly* invariant
936    // iff the smoothing parameter co-transforms as `q → q / a^{2m−1}`, i.e.
937    // `log λ → log λ + (2m−1)·log a` (λ = 1/q). The whole smoother — criterion,
938    // fit, and the Gaussian-bridge `predict` — runs self-consistently in the raw
939    // covariate units, so the *only* place covariate scale leaks in is this
940    // outer `log λ` search: a fixed absolute bracket `[LOG_LAMBDA_LO,
941    // LOG_LAMBDA_HI]` does not track the data span, so at small/large covariate
942    // scale the equivariant optimum rails out of the bracket and the fit drifts.
943    // Anchor the bracket to the data's own length scale: search `log λ` around
944    // `(2m−1)·log L` where `L` is the abscissa span (which scales linearly with
945    // the covariate), so the search is performed in scale-free units and the
946    // selected `q · L^{2m−1}` — hence the posterior `f(x)` — is invariant.
947    let span = nodes.last().map(|n| n.x).unwrap_or(0.0) - nodes.first().map(|n| n.x).unwrap_or(0.0);
948    let scale_shift = if span.is_finite() && span > 0.0 {
949        (2 * order - 1) as f64 * span.ln()
950    } else {
951        0.0
952    };
953    let lo_anchor = LOG_LAMBDA_LO + scale_shift;
954    let hi_anchor = LOG_LAMBDA_HI + scale_shift;
955    let crit = |ll: f64| concentrated_criterion(&nodes, ssr_within, n_obs, ll, order);
956    let mut best_i = 0usize;
957    let mut best_v = f64::NEG_INFINITY;
958    let step = (hi_anchor - lo_anchor) / (LOG_LAMBDA_GRID - 1) as f64;
959    for i in 0..LOG_LAMBDA_GRID {
960        let ll = lo_anchor + step * i as f64;
961        let v = crit(ll)?;
962        if v > best_v {
963            best_v = v;
964            best_i = i;
965        }
966    }
967    let mut lo = lo_anchor + step * best_i.saturating_sub(1) as f64;
968    let mut hi = (lo_anchor + step * (best_i + 1) as f64).min(hi_anchor);
969    // Golden-section maximization on [lo, hi].
970    let inv_phi = 0.618_033_988_749_894_9_f64;
971    let mut x1 = hi - inv_phi * (hi - lo);
972    let mut x2 = lo + inv_phi * (hi - lo);
973    let mut f1 = crit(x1)?;
974    let mut f2 = crit(x2)?;
975    while hi - lo > LOG_LAMBDA_TOL {
976        if f1 < f2 {
977            lo = x1;
978            x1 = x2;
979            f1 = f2;
980            x2 = lo + inv_phi * (hi - lo);
981            f2 = crit(x2)?;
982        } else {
983            hi = x2;
984            x2 = x1;
985            f2 = f1;
986            x1 = hi - inv_phi * (hi - lo);
987            f1 = crit(x1)?;
988        }
989    }
990    fit_spline_scan_at(x, y, w, 0.5 * (lo + hi), None, order)
991}
992
993/// Lossless serializable snapshot of a [`SplineScanFit`] (#1034).
994///
995/// Carries exactly the smoother state the Gaussian-bridge `predict` replays:
996/// pooled knots, smoothed `(f, f′, …, f^{(m−1)})` states (`m` per knot),
997/// smoothed state covariances (unit-σ² scale, symmetric — stored as the
998/// upper triangle row-major, `m(m+1)/2` per knot), RTS backward gains (full
999/// `m×m` row-major — gains are NOT symmetric), pooled node weights, and the
1000/// three fit scalars. `q = e^{−log λ}` and the public `mean`/`deriv`/`var`
1001/// views are derived on restore rather than stored, so a snapshot cannot go
1002/// internally inconsistent. The layouts are order-derived; at the historical
1003/// cubic `m = 2` they are exactly the original `[f, f′]` / `[c00, c01, c11]` /
1004/// `[g00, g01, g10, g11]` triples, so pre-order-generality snapshots restore
1005/// unchanged.
1006#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
1007pub struct SplineScanState {
1008    /// Smoothing-spline order `m ∈ {1, 2, 3}` (`#[serde(default)]` → reads as
1009    /// the historical cubic `m = 2` for snapshots written before order
1010    /// generality).
1011    #[serde(default = "default_spline_scan_order")]
1012    pub order: usize,
1013    pub knots: Vec<f64>,
1014    /// Smoothed `(f, f′, …, f^{(m−1)})` per knot, row-major (`m` per knot).
1015    pub state: Vec<f64>,
1016    /// Smoothed covariance per knot at unit-σ² scale, upper triangle row-major
1017    /// (`m(m+1)/2` per knot): `[c00, c01, …, c0,m−1, c11, …, c_{m−1,m−1}]`.
1018    pub cov: Vec<f64>,
1019    /// RTS backward gain per knot, full `m×m` row-major (`m²` per knot); the
1020    /// last knot's gain is structurally unused and stored as written.
1021    pub gain: Vec<f64>,
1022    /// Pooled (tied-abscissa summed) observation weight per knot.
1023    pub node_weight: Vec<f64>,
1024    pub log_lambda: f64,
1025    pub sigma2: f64,
1026    pub restricted_loglik: f64,
1027    /// Raw observation count `n` (#1046).
1028    pub n_obs: u64,
1029}
1030
1031/// Serde default for [`SplineScanState::order`]: historical snapshots predate
1032/// order generality and are cubic (`m = 2`).
1033fn default_spline_scan_order() -> usize {
1034    2
1035}
1036
1037impl SplineScanFit {
1038    /// Snapshot the full smoother state for persistence (#1034).
1039    pub fn to_state(&self) -> SplineScanState {
1040        let order = self.order;
1041        let tri = order * (order + 1) / 2;
1042        let nk = self.knots.len();
1043        let mut state = Vec::with_capacity(order * nk);
1044        for s in &self.smoothed_state {
1045            state.extend_from_slice(&s[..order]);
1046        }
1047        let mut cov = Vec::with_capacity(tri * nk);
1048        for c in &self.smoothed_cov {
1049            for i in 0..order {
1050                for j in i..order {
1051                    cov.push(c[i][j]);
1052                }
1053            }
1054        }
1055        let mut gain = Vec::with_capacity(order * order * nk);
1056        for g in &self.rts_gain {
1057            for i in 0..order {
1058                for j in 0..order {
1059                    gain.push(g[i][j]);
1060                }
1061            }
1062        }
1063        SplineScanState {
1064            order: self.order,
1065            knots: self.knots.clone(),
1066            state,
1067            cov,
1068            gain,
1069            node_weight: self.node_weight.clone(),
1070            log_lambda: self.log_lambda,
1071            sigma2: self.sigma2,
1072            restricted_loglik: self.restricted_loglik,
1073            n_obs: self.n_obs as u64,
1074        }
1075    }
1076
1077    /// Rebuild the exact in-memory fit from a persisted snapshot (#1034).
1078    ///
1079    /// Validates shape, finiteness, strict knot ordering, positive weights and
1080    /// σ², so a corrupt payload fails loudly here instead of inside a later
1081    /// `predict`. The restored fit replays the Gaussian bridge bit-for-bit:
1082    /// every field `predict`/`edf`/`deriv_at_knot` reads is either stored
1083    /// verbatim or derived by the same expressions the fitter uses.
1084    pub fn from_state(state: &SplineScanState) -> Result<Self, String> {
1085        let order = state.order;
1086        if order == 0 || order > MAX_ORDER {
1087            return Err(format!(
1088                "spline scan state: order must be in 1..={MAX_ORDER}, got {order}"
1089            ));
1090        }
1091        let m = state.knots.len();
1092        if m < order + 1 {
1093            return Err(format!(
1094                "spline scan state: order {order} needs at least {} knots, got {m}",
1095                order + 1
1096            ));
1097        }
1098        let tri = order * (order + 1) / 2;
1099        if state.state.len() != order * m
1100            || state.cov.len() != tri * m
1101            || state.gain.len() != order * order * m
1102            || state.node_weight.len() != m
1103        {
1104            return Err(format!(
1105                "spline scan state: inconsistent lengths (order={order}, m={m}, state={}, cov={}, gain={}, weights={})",
1106                state.state.len(),
1107                state.cov.len(),
1108                state.gain.len(),
1109                state.node_weight.len()
1110            ));
1111        }
1112        let all = state
1113            .state
1114            .iter()
1115            .chain(&state.cov)
1116            .chain(&state.gain)
1117            .chain(&state.knots)
1118            .chain(&state.node_weight);
1119        for (i, v) in all.enumerate() {
1120            if !v.is_finite() {
1121                return Err(format!("spline scan state: non-finite entry at {i}"));
1122            }
1123        }
1124        if !(state.log_lambda.is_finite()
1125            && state.restricted_loglik.is_finite()
1126            && state.sigma2.is_finite()
1127            && state.sigma2 > 0.0)
1128        {
1129            return Err(format!(
1130                "spline scan state: invalid scalars (log_lambda={}, sigma2={}, restricted_loglik={})",
1131                state.log_lambda, state.sigma2, state.restricted_loglik
1132            ));
1133        }
1134        if state.knots.windows(2).any(|kk| !(kk[0] < kk[1])) {
1135            return Err("spline scan state: knots must be strictly increasing".to_string());
1136        }
1137        if state.node_weight.iter().any(|&w| w <= 0.0) {
1138            return Err("spline scan state: node weights must be positive".to_string());
1139        }
1140        let smoothed_state: Vec<Vec2> = state
1141            .state
1142            .chunks_exact(order)
1143            .map(|s| {
1144                let mut v = [0.0_f64; MAX_ORDER];
1145                v[..order].copy_from_slice(s);
1146                v
1147            })
1148            .collect();
1149        let smoothed_cov: Vec<Mat2> = state
1150            .cov
1151            .chunks_exact(tri)
1152            .map(|c| {
1153                let mut mm = [[0.0_f64; MAX_ORDER]; MAX_ORDER];
1154                let mut idx = 0;
1155                for i in 0..order {
1156                    for j in i..order {
1157                        mm[i][j] = c[idx];
1158                        mm[j][i] = c[idx];
1159                        idx += 1;
1160                    }
1161                }
1162                mm
1163            })
1164            .collect();
1165        let rts_gain: Vec<Mat2> = state
1166            .gain
1167            .chunks_exact(order * order)
1168            .map(|g| {
1169                let mut mm = [[0.0_f64; MAX_ORDER]; MAX_ORDER];
1170                for i in 0..order {
1171                    for j in 0..order {
1172                        mm[i][j] = g[i * order + j];
1173                    }
1174                }
1175                mm
1176            })
1177            .collect();
1178        let sigma2 = state.sigma2;
1179        if state.n_obs == 0 {
1180            return Err("spline scan state: n_obs must be positive".to_string());
1181        }
1182        let n_obs = state.n_obs as usize;
1183        Ok(Self {
1184            order,
1185            knots: state.knots.clone(),
1186            mean: smoothed_state.iter().map(|s| s[0]).collect(),
1187            deriv: smoothed_state.iter().map(|s| s[1]).collect(),
1188            var: smoothed_cov.iter().map(|c| c[0][0] * sigma2).collect(),
1189            log_lambda: state.log_lambda,
1190            sigma2,
1191            restricted_loglik: state.restricted_loglik,
1192            n_obs,
1193            smoothed_state,
1194            smoothed_cov,
1195            rts_gain,
1196            q: (-state.log_lambda).exp(),
1197            node_weight: state.node_weight.clone(),
1198        })
1199    }
1200
1201    /// Exact posterior `(mean, variance)` of `f` at an arbitrary abscissa.
1202    ///
1203    /// Interior points use the Gaussian bridge conditional on the two flanking
1204    /// smoothed states with the exact lag-one smoothed cross-covariance
1205    /// `Cov(α_t, α_{t+1} | y) = G_t · P^s_{t+1}`; exterior points extrapolate
1206    /// from the boundary state (linear mean, cubically growing variance).
1207    pub fn predict(&self, x_new: f64) -> Result<(f64, f64), String> {
1208        if !x_new.is_finite() {
1209            return Err("spline scan: non-finite prediction abscissa".to_string());
1210        }
1211        let n = self.knots.len();
1212        let order = self.order;
1213        let first = self.knots[0];
1214        let last = self.knots[n - 1];
1215        if x_new <= first {
1216            let delta = first - x_new;
1217            // Backward extrapolation through the reverse map α(x) = F⁻¹(α₁ − η).
1218            let f_t = transition(delta, order);
1219            let f_inv = mat_inv(&f_t, order, "backward extrapolation transition")?;
1220            let mean_s = mat_vec(&f_inv, &self.smoothed_state[0], order);
1221            let qm = process_noise(delta, self.q, order);
1222            let cov = mat_add(
1223                &mat_mul(
1224                    &mat_mul(&f_inv, &self.smoothed_cov[0], order),
1225                    &mat_t(&f_inv, order),
1226                    order,
1227                ),
1228                &mat_mul(&mat_mul(&f_inv, &qm, order), &mat_t(&f_inv, order), order),
1229                order,
1230            );
1231            return Ok((mean_s[0], cov[0][0] * self.sigma2));
1232        }
1233        if x_new >= last {
1234            let delta = x_new - last;
1235            let f_t = transition(delta, order);
1236            let mean_s = mat_vec(&f_t, &self.smoothed_state[n - 1], order);
1237            let cov = mat_add(
1238                &mat_mul(
1239                    &mat_mul(&f_t, &self.smoothed_cov[n - 1], order),
1240                    &mat_t(&f_t, order),
1241                    order,
1242                ),
1243                &process_noise(delta, self.q, order),
1244                order,
1245            );
1246            return Ok((mean_s[0], cov[0][0] * self.sigma2));
1247        }
1248        // Flanking knot interval via binary search.
1249        let t = match self.knots.binary_search_by(|k| k.total_cmp(&x_new)) {
1250            Ok(idx) => return Ok((self.mean[idx], self.var[idx])),
1251            Err(idx) => idx - 1,
1252        };
1253        let (xa, xb) = (self.knots[t], self.knots[t + 1]);
1254        let (d1, d2) = (x_new - xa, xb - x_new);
1255        let (f1m, f2m) = (transition(d1, order), transition(d2, order));
1256        let (q1, q2) = (
1257            process_noise(d1, self.q, order),
1258            process_noise(d2, self.q, order),
1259        );
1260        let q1_inv = mat_inv(&q1, order, "bridge left noise")?;
1261        let q2_inv = mat_inv(&q2, order, "bridge right noise")?;
1262        // p(α* | α_t, α_{t+1}) ∝ N(α*; F₁α_t, Q₁)·N(α_{t+1}; F₂α*, Q₂):
1263        //   Λ = Q₁⁻¹ + F₂ᵀQ₂⁻¹F₂,  mean = Λ⁻¹(Q₁⁻¹F₁ α_t + F₂ᵀQ₂⁻¹ α_{t+1}).
1264        let lambda = mat_add(
1265            &q1_inv,
1266            &mat_mul(&mat_mul(&mat_t(&f2m, order), &q2_inv, order), &f2m, order),
1267            order,
1268        );
1269        let lam_inv = mat_inv(&lambda, order, "bridge precision")?;
1270        let ca = mat_mul(&lam_inv, &mat_mul(&q1_inv, &f1m, order), order);
1271        let cb = mat_mul(
1272            &lam_inv,
1273            &mat_mul(&mat_t(&f2m, order), &q2_inv, order),
1274            order,
1275        );
1276        let ma = mat_vec(&ca, &self.smoothed_state[t], order);
1277        let mb = mat_vec(&cb, &self.smoothed_state[t + 1], order);
1278        let mut mean_s = [0.0_f64; MAX_ORDER];
1279        for i in 0..order {
1280            mean_s[i] = ma[i] + mb[i];
1281        }
1282        // Push the joint smoothed covariance of (α_t, α_{t+1}) through the
1283        // affine map: cross term uses Cov(α_t, α_{t+1}|y) = G_t · P^s_{t+1}.
1284        let cross = mat_mul(&self.rts_gain[t], &self.smoothed_cov[t + 1], order);
1285        let mut cov = mat_add(
1286            &mat_add(
1287                &mat_mul(
1288                    &mat_mul(&ca, &self.smoothed_cov[t], order),
1289                    &mat_t(&ca, order),
1290                    order,
1291                ),
1292                &mat_mul(
1293                    &mat_mul(&cb, &self.smoothed_cov[t + 1], order),
1294                    &mat_t(&cb, order),
1295                    order,
1296                ),
1297                order,
1298            ),
1299            &lam_inv,
1300            order,
1301        );
1302        let cab = mat_mul(&mat_mul(&ca, &cross, order), &mat_t(&cb, order), order);
1303        cov = mat_add(&cov, &mat_add(&cab, &mat_t(&cab, order), order), order);
1304        symmetrize(&mut cov, order);
1305        Ok((mean_s[0], cov[0][0] * self.sigma2))
1306    }
1307
1308    /// Exact effective degrees of freedom of the fitted smoother.
1309    ///
1310    /// For a Gaussian smoother the influence (hat) matrix is
1311    /// `S = Cov_post · W / σ²` (posterior mean is linear in `y` with that
1312    /// exact coefficient matrix), so
1313    /// `EDF = tr(S) = tr(W · Cov_post) / σ² = Σ_t w_t · Var_smoothed(f_t) / σ²`.
1314    /// This is the standard Gaussian-process identity — no second smoother
1315    /// pass and no approximation. Tied abscissae pool exactly: each raw row
1316    /// `i` in tie-group `k` contributes `∂f̂(x_k)/∂y_i = C̃_kk · w_i` (the
1317    /// pooled mean `ȳ_k` is precision-weighted), so the raw-row trace
1318    /// `Σ_i w_i · C̃_{k(i),k(i)}` collapses to `Σ_k W_k · C̃_kk` with the
1319    /// pooled weights `W_k`. `smoothed_cov` is stored at unit-σ² scale
1320    /// (`C̃ = Cov_post / σ²`), so the σ² factors cancel exactly.
1321    pub fn edf(&self) -> f64 {
1322        self.node_weight
1323            .iter()
1324            .zip(self.smoothed_cov.iter())
1325            .map(|(w, c)| w * c[0][0])
1326            .sum()
1327    }
1328
1329    /// Posterior `(mean, variance)` of the derivative `f′` at a knot index.
1330    pub fn deriv_at_knot(&self, t: usize) -> (f64, f64) {
1331        (
1332            self.smoothed_state[t][1],
1333            self.smoothed_cov[t][1][1] * self.sigma2,
1334        )
1335    }
1336
1337    /// Selected smoothing parameter `λ = e^{log λ}` (#1046).
1338    pub fn lambda(&self) -> f64 {
1339        self.log_lambda.exp()
1340    }
1341
1342    /// Raw observation count `n` used to profile σ² (#1046).
1343    pub fn n_obs(&self) -> usize {
1344        self.n_obs
1345    }
1346
1347    /// Gaussian deviance — the weighted residual sum of squares `Σ wᵢ(yᵢ − f̂ᵢ)²`
1348    /// (#1046). The profiled `σ² = RSS / (n − order)` (the restricted residual
1349    /// d.o.f.), so `RSS = σ²·(n − order)` recovers the deviance exactly without
1350    /// re-touching the raw rows the scan deliberately does not retain.
1351    pub fn deviance(&self) -> f64 {
1352        self.sigma2 * (self.n_obs as f64 - self.order as f64).max(0.0)
1353    }
1354}
1355
1356#[cfg(test)]
1357mod tests {
1358    use super::*;
1359
1360    /// #1034 persistence seam: snapshot → JSON → restore must replay the
1361    /// Gaussian bridge bit-for-bit — knot posteriors, off-knot bridge,
1362    /// boundary extrapolation, EDF, and derivative posteriors all compare
1363    /// with exact equality, because every replayed field is either stored
1364    /// verbatim or derived by the fitter's own expressions. Parameterized over
1365    /// the smoothing order so the order-derived state/cov/gain layouts
1366    /// (#1044: m=3 stores 3-wide state, 6-wide upper-tri cov, 9-wide gain) are
1367    /// each round-tripped.
1368    fn round_trip_predict_bit_for_bit(order: usize) {
1369        let n = 60usize;
1370        let x: Vec<f64> = (0..n).map(|i| (i as f64) / (n as f64 - 1.0)).collect();
1371        // Deterministic wiggly response with a tie pair to exercise pooling.
1372        let mut x = x;
1373        x[7] = x[6];
1374        let y: Vec<f64> = x
1375            .iter()
1376            .enumerate()
1377            .map(|(i, &xi)| {
1378                (6.0 * xi).sin() + 0.3 * (17.0 * xi).cos() + 0.05 * ((i * 37 % 11) as f64 - 5.0)
1379            })
1380            .collect();
1381        let w: Vec<f64> = (0..n).map(|i| 1.0 + 0.5 * ((i % 3) as f64)).collect();
1382        let fit = fit_spline_scan(&x, &y, &w, order).expect("scan fit");
1383        assert_eq!(fit.order, order);
1384        // The raw count is retained verbatim (n rows, one tie pair collapses a
1385        // knot but not the count) and drives the recovered deviance (#1046).
1386        assert_eq!(fit.n_obs, n);
1387
1388        let json = serde_json::to_string(&fit.to_state()).expect("serialize state");
1389        let state: SplineScanState = serde_json::from_str(&json).expect("deserialize state");
1390        let restored = SplineScanFit::from_state(&state).expect("restore fit");
1391
1392        assert_eq!(fit.n_obs, restored.n_obs);
1393        assert_eq!(fit.deviance().to_bits(), restored.deviance().to_bits());
1394        assert_eq!(fit.knots, restored.knots);
1395        assert_eq!(fit.mean, restored.mean);
1396        assert_eq!(fit.var, restored.var);
1397        assert_eq!(fit.deriv, restored.deriv);
1398        assert_eq!(fit.log_lambda.to_bits(), restored.log_lambda.to_bits());
1399        assert_eq!(fit.sigma2.to_bits(), restored.sigma2.to_bits());
1400        assert_eq!(fit.edf().to_bits(), restored.edf().to_bits());
1401        for t in 0..fit.knots.len() {
1402            let (d0, v0) = fit.deriv_at_knot(t);
1403            let (d1, v1) = restored.deriv_at_knot(t);
1404            assert_eq!(d0.to_bits(), d1.to_bits());
1405            assert_eq!(v0.to_bits(), v1.to_bits());
1406        }
1407        // Off-knot bridge, exact knot hit, and both extrapolation sides.
1408        for &xq in &[-0.2, 0.0, 0.013, 0.5, x[6], 0.987, 1.0, 1.3] {
1409            let (m0, v0) = fit.predict(xq).expect("predict original");
1410            let (m1, v1) = restored.predict(xq).expect("predict restored");
1411            assert_eq!(
1412                m0.to_bits(),
1413                m1.to_bits(),
1414                "mean drift at x={xq} (m={order})"
1415            );
1416            assert_eq!(
1417                v0.to_bits(),
1418                v1.to_bits(),
1419                "variance drift at x={xq} (m={order})"
1420            );
1421        }
1422
1423        // Corrupt payloads fail loudly, not inside a later predict.
1424        let mut bad = fit.to_state();
1425        bad.cov.truncate(bad.cov.len() - 1);
1426        SplineScanFit::from_state(&bad).expect_err("length mismatch must error");
1427        let mut bad = fit.to_state();
1428        bad.sigma2 = -1.0;
1429        SplineScanFit::from_state(&bad).expect_err("non-positive sigma2 must error");
1430        let mut bad = fit.to_state();
1431        bad.knots[2] = bad.knots[1];
1432        SplineScanFit::from_state(&bad).expect_err("non-increasing knots must error");
1433    }
1434
1435    #[test]
1436    fn state_snapshot_round_trips_predict_bit_for_bit() {
1437        round_trip_predict_bit_for_bit(2);
1438    }
1439
1440    /// #1044: the order-1 and order-3 layouts round-trip bit-for-bit too.
1441    #[test]
1442    fn state_snapshot_round_trips_predict_bit_for_bit_order1() {
1443        round_trip_predict_bit_for_bit(1);
1444    }
1445
1446    #[test]
1447    fn state_snapshot_round_trips_predict_bit_for_bit_order3() {
1448        round_trip_predict_bit_for_bit(3);
1449    }
1450
1451    /// Dense order-1 (random-walk / linear smoothing spline) posterior of the
1452    /// SAME intrinsic prior the order-1 scan integrates: improper level on
1453    /// `f_0`, increments `f_{t+1}−f_t ~ N(0, q·δ_t)`, observations `y_t` with
1454    /// precision `w_t` (unit σ²). Solve the tridiagonal precision densely and
1455    /// compare to the scan — the exact-equivalence gate for the new m=1 path.
1456    fn dense_rw_truth(x: &[f64], y: &[f64], w: &[f64], log_lambda: f64) -> (Vec<f64>, Vec<f64>) {
1457        let n = x.len();
1458        let q = (-log_lambda).exp();
1459        let mut prec = vec![vec![0.0_f64; n]; n];
1460        let mut rhs = vec![0.0_f64; n];
1461        for t in 0..n {
1462            prec[t][t] += w[t];
1463            rhs[t] += w[t] * y[t];
1464        }
1465        for t in 0..n - 1 {
1466            let p = 1.0 / (q * (x[t + 1] - x[t]));
1467            prec[t][t] += p;
1468            prec[t + 1][t + 1] += p;
1469            prec[t][t + 1] -= p;
1470            prec[t + 1][t] -= p;
1471        }
1472        // Dense inverse via Gauss-Jordan (small n in the test).
1473        let mut aug = prec.clone();
1474        let mut inv = vec![vec![0.0_f64; n]; n];
1475        for i in 0..n {
1476            inv[i][i] = 1.0;
1477        }
1478        for col in 0..n {
1479            let piv = (col..n)
1480                .max_by(|&a, &b| aug[a][col].abs().total_cmp(&aug[b][col].abs()))
1481                .unwrap();
1482            aug.swap(col, piv);
1483            inv.swap(col, piv);
1484            let d = aug[col][col];
1485            for k in 0..n {
1486                aug[col][k] /= d;
1487                inv[col][k] /= d;
1488            }
1489            for r in 0..n {
1490                if r == col {
1491                    continue;
1492                }
1493                let f = aug[r][col];
1494                if f == 0.0 {
1495                    continue;
1496                }
1497                for k in 0..n {
1498                    aug[r][k] -= f * aug[col][k];
1499                    inv[r][k] -= f * inv[col][k];
1500                }
1501            }
1502        }
1503        let mean: Vec<f64> = (0..n)
1504            .map(|i| (0..n).map(|j| inv[i][j] * rhs[j]).sum())
1505            .collect();
1506        let var: Vec<f64> = (0..n).map(|i| inv[i][i]).collect();
1507        (mean, var)
1508    }
1509
1510    /// The order-1 scan must reproduce the dense random-walk posterior exactly
1511    /// (mean, pointwise variance, and the EDF identity tr(S)=Σ w_t·Var_t/σ²) at
1512    /// the scan's own selected λ — the #1034-item-2 correctness gate.
1513    #[test]
1514    fn order_one_scan_matches_dense_random_walk_posterior() {
1515        let n = 30usize;
1516        let x: Vec<f64> = (0..n).map(|i| i as f64 / (n as f64 - 1.0)).collect();
1517        let y: Vec<f64> = x
1518            .iter()
1519            .enumerate()
1520            .map(|(i, &xi)| 2.0 * xi + 0.4 * (5.0 * xi).sin() + 0.05 * ((i * 13 % 7) as f64 - 3.0))
1521            .collect();
1522        let w = vec![1.0_f64; n];
1523        let fit = fit_spline_scan(&x, &y, &w, 1).expect("order-1 scan fit");
1524        assert_eq!(fit.order, 1);
1525
1526        let (mean, var) = dense_rw_truth(&x, &y, &w, fit.log_lambda);
1527        for t in 0..n {
1528            assert!(
1529                (fit.mean[t] - mean[t]).abs() <= 1e-7 * mean[t].abs().max(1e-3),
1530                "order-1 mean mismatch at {t}: scan={} dense={}",
1531                fit.mean[t],
1532                mean[t]
1533            );
1534            let se_scan = fit.var[t].sqrt();
1535            let se_dense = (var[t] * fit.sigma2).sqrt();
1536            assert!(
1537                (se_scan - se_dense).abs() <= 1e-7 * se_dense.max(1e-12),
1538                "order-1 SE mismatch at {t}: scan={se_scan} dense={se_dense}"
1539            );
1540        }
1541        // EDF identity against the dense posterior variance diagonal.
1542        let dense_edf: f64 = w.iter().zip(var.iter()).map(|(wt, vt)| wt * vt).sum();
1543        assert!(
1544            (fit.edf() - dense_edf).abs() <= 1e-7 * dense_edf.max(1e-12),
1545            "order-1 EDF mismatch: scan={} dense={dense_edf}",
1546            fit.edf()
1547        );
1548        // Order-1 derivative state is structurally absent.
1549        assert!(fit.deriv.iter().all(|&d| d == 0.0));
1550    }
1551}