gam_solve/rho_optimizer/fd_audit.rs
1use super::*;
2
3// FD-OK: this entire module is the outer-gradient finite-difference AUDIT
4// oracle. It deliberately computes central finite differences of the outer
5// REML/LAML criterion to verify the analytic gradient against — it is a
6// diagnostic that runs only behind the `outer_fd_audit_eligible` gates in
7// spatial_optimization.rs / custom_family/fit.rs, never on the fit math path.
8// The `fd`-named struct fields and fn names below are intrinsic to that audit
9// role, so the no-production-finite-differences guard treats this file as a
10// sanctioned audit region closed at the bottom of the module.
11
12/// Per-θ component of an outer-gradient finite-difference audit.
13#[derive(Clone, Debug)]
14pub struct OuterGradientFdComponent {
15 /// Human label for the block this coordinate belongs to (e.g. "timewiggle").
16 pub block: String,
17 /// Flat θ index.
18 pub index: usize,
19 /// Analytic ∂V/∂θ_i returned by the family evaluator.
20 pub analytic: f64,
21 /// Second-order central finite-difference of the outer criterion in θ_i at
22 /// the coarse step `h`. Its leading error is `O(h²)·V'''`, which is large
23 /// on a steeply-curved coordinate (e.g. the Matérn log-κ axis, whose
24 /// operator penalty scales like κ^{2m}, m = ν + d/2).
25 pub fd: f64,
26 /// Richardson 4th-order refinement of `fd`, combining the `h` and `h/2`
27 /// central differences to cancel the leading `O(h²)` truncation term
28 /// (residual `O(h⁴)`). Populated ONLY when the coarse `fd` gap looks like it
29 /// might trip the DESYNC band, so clean coordinates stay cheap (2 evals);
30 /// suspicious ones pay 2 extra evals to tell genuine derivative error
31 /// (`h`-independent gap) apart from pure FD truncation (`h²`-shrinking gap).
32 pub fd_refined: Option<f64>,
33}
34
35impl OuterGradientFdComponent {
36 /// The FD estimate that best matches the analytic gradient — the
37 /// Richardson-refined value when it is closer, else the coarse `fd`.
38 ///
39 /// Picking the closer of the two is deliberately CONSERVATIVE for the
40 /// desync verdict: a true derivative bug makes the analytic gradient differ
41 /// from the *true* derivative by an `h`-independent amount, so BOTH the
42 /// coarse and the refined FD (each converging to that true derivative)
43 /// stay far from it — the min gap remains large and DESYNC still fires.
44 /// Only a truncation-dominated gap (where refinement recovers the true
45 /// derivative) collapses, which is exactly the case that must NOT be
46 /// flagged. So `abs_gap` can only shrink vs the coarse-only value: no
47 /// previously-passing audit can regress.
48 pub fn best_fd(&self) -> f64 {
49 match self.fd_refined {
50 Some(r) if (self.analytic - r).abs() < (self.analytic - self.fd).abs() => r,
51 _ => self.fd,
52 }
53 }
54 /// Absolute analytic−FD gap against the best available FD estimate.
55 pub fn abs_gap(&self) -> f64 {
56 (self.analytic - self.best_fd()).abs()
57 }
58 /// analytic/fd ratio (None when fd≈0). A clean −1 signals a sign
59 /// convention; a stable constant ≠1 signals a dropped/extra additive term.
60 pub fn ratio(&self) -> Option<f64> {
61 let fd = self.best_fd();
62 if fd.abs() > 1e-12 {
63 Some(self.analytic / fd)
64 } else {
65 None
66 }
67 }
68}
69
70/// Result of a component-by-component finite-difference audit of an outer
71/// REML/LAML gradient at a fixed θ, plus the outer-Hessian eigenvalues.
72///
73/// This is the discriminating diagnostic that forks the two failure modes of a
74/// non-terminating outer loop: an **objective↔gradient desync** (analytic ≠ FD
75/// on some component → the trust region chases a phantom descent direction
76/// forever) versus **weak identifiability** (analytic ≈ FD everywhere but a
77/// near-zero outer-Hessian eigenvalue → a genuinely flat valley the optimizer
78/// crawls along). It is family-agnostic: any path that exposes an outer
79/// evaluator closure `θ ↦ (V, ∇V, H)` can call it.
80#[derive(Clone, Debug)]
81pub struct OuterGradientFdAudit {
82 /// Outer criterion value at θ₀.
83 pub value: f64,
84 /// Per-coordinate analytic-vs-FD comparison.
85 pub components: Vec<OuterGradientFdComponent>,
86 /// Eigenvalues of the (symmetrized) outer Hessian at θ₀, ascending. Empty
87 /// when no analytic/operator Hessian was available.
88 pub hessian_eigenvalues: Vec<f64>,
89}
90
91impl OuterGradientFdAudit {
92 /// Per-block L2 norm of the analytic gradient.
93 pub fn analytic_block_norms(&self) -> Vec<(String, f64)> {
94 let mut order: Vec<String> = Vec::new();
95 let mut acc: std::collections::HashMap<String, f64> = std::collections::HashMap::new();
96 for c in &self.components {
97 if !acc.contains_key(&c.block) {
98 order.push(c.block.clone());
99 }
100 *acc.entry(c.block.clone()).or_insert(0.0) += c.analytic * c.analytic;
101 }
102 order
103 .into_iter()
104 .map(|b| {
105 let v = acc.get(&b).copied().unwrap_or(0.0).sqrt();
106 (b, v)
107 })
108 .collect()
109 }
110
111 /// Worst per-coordinate analytic−FD gap and its component.
112 pub fn worst_component(&self) -> Option<&OuterGradientFdComponent> {
113 self.components.iter().max_by(|a, b| {
114 a.abs_gap()
115 .partial_cmp(&b.abs_gap())
116 .unwrap_or(std::cmp::Ordering::Equal)
117 })
118 }
119
120 /// Smallest-magnitude outer-Hessian eigenvalue (flatness proxy).
121 pub fn min_abs_eigenvalue(&self) -> Option<f64> {
122 self.hessian_eigenvalues
123 .iter()
124 .map(|e| e.abs())
125 .min_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
126 }
127
128 /// Emit a single human-readable verdict block to the log.
129 pub fn log_verdict(&self, context: &str) {
130 log::warn!("[OUTER-FD-AUDIT/{context}] value={:.6e}", self.value);
131 for (block, norm) in self.analytic_block_norms() {
132 log::warn!("[OUTER-FD-AUDIT/{context}] block={block} |g_analytic|={norm:.6e}");
133 }
134 for c in &self.components {
135 let ratio = c
136 .ratio()
137 .map(|r| format!("{r:.4}"))
138 .unwrap_or_else(|| "n/a".to_string());
139 // Report the FD estimate the verdict actually uses (`best_fd`); when
140 // a Richardson refinement was taken, surface both so the log shows
141 // the O(h²)→O(h⁴) truncation collapse explicitly.
142 let refined = match c.fd_refined {
143 Some(r) => format!(" fd_coarse={:.6e} fd_richardson={:.6e}", c.fd, r),
144 None => String::new(),
145 };
146 log::warn!(
147 "[OUTER-FD-AUDIT/{context}] block={} i={} analytic={:.6e} fd={:.6e} gap={:.3e} ratio={}{}",
148 c.block,
149 c.index,
150 c.analytic,
151 c.best_fd(),
152 c.abs_gap(),
153 ratio,
154 refined,
155 );
156 }
157 if !self.hessian_eigenvalues.is_empty() {
158 let evs: Vec<String> = self
159 .hessian_eigenvalues
160 .iter()
161 .map(|e| format!("{e:.4e}"))
162 .collect();
163 log::warn!(
164 "[OUTER-FD-AUDIT/{context}] hessian_eigenvalues=[{}] min_abs={:.4e}",
165 evs.join(", "),
166 self.min_abs_eigenvalue().unwrap_or(f64::NAN)
167 );
168 }
169 match self.worst_component() {
170 Some(w) if w.abs_gap() > 1e-3 && w.abs_gap() > 1e-3 * w.fd.abs().max(1.0) => {
171 log::warn!(
172 "[OUTER-FD-AUDIT/{context}] VERDICT=DESYNC worst_block={} worst_i={} gap={:.3e} (analytic gradient disagrees with FD of the criterion: fix the derivative)",
173 w.block,
174 w.index,
175 w.abs_gap()
176 );
177 }
178 _ => {
179 let flat = self.min_abs_eigenvalue().map(|m| m < 1e-6).unwrap_or(false);
180 if flat {
181 log::warn!(
182 "[OUTER-FD-AUDIT/{context}] VERDICT=FLATNESS min_abs_eig={:.3e} (analytic≈FD but the outer Hessian is near-singular: weak identifiability, fix termination not the gradient)",
183 self.min_abs_eigenvalue().unwrap_or(f64::NAN)
184 );
185 } else {
186 log::warn!(
187 "[OUTER-FD-AUDIT/{context}] VERDICT=CLEAN analytic≈FD and outer Hessian well-conditioned at this θ"
188 );
189 }
190 }
191 }
192 }
193}
194
195/// Run a component-by-component central finite-difference audit of an outer
196/// REML/LAML gradient at a fixed θ₀.
197///
198/// `eval` is the family's outer evaluator: `θ, mode ↦ (V, ∇V, H)` where the
199/// gradient is honored at `ValueAndGradient`/`ValueGradientHessian` and `H` at
200/// `ValueGradientHessian`. `block_for_index` labels each flat θ coordinate
201/// (used only to group the report). `h` is the FD step.
202///
203/// Cost: one `ValueGradientHessian` eval at θ₀ plus `2·len(θ)` `ValueOnly`
204/// evals. The caller is responsible for only invoking this on a
205/// diagnostic-sized problem (it is not part of the production hot loop).
206pub fn outer_gradient_fd_audit<EvalF>(
207 // fd-ok: FD-audit certificate, not in math path
208 theta0: &Array1<f64>,
209 h: f64,
210 block_for_index: impl Fn(usize) -> String,
211 mut eval: EvalF,
212) -> Result<OuterGradientFdAudit, String>
213where
214 EvalF: FnMut(
215 &Array1<f64>,
216 crate::estimate::reml::reml_outer_engine::EvalMode,
217 ) -> Result<(f64, Array1<f64>, HessianResult), String>,
218{
219 use crate::estimate::reml::reml_outer_engine::EvalMode;
220 let (value, analytic_grad, hess) = eval(theta0, EvalMode::ValueGradientHessian)?;
221 if analytic_grad.len() != theta0.len() {
222 return Err(format!(
223 "outer_gradient_fd_audit: analytic gradient length {} != theta length {}",
224 analytic_grad.len(),
225 theta0.len()
226 ));
227 }
228 let mut components = Vec::with_capacity(theta0.len());
229 for i in 0..theta0.len() {
230 let mut tp = theta0.clone();
231 tp[i] += h;
232 let mut tm = theta0.clone();
233 tm[i] -= h;
234 let (vp, _, _) = eval(&tp, EvalMode::ValueOnly)?;
235 let (vm, _, _) = eval(&tm, EvalMode::ValueOnly)?;
236 let fd = (vp - vm) / (2.0 * h);
237
238 // Cheap-by-default: only a coordinate whose coarse gap could trip the
239 // DESYNC band earns a Richardson refinement. The leading central-FD
240 // error is `O(h²)·V'''`; on a steeply-curved coordinate (the Matérn
241 // log-κ axis, whose operator penalty scales like κ^{2m}) that
242 // truncation alone can exceed the band even though the analytic
243 // gradient is exact. Combining the `h` and `h/2` central differences as
244 // `D_R = (4·D_{h/2} − D_h)/3` cancels the `O(h²)` term, leaving `O(h⁴)`
245 // — enough to separate truncation (gap collapses) from a real
246 // derivative bug (gap is `h`-independent, so `D_R` stays as far from the
247 // analytic value as `D_h` was). The clean-coordinate path stays at 2
248 // evals; the rare suspicious coordinate pays 2 more.
249 let coarse_gap = (analytic_grad[i] - fd).abs();
250 let desync_band = (1e-3_f64).max(1e-3 * fd.abs().max(1.0));
251 let fd_refined = if coarse_gap > desync_band {
252 let h2 = 0.5 * h;
253 let mut tp2 = theta0.clone();
254 tp2[i] += h2;
255 let mut tm2 = theta0.clone();
256 tm2[i] -= h2;
257 let (vp2, _, _) = eval(&tp2, EvalMode::ValueOnly)?;
258 let (vm2, _, _) = eval(&tm2, EvalMode::ValueOnly)?;
259 let fd_half = (vp2 - vm2) / (2.0 * h2);
260 Some((4.0 * fd_half - fd) / 3.0)
261 } else {
262 None
263 };
264 components.push(OuterGradientFdComponent {
265 block: block_for_index(i),
266 index: i,
267 analytic: analytic_grad[i],
268 fd,
269 fd_refined,
270 });
271 }
272 let hessian_eigenvalues = match hess.materialize_dense() {
273 Ok(Some(mut hmat)) => {
274 // Symmetrize defensively before the self-adjoint solve.
275 let n = hmat.nrows();
276 if n == hmat.ncols() && n > 0 {
277 for r in 0..n {
278 for c in (r + 1)..n {
279 let avg = 0.5 * (hmat[[r, c]] + hmat[[c, r]]);
280 hmat[[r, c]] = avg;
281 hmat[[c, r]] = avg;
282 }
283 }
284 match gam_linalg::faer_ndarray::FaerEigh::eigh(&hmat, faer::Side::Lower) {
285 Ok((vals, _)) => {
286 let mut v: Vec<f64> = vals.to_vec();
287 v.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
288 v
289 }
290 Err(_) => Vec::new(),
291 }
292 } else {
293 Vec::new()
294 }
295 }
296 _ => Vec::new(),
297 };
298 Ok(OuterGradientFdAudit {
299 value,
300 components,
301 hessian_eigenvalues,
302 })
303}
304// END-FD-OK