gam_solve/rho_prior_eval.rs
1//! Shared evaluation of the configured deterministic smoothing-parameter (ρ)
2//! prior objective.
3//!
4//! The cost / gradient / Hessian of the configured [`RhoPrior`] is the same
5//! math whether it is consumed by the custom-family outer objective or by the
6//! REML/LAML runtime; only the *invalid-prior policy* differs between callers.
7//! Custom-family handling surfaces malformed priors as hard errors, while the
8//! REML runtime folds them into the objective as `+inf` cost (and `NaN`
9//! gradient/Hessian) so the outer optimizer steps away from the offending
10//! region. This module owns the single source of truth for the prior math and
11//! lets each caller pick its policy explicitly via [`InvalidPriorPolicy`].
12//!
13//! For a single coordinate `r` (a log-precision, `λ = exp(r)`):
14//!
15//! * `Flat` — cost 0, grad 0, Hessian 0.
16//! * `Normal{mean,sd}`— with `inv_var = 1 / sd²`,
17//! cost `0.5 · (r − mean)² · inv_var`, grad `(r − mean) · inv_var`,
18//! Hessian `inv_var`.
19//! * `GammaPrecision{shape,rate}` — Gamma(shape, rate) hyperprior on the
20//! precision `λ = exp(r)`, using the REML/LAML MAP-in-lambda convention and
21//! contributing (up to an additive constant) cost `rate · λ − (shape − 1) · r`,
22//! grad `rate · λ − (shape − 1)`, Hessian `rate · λ`. Samplers over `r`
23//! include the `+r` Jacobian and therefore use the transformed density instead.
24//! * `Independent` — one prior per coordinate; the same per-coordinate math
25//! summed/stacked, with nested `Independent` priors rejected as invalid.
26//!
27//! The Hessian is diagonal and is returned as `Some` only when at least one
28//! diagonal entry is non-zero; an all-zero curvature contribution collapses to
29//! `None` so callers can skip adding an empty block.
30
31use gam_spec::RhoPrior;
32use ndarray::{Array1, Array2};
33
34/// Calibrated exponential rate `θ = −ln(tail_prob) / upper` of a
35/// penalized-complexity prior, from the tail statement `P(d > upper) =
36/// tail_prob` on the distance scale `d = exp(-ρ/2)`. Caller must validate the
37/// hyperparameters first; with `0 < tail_prob < 1` and `upper > 0` the result
38/// is finite and strictly positive.
39pub(crate) fn pc_prior_rate(upper: f64, tail_prob: f64) -> f64 {
40 -tail_prob.ln() / upper
41}
42
43/// Negative-log penalized-complexity prior contribution and its first/second
44/// derivatives in the log-precision `r`, for calibrated rate `θ`. The objective
45/// is minimized, so this returns the cost (up to the ρ-independent additive
46/// constant `−ln(θ/2)`), gradient and Hessian:
47///
48/// ```text
49/// cost = r/2 + θ exp(-r/2), grad = 1/2 − (θ/2) exp(-r/2), hess = (θ/4) exp(-r/2).
50/// ```
51///
52/// The curvature is strictly positive (`θ > 0`), so the contribution is convex
53/// and always supplies usable outer-Hessian information.
54pub(crate) fn pc_prior_terms(theta: f64, r: f64) -> (f64, f64, f64) {
55 let e = (-0.5 * r).exp();
56 (0.5 * r + theta * e, 0.5 - 0.5 * theta * e, 0.25 * theta * e)
57}
58
59/// Self-gated, *one-sided* penalized-complexity barrier used as the firth-general
60/// DEFAULT outer ρ prior on smoothing coordinates the caller left unset. It walls
61/// off the `λ → 0` / `ρ → −∞` under-smoothing degeneracy WITHOUT perturbing a
62/// well-identified `λ` — restoring the strict zero-downside guarantee (a clean
63/// fit stays byte-identical to plain REML), exactly the way the Jeffreys
64/// conditioning gate returns a byte-identical-zero contribution on a clean fit.
65///
66/// MOTIVATION. The plain PC prior contributes `cost = ρ/2 + θ e^{-ρ/2}` whose
67/// gradient `1/2 − (θ/2) e^{-ρ/2}` tends to the CONSTANT `+1/2` as `ρ → +∞`. That
68/// residual `O(1)` Occam pull shifts the REML optimum of EVERY identified `λ` by
69/// `Δρ ≈ (1/2)/H_reml = O(1/n)` — small, but never byte-zero, so it is a
70/// (tiny) downside on every fit including well-conditioned ones. The firth
71/// default exists only to remove the `λ → 0` degeneracy, not to bias an
72/// identified `λ`, so the default should be EXACTLY flat on the identified side.
73///
74/// SHAPE. Work on the distance scale `b(ρ) = e^{-ρ/2}` (the marginal-SD scale of
75/// the penalized component; `b → ∞` is the under-smoothing degeneracy). Gate at
76/// `ρ_gate` with `b_gate = e^{-ρ_gate/2}`:
77/// * `ρ ≥ ρ_gate` (identified side, distance `b ≤ b_gate`): cost = grad = hess
78/// = 0, BYTE-IDENTICALLY (a clean fit pays nothing — strict zero-downside).
79/// * `ρ < ρ_gate` (degenerate side, distance `b > b_gate`): a convex barrier
80/// that is C¹ at the gate,
81/// cost = θ [ b(ρ) − b_gate − (1/2) b_gate (ρ_gate − ρ) ],
82/// grad = θ [ −(1/2) b(ρ) + (1/2) b_gate ] (= 0 at the gate, < 0 below),
83/// hess = (θ/4) b(ρ) (> 0, always usable curvature).
84/// The gradient is negative below the gate (pushing `ρ` UP, away from the
85/// `λ → 0` wall) and decays continuously to zero AT the gate, so there is no
86/// persistent pull on the identified side.
87///
88/// GATE CHOICE. `ρ_gate = −2 ln(upper)`, i.e. the barrier engages only once the
89/// marginal-SD distance `b = e^{-ρ/2}` exceeds the SAME interpretable `upper`
90/// bound that calibrates `θ` (`P(b > upper) = tail_prob`). Any `λ` whose
91/// distance scale is within the plausible identified range `b ≤ upper` is left
92/// exactly untouched; the wall only asserts itself in the genuinely
93/// under-smoothed tail the prior was introduced to bound.
94pub(crate) fn firth_default_barrier_terms(theta: f64, upper: f64, r: f64) -> (f64, f64, f64) {
95 let rho_gate = -2.0 * upper.ln();
96 if r >= rho_gate {
97 // Identified side: byte-identically flat (strict zero-downside).
98 return (0.0, 0.0, 0.0);
99 }
100 let b = (-0.5 * r).exp();
101 let b_gate = (-0.5 * rho_gate).exp(); // == upper, but kept symbolic for clarity.
102 let cost = theta * (b - b_gate - 0.5 * b_gate * (rho_gate - r));
103 let grad = theta * (-0.5 * b + 0.5 * b_gate);
104 let hess = 0.25 * theta * b;
105 (cost, grad, hess)
106}
107
108/// What a caller wants done when the configured prior is malformed (e.g. a
109/// `Normal` with non-positive `sd`, a `GammaPrecision` with non-positive
110/// `shape`, an `Independent` whose length disagrees with `ρ`, or a nested
111/// `Independent`).
112#[derive(Debug, Clone, Copy, PartialEq, Eq)]
113pub enum InvalidPriorPolicy {
114 /// Return a descriptive [`RhoPriorError`]. Used by custom-family handling,
115 /// which validates configuration up front and reports problems precisely.
116 HardError,
117 /// Saturate the contribution so the outer optimizer is repelled: cost is
118 /// `+inf` and gradient/Hessian entries are `NaN`. Used by the REML/LAML
119 /// runtime, where the prior is one additive term inside a search.
120 Saturate,
121}
122
123/// Structured malformed-prior diagnostic produced under
124/// [`InvalidPriorPolicy::HardError`].
125#[derive(Debug, Clone, PartialEq, Eq)]
126pub enum RhoPriorError {
127 /// A length / shape disagreement (e.g. an `Independent` prior whose number
128 /// of coordinates does not match `ρ`).
129 DimensionMismatch { reason: String },
130 /// A structurally invalid prior, such as a nested `Independent` where a
131 /// scalar prior is required.
132 ConstraintViolation { reason: String },
133}
134
135impl RhoPriorError {
136 pub(crate) fn dimension_mismatch(reason: String) -> Self {
137 RhoPriorError::DimensionMismatch { reason }
138 }
139
140 pub(crate) fn constraint_violation(reason: String) -> Self {
141 RhoPriorError::ConstraintViolation { reason }
142 }
143}
144
145/// Cost, gradient, and (optional) diagonal Hessian of the configured ρ prior.
146#[derive(Debug, Clone)]
147pub struct RhoPriorEval {
148 pub cost: f64,
149 pub gradient: Array1<f64>,
150 /// Diagonal Hessian, `Some` only when at least one entry is non-zero.
151 pub hessian: Option<Array2<f64>>,
152}
153
154/// Per-coordinate scalar contribution `(cost, grad, hess)` of one *scalar*
155/// prior at `r`, or a structured error when the scalar prior is malformed.
156/// Nested `Independent` priors are not scalar and are rejected here.
157pub(crate) fn scalar_terms(
158 prior: &RhoPrior,
159 r: f64,
160 context: &str,
161) -> Result<(f64, f64, f64), RhoPriorError> {
162 match prior {
163 RhoPrior::Flat => Ok((0.0, 0.0, 0.0)),
164 RhoPrior::Normal { mean, sd } => {
165 if !mean.is_finite() || !sd.is_finite() || *sd <= 0.0 {
166 return Err(RhoPriorError::constraint_violation(format!(
167 "{context} Normal log-precision prior requires finite mean and sd > 0"
168 )));
169 }
170 let inv_var = 1.0 / (*sd * *sd);
171 let delta = r - *mean;
172 Ok((0.5 * delta * delta * inv_var, delta * inv_var, inv_var))
173 }
174 RhoPrior::GammaPrecision { shape, rate } => {
175 if !shape.is_finite() || *shape <= 0.0 || !rate.is_finite() || *rate < 0.0 {
176 return Err(RhoPriorError::constraint_violation(format!(
177 "{context} Gamma precision prior requires shape > 0 and rate >= 0"
178 )));
179 }
180 let lambda = r.exp();
181 // Deterministic REML/LAML uses the MAP-in-lambda convention; rho samplers add the Jacobian.
182 Ok((
183 *rate * lambda - (*shape - 1.0) * r,
184 *rate * lambda - (*shape - 1.0),
185 *rate * lambda,
186 ))
187 }
188 RhoPrior::PenalizedComplexity { upper, tail_prob } => {
189 if !upper.is_finite() || *upper <= 0.0 {
190 return Err(RhoPriorError::constraint_violation(format!(
191 "{context} penalized-complexity prior requires a finite upper > 0"
192 )));
193 }
194 if !tail_prob.is_finite() || *tail_prob <= 0.0 || *tail_prob >= 1.0 {
195 return Err(RhoPriorError::constraint_violation(format!(
196 "{context} penalized-complexity prior requires tail probability in (0, 1)"
197 )));
198 }
199 let theta = pc_prior_rate(*upper, *tail_prob);
200 Ok(pc_prior_terms(theta, r))
201 }
202 RhoPrior::Independent(_) => Err(RhoPriorError::constraint_violation(format!(
203 "{context} must be a scalar rho prior, not a nested Independent prior"
204 ))),
205 }
206}
207
208/// Saturated contribution returned (under [`InvalidPriorPolicy::Saturate`])
209/// when the prior is malformed: `+inf` cost, `NaN` gradient, `NaN` Hessian.
210pub(crate) fn saturated(len: usize) -> RhoPriorEval {
211 RhoPriorEval {
212 cost: f64::INFINITY,
213 gradient: Array1::from_elem(len, f64::NAN),
214 hessian: Some(Array2::from_elem((len, len), f64::NAN)),
215 }
216}
217
218/// Evaluate the configured ρ prior under the given invalid-prior `policy`.
219///
220/// On success returns the prior `cost`, `gradient`, and diagonal `hessian`
221/// (the latter `Some` only when non-zero). Under [`InvalidPriorPolicy::Saturate`]
222/// a malformed prior yields the saturated `+inf`/`NaN` contribution and never
223/// errors; under [`InvalidPriorPolicy::HardError`] it returns the structured
224/// [`RhoPriorError`].
225pub fn evaluate(
226 prior: &RhoPrior,
227 rho: &Array1<f64>,
228 policy: InvalidPriorPolicy,
229) -> Result<RhoPriorEval, RhoPriorError> {
230 match evaluate_strict(prior, rho) {
231 Ok(eval) => Ok(eval),
232 Err(err) => match policy {
233 InvalidPriorPolicy::HardError => Err(err),
234 InvalidPriorPolicy::Saturate => Ok(saturated(rho.len())),
235 },
236 }
237}
238
239/// Strict evaluation: always errors on a malformed prior. Policy mapping lives
240/// in [`evaluate`].
241pub fn evaluate_strict(
242 prior: &RhoPrior,
243 rho: &Array1<f64>,
244) -> Result<RhoPriorEval, RhoPriorError> {
245 let len = rho.len();
246 match prior {
247 RhoPrior::Flat => Ok(RhoPriorEval {
248 cost: 0.0,
249 gradient: Array1::zeros(len),
250 hessian: None,
251 }),
252 RhoPrior::Normal { .. }
253 | RhoPrior::GammaPrecision { .. }
254 | RhoPrior::PenalizedComplexity { .. } => {
255 let mut cost = 0.0;
256 let mut gradient = Array1::<f64>::zeros(len);
257 let mut hessian = Array2::<f64>::zeros((len, len));
258 let mut any_hessian = false;
259 for (idx, &r) in rho.iter().enumerate() {
260 let (c, g, h) = scalar_terms(prior, r, "rho prior")?;
261 cost += c;
262 gradient[idx] = g;
263 hessian[[idx, idx]] = h;
264 any_hessian |= h != 0.0;
265 }
266 Ok(RhoPriorEval {
267 cost,
268 gradient,
269 hessian: any_hessian.then_some(hessian),
270 })
271 }
272 RhoPrior::Independent(priors) => {
273 if priors.len() != len {
274 return Err(RhoPriorError::dimension_mismatch(format!(
275 "Independent rho prior length mismatch: got {}, expected {}",
276 priors.len(),
277 len
278 )));
279 }
280 let mut cost = 0.0;
281 let mut gradient = Array1::<f64>::zeros(len);
282 let mut hessian = Array2::<f64>::zeros((len, len));
283 let mut any_hessian = false;
284 for (idx, (prior, &r)) in priors.iter().zip(rho.iter()).enumerate() {
285 let (c, g, h) = scalar_terms(prior, r, &format!("rho prior coordinate {idx}"))?;
286 cost += c;
287 gradient[idx] = g;
288 hessian[[idx, idx]] = h;
289 any_hessian |= h != 0.0;
290 }
291 Ok(RhoPriorEval {
292 cost,
293 gradient,
294 hessian: any_hessian.then_some(hessian),
295 })
296 }
297 }
298}
299
300#[cfg(test)]
301mod tests {
302 use super::*;
303
304 pub(crate) fn approx(a: f64, b: f64) {
305 assert!((a - b).abs() <= 1e-12, "expected {a} ~= {b}");
306 }
307
308 /// The shared engine and the saturating policy must agree term-for-term on
309 /// every valid prior variant — this is the parity guarantee the two former
310 /// duplicate call sites relied on.
311 #[test]
312 pub(crate) fn cost_grad_hess_parity_across_valid_priors() {
313 let rho = Array1::from_vec(vec![-0.5, 0.25, 1.5, 0.7]);
314 let priors = vec![
315 RhoPrior::Flat,
316 RhoPrior::Normal { mean: 0.2, sd: 0.8 },
317 RhoPrior::GammaPrecision {
318 shape: 2.0,
319 rate: 0.5,
320 },
321 RhoPrior::PenalizedComplexity {
322 upper: 0.5,
323 tail_prob: 0.05,
324 },
325 RhoPrior::Independent(vec![
326 RhoPrior::Flat,
327 RhoPrior::Normal {
328 mean: -0.1,
329 sd: 1.3,
330 },
331 RhoPrior::GammaPrecision {
332 shape: 1.5,
333 rate: 0.0,
334 },
335 RhoPrior::PenalizedComplexity {
336 upper: 1.2,
337 tail_prob: 0.01,
338 },
339 ]),
340 ];
341 for prior in &priors {
342 // Both policies must produce identical results when the prior is
343 // valid: the policy only changes the malformed branch.
344 let hard = evaluate(prior, &rho, InvalidPriorPolicy::HardError)
345 .expect("valid prior must not error under HardError");
346 let sat =
347 evaluate(prior, &rho, InvalidPriorPolicy::Saturate).expect("Saturate never errors");
348 approx(hard.cost, sat.cost);
349 assert_eq!(hard.gradient, sat.gradient);
350 assert_eq!(hard.hessian, sat.hessian);
351
352 // Finite-difference check of gradient and (diagonal) Hessian. The
353 // gradient uses a small step; the Hessian needs a larger one
354 // because a central second difference amplifies roundoff like
355 // macheps/h² — the optimal step is ≈ macheps^¼ ≈ 1e-4.
356 let base = evaluate(prior, &rho, InvalidPriorPolicy::HardError).unwrap();
357 let cost_at = |k: usize, delta: f64| -> f64 {
358 let mut r = rho.clone();
359 r[k] += delta;
360 evaluate(prior, &r, InvalidPriorPolicy::HardError)
361 .unwrap()
362 .cost
363 };
364 let (h_grad, h_hess) = (1e-6, 1e-4);
365 for k in 0..rho.len() {
366 let fd_grad = (cost_at(k, h_grad) - cost_at(k, -h_grad)) / (2.0 * h_grad);
367 assert!(
368 (fd_grad - base.gradient[k]).abs() <= 1e-5,
369 "gradient mismatch at {k}: fd {fd_grad} vs {}",
370 base.gradient[k]
371 );
372 let fd_hess = (cost_at(k, h_hess) - 2.0 * base.cost + cost_at(k, -h_hess))
373 / (h_hess * h_hess);
374 let analytic_hess = base.hessian.as_ref().map_or(0.0, |h| h[[k, k]]);
375 assert!(
376 (fd_hess - analytic_hess).abs() <= 1e-4,
377 "hessian mismatch at {k}: fd {fd_hess} vs {analytic_hess}"
378 );
379 }
380 }
381 }
382
383 #[test]
384 pub(crate) fn invalid_prior_policy_branches() {
385 let rho = Array1::from_vec(vec![0.0, 0.0]);
386 let bad_normal = RhoPrior::Normal {
387 mean: 0.0,
388 sd: -1.0,
389 };
390 // HardError surfaces a structured error.
391 assert!(matches!(
392 evaluate(&bad_normal, &rho, InvalidPriorPolicy::HardError),
393 Err(RhoPriorError::ConstraintViolation { .. })
394 ));
395 // Saturate folds it into +inf / NaN.
396 let sat = evaluate(&bad_normal, &rho, InvalidPriorPolicy::Saturate).unwrap();
397 assert!(sat.cost.is_infinite() && sat.cost > 0.0);
398 assert!(sat.gradient.iter().all(|v| v.is_nan()));
399 assert!(sat.hessian.unwrap().iter().all(|v| v.is_nan()));
400
401 // Length-mismatched Independent.
402 let bad_len = RhoPrior::Independent(vec![RhoPrior::Flat]);
403 assert!(matches!(
404 evaluate(&bad_len, &rho, InvalidPriorPolicy::HardError),
405 Err(RhoPriorError::DimensionMismatch { .. })
406 ));
407 // Nested Independent.
408 let nested = RhoPrior::Independent(vec![
409 RhoPrior::Independent(vec![RhoPrior::Flat]),
410 RhoPrior::Flat,
411 ]);
412 assert!(matches!(
413 evaluate(&nested, &rho, InvalidPriorPolicy::HardError),
414 Err(RhoPriorError::ConstraintViolation { .. })
415 ));
416 }
417
418 // ---- Penalized-complexity prior ---------------------------------------
419
420 /// Normalized PC-prior log-density on ρ (includes the additive constant
421 /// `ln(θ/2)` the optimizer cost drops). `log p(ρ) = ln(θ/2) − ρ/2 − θ
422 /// exp(−ρ/2)`, the change-of-variables image of `d ~ Exp(θ)` under
423 /// `d = exp(−ρ/2)`.
424 pub(crate) fn pc_log_pdf(upper: f64, tail_prob: f64, r: f64) -> f64 {
425 let theta = pc_prior_rate(upper, tail_prob);
426 (0.5 * theta).ln() - 0.5 * r - theta * (-0.5 * r).exp()
427 }
428
429 #[test]
430 pub(crate) fn pc_rate_calibrates_to_tail_statement() {
431 // θ = −ln(α)/U solves P(d > U) = exp(−θU) = α exactly.
432 for &(upper, alpha) in &[(0.5_f64, 0.05_f64), (1.2, 0.01), (3.0, 0.25)] {
433 let theta = pc_prior_rate(upper, alpha);
434 let tail = (-theta * upper).exp();
435 assert!(
436 (tail - alpha).abs() < 1e-12,
437 "P(d>U)={tail} vs α={alpha} (U={upper})"
438 );
439 }
440 }
441
442 #[test]
443 pub(crate) fn pc_density_integrates_to_one_and_matches_tail() {
444 // Trapezoidal integration of the normalized ρ-density. d = exp(−ρ/2)
445 // ranges over (0, ∞); the density decays at both ends, so a wide grid
446 // captures essentially all the mass.
447 let upper = 0.5_f64;
448 let alpha = 0.05_f64;
449 let (lo, hi, n) = (-60.0_f64, 80.0_f64, 2_000_000usize);
450 let h = (hi - lo) / n as f64;
451 // ρ < −2 ln U ⇔ exp(−ρ/2) > U ⇔ d > U: the tail region.
452 let tail_boundary = -2.0 * upper.ln();
453 let mut total = 0.0;
454 let mut tail = 0.0;
455 for i in 0..=n {
456 let r = lo + i as f64 * h;
457 let w = if i == 0 || i == n { 0.5 } else { 1.0 };
458 let p = pc_log_pdf(upper, alpha, r).exp();
459 total += w * p;
460 if r <= tail_boundary {
461 tail += w * p;
462 }
463 }
464 total *= h;
465 tail *= h;
466 assert!((total - 1.0).abs() < 1e-4, "∫ p(ρ) dρ = {total}");
467 // P(d > U) recovered from the ρ-density must equal the calibration α.
468 assert!(
469 (tail - alpha).abs() < 1e-3,
470 "P(d>U) = {tail} vs α = {alpha}"
471 );
472 }
473
474 #[test]
475 pub(crate) fn pc_terms_are_negative_log_density_derivatives() {
476 // cost = −log p (up to the dropped constant); grad/hess are its ρ
477 // derivatives, cross-checked against a finite difference of pc_log_pdf.
478 let (upper, alpha) = (0.8_f64, 0.02_f64);
479 let theta = pc_prior_rate(upper, alpha);
480 // Small step for the first derivative; larger step for the second,
481 // whose central difference amplifies roundoff like macheps/h².
482 let (h1, h2) = (1e-6, 1e-4);
483 for &r in &[-2.0_f64, -0.3, 0.0, 1.7, 4.0] {
484 let (cost, grad, hess) = pc_prior_terms(theta, r);
485 // cost + log p(ρ) is the ρ-independent constant ln(θ/2).
486 approx(cost + pc_log_pdf(upper, alpha, r), (0.5 * theta).ln());
487 // grad = −d/dρ log p (FD on the log-density).
488 let dlp =
489 (pc_log_pdf(upper, alpha, r + h1) - pc_log_pdf(upper, alpha, r - h1)) / (2.0 * h1);
490 let neg_dlp = -dlp;
491 assert!(
492 (grad - neg_dlp).abs() < 1e-5,
493 "grad {grad} vs {neg_dlp} at r={r}"
494 );
495 // hess = −d²/dρ² log p (FD), and is strictly positive (convex).
496 let d2lp = (pc_log_pdf(upper, alpha, r + h2) - 2.0 * pc_log_pdf(upper, alpha, r)
497 + pc_log_pdf(upper, alpha, r - h2))
498 / (h2 * h2);
499 let neg_d2lp = -d2lp;
500 assert!(
501 (hess - neg_d2lp).abs() < 1e-4,
502 "hess {hess} vs {neg_d2lp} at r={r}"
503 );
504 assert!(hess > 0.0, "PC curvature must be positive, got {hess}");
505 }
506 }
507
508 #[test]
509 pub(crate) fn pc_prior_pulls_toward_simpler_model() {
510 // The simpler (base) model is more smoothing: larger ρ ⇒ larger
511 // precision λ = exp(ρ) ⇒ the penalized component collapses. The PC cost
512 // must therefore make *under*-smoothing (small ρ, wiggly) more expensive
513 // than over-smoothing of the same magnitude — an asymmetric, convex
514 // bowl whose gradient at the base side is bounded by 1/2.
515 let prior = RhoPrior::PenalizedComplexity {
516 upper: 1.0,
517 tail_prob: 0.05,
518 };
519 let cost = |r: f64| {
520 evaluate(
521 &prior,
522 &Array1::from_vec(vec![r]),
523 InvalidPriorPolicy::HardError,
524 )
525 .unwrap()
526 .cost
527 };
528 // Wiggly (ρ = −4) is penalized far more than smooth (ρ = +4).
529 assert!(
530 cost(-4.0) > cost(4.0),
531 "under-smoothing must cost more: {} vs {}",
532 cost(-4.0),
533 cost(4.0)
534 );
535 // As ρ → +∞ the cost grows only linearly (slope 1/2): a gentle pull, not
536 // a wall — the data can still buy complexity.
537 let g_far = evaluate(
538 &prior,
539 &Array1::from_vec(vec![25.0]),
540 InvalidPriorPolicy::HardError,
541 )
542 .unwrap()
543 .gradient[0];
544 assert!(
545 (g_far - 0.5).abs() < 1e-3,
546 "far over-smoothing slope {g_far}"
547 );
548 }
549
550 #[test]
551 pub(crate) fn pc_prior_rejects_invalid_hyperparameters() {
552 let rho = Array1::from_vec(vec![0.0]);
553 for bad in [
554 RhoPrior::PenalizedComplexity {
555 upper: 0.0,
556 tail_prob: 0.05,
557 },
558 RhoPrior::PenalizedComplexity {
559 upper: -1.0,
560 tail_prob: 0.05,
561 },
562 RhoPrior::PenalizedComplexity {
563 upper: 1.0,
564 tail_prob: 0.0,
565 },
566 RhoPrior::PenalizedComplexity {
567 upper: 1.0,
568 tail_prob: 1.0,
569 },
570 RhoPrior::PenalizedComplexity {
571 upper: 1.0,
572 tail_prob: f64::NAN,
573 },
574 ] {
575 assert!(matches!(
576 evaluate(&bad, &rho, InvalidPriorPolicy::HardError),
577 Err(RhoPriorError::ConstraintViolation { .. })
578 ));
579 let sat = evaluate(&bad, &rho, InvalidPriorPolicy::Saturate).unwrap();
580 assert!(sat.cost.is_infinite() && sat.cost > 0.0);
581 }
582 }
583}