tacet_core/analysis/gibbs.rs
//! Gibbs sampler for Student's t prior inference (spec v5.4, v5.6).
//!
//! This module implements Bayesian inference using a Student's t prior with
//! degrees of freedom ν=4, represented as a scale mixture of Gaussians:
//!
//! ```text
//! δ | λ ~ N(0, (σ²/λ) R)
//! λ ~ Gamma(ν/2, ν/2)
//! ```
//!
//! v5.6 adds a robust t-likelihood with precision parameter κ that prevents
//! false certainty when Σₙ is misestimated:
//!
//! ```text
//! Δ | δ, κ ~ N(δ, Σₙ/κ)
//! κ ~ Gamma(ν_ℓ/2, ν_ℓ/2) where ν_ℓ = 8
//! ```
//!
//! The posterior is computed via Gibbs sampling, alternating between:
//! 1. δ | λ, κ, Δ ~ N(μ(λ,κ), Q(λ,κ)⁻¹)
//! 2. λ | δ ~ Gamma((ν+9)/2, (ν + δᵀR⁻¹δ/σ²)/2)
//! 3. κ | δ, Δ ~ Gamma((ν_ℓ+9)/2, (ν_ℓ + s)/2) where s = (Δ-δ)ᵀΣₙ⁻¹(Δ-δ)
//!
//! This replaces the v5.2 mixture prior, fixing the correlation-induced
//! failure mode where high inter-decile correlations caused pathological
//! posterior shrinkage.
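//!
//! Marginalizing the auxiliary variables recovers the heavy-tailed forms:
//! integrating λ out of the scale mixture gives δ ~ t_ν(0, σ²R), and
//! integrating κ out gives a multivariate t likelihood with ν_ℓ degrees of
//! freedom. Both tails are therefore fatter than the Gaussian conditionals
//! used inside the sampler suggest.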

extern crate alloc;

use alloc::vec::Vec;
use core::f64::consts::PI;

use nalgebra::Cholesky;
use rand::prelude::*;
use rand::SeedableRng;
use rand_distr::Gamma;
use rand_xoshiro::Xoshiro256PlusPlus;

use crate::math;
use crate::types::{Matrix9, Vector9};

/// Degrees of freedom for the Student's t prior (ν = 4).
pub const NU: f64 = 4.0;

/// Degrees of freedom for robust t-likelihood (ν_ℓ = 8, spec §3.4.2 v5.6).
pub const NU_L: f64 = 8.0;

/// Total number of Gibbs iterations.
pub const N_GIBBS: usize = 256;

/// Number of burn-in iterations to discard.
pub const N_BURN: usize = 64;

/// Number of retained samples for posterior inference.
pub const N_KEEP: usize = N_GIBBS - N_BURN; // 192

/// Minimum λ value to prevent numerical issues.
const LAMBDA_MIN: f64 = 1e-10;

/// Maximum λ value to prevent numerical issues.
const LAMBDA_MAX: f64 = 1e10;

/// Minimum κ value to prevent numerical issues (v5.6).
const KAPPA_MIN: f64 = 1e-10;

/// Maximum κ value to prevent numerical issues (v5.6).
const KAPPA_MAX: f64 = 1e10;

/// Condition number threshold for triggering robust shrinkage (§3.3.5).
/// When exceeded, apply shrinkage to prevent GLS instability.
const CONDITION_NUMBER_THRESHOLD: f64 = 1e4;

/// Result of Gibbs sampling inference.
#[derive(Clone, Debug)]
pub struct GibbsResult {
    /// Posterior mean δ_post (average of retained samples).
    pub delta_post: Vector9,

    /// Posterior covariance Λ_post (sample covariance of retained samples).
    pub lambda_post: Matrix9,

    /// Posterior probability P(max_k |δ_k| > θ | Δ).
    pub leak_probability: f64,

    /// 95% credible interval for max effect magnitude.
    pub effect_magnitude_ci: (f64, f64),

    /// Retained δ draws from the Gibbs sampler.
    /// Used for effect estimation via `compute_effect_estimate`.
    pub delta_draws: Vec<Vector9>,

    /// Posterior mean of latent scale λ.
    pub lambda_mean: f64,

    /// Posterior standard deviation of λ.
    pub lambda_sd: f64,

    /// Coefficient of variation: λ_sd / λ_mean.
    pub lambda_cv: f64,

    /// Effective sample size of λ chain.
    pub lambda_ess: f64,

    /// True if mixing diagnostics pass: CV ≥ 0.1 AND ESS ≥ 20.
    pub lambda_mixing_ok: bool,

    // =========================================================================
    // v5.6 Kappa diagnostics - robust t-likelihood precision
    // =========================================================================
    /// v5.6: Posterior mean of likelihood precision κ.
    pub kappa_mean: f64,

    /// v5.6: Posterior standard deviation of κ.
    pub kappa_sd: f64,

    /// v5.6: Coefficient of variation: κ_sd / κ_mean.
    pub kappa_cv: f64,

    /// v5.6: Effective sample size of κ chain.
    pub kappa_ess: f64,

    /// v5.6: Whether κ mixing diagnostics pass: CV ≥ 0.1 AND ESS ≥ 20.
    pub kappa_mixing_ok: bool,
}

/// Gibbs sampler for Student's t prior inference.
pub struct GibbsSampler {
    /// Degrees of freedom (fixed at 4).
    nu: f64,

    /// Calibrated prior scale σ.
    sigma: f64,

    /// Cholesky factor L_R such that L_R L_Rᵀ = R.
    l_r: Matrix9,

    /// Deterministic RNG.
    rng: Xoshiro256PlusPlus,
}

impl GibbsSampler {
    /// Create a new Gibbs sampler with precomputed factorizations.
    ///
    /// # Arguments
    /// * `l_r` - Cholesky factor of R (correlation matrix)
    /// * `sigma` - Calibrated prior scale
    /// * `seed` - Deterministic RNG seed
    pub fn new(l_r: &Matrix9, sigma: f64, seed: u64) -> Self {
        Self {
            nu: NU,
            sigma,
            l_r: *l_r,
            rng: Xoshiro256PlusPlus::seed_from_u64(seed),
        }
    }

    /// Run Gibbs sampler and return posterior summaries.
    ///
    /// # Arguments
    /// * `delta_obs` - Observed quantile differences Δ
    /// * `sigma_n` - Likelihood covariance Σ_n
    /// * `theta` - Effect threshold for leak probability
    ///
    /// v5.6: Extended to sample (δ, λ, κ) for robust t-likelihood.
    pub fn run(&mut self, delta_obs: &Vector9, sigma_n: &Matrix9, theta: f64) -> GibbsResult {
        // Apply condition-number-based regularization to sigma_n (§3.3.5).
        // This prevents posterior instability when sigma_n is ill-conditioned,
        // which occurs in discrete timer mode with high quantization.
        let sigma_n_reg = regularize_sigma_n(sigma_n);

        // Precompute Cholesky of regularized Σ_n for efficiency
        let sigma_n_chol =
            Cholesky::new(sigma_n_reg).expect("regularize_sigma_n should ensure SPD");

        // Precompute Σ_n⁻¹ for efficiency (constant throughout Gibbs run).
        // This avoids creating a Matrix9 on the stack 256 times.
        let sigma_n_inv = Self::invert_via_cholesky(&sigma_n_chol);

        // Precompute R⁻¹ for efficiency (constant throughout Gibbs run).
        // This avoids creating a Matrix9 on the stack 256 times.
        let r_inv = self.compute_r_inverse();

        // Storage for retained samples
        let mut retained_deltas: Vec<Vector9> = Vec::with_capacity(N_KEEP);
        let mut retained_lambdas: Vec<f64> = Vec::with_capacity(N_KEEP);
        let mut retained_kappas: Vec<f64> = Vec::with_capacity(N_KEEP);

        // Initialize: λ⁽⁰⁾ = 1, κ⁽⁰⁾ = 1 (spec §3.4.4 v5.6)
        let mut lambda = 1.0;
        let mut kappa = 1.0;

        // Main Gibbs loop (spec §3.4.4 v5.6)
        for t in 0..N_GIBBS {
            // Step 1: δ | λ, κ, Δ (spec §3.4.4 Conditional 1).
            // Note: sigma_n_inv_delta is recomputed inside to account for κ scaling.
            let delta = self.sample_delta_given_lambda_kappa(
                &sigma_n_inv,
                &r_inv,
                delta_obs,
                &sigma_n_chol,
                lambda,
                kappa,
            );

            // Step 2: λ | δ (unchanged, spec §3.4.4 Conditional 2)
            lambda = self.sample_lambda_given_delta(&delta);

            // Step 3: κ | δ, Δ (NEW, spec §3.4.4 Conditional 3)
            kappa = self.sample_kappa_given_delta(delta_obs, &delta, &sigma_n_chol);

            // Store if past burn-in
            if t >= N_BURN {
                retained_deltas.push(delta);
                retained_lambdas.push(lambda);
                retained_kappas.push(kappa);
            }
        }

        // Compute posterior summaries
        self.compute_summaries(
            &retained_deltas,
            &retained_lambdas,
            &retained_kappas,
            sigma_n,
            theta,
        )
    }

    /// Sample δ | λ, κ, Δ from the conditional Gaussian (spec §3.4.4 v5.6).
    ///
    /// v5.6: Q(λ, κ) = κΣ_n⁻¹ + (λ/σ²) R⁻¹
    ///       μ(λ, κ) = Q(λ, κ)⁻¹ κΣ_n⁻¹ Δ
    ///       δ | λ, κ, Δ ~ N(μ(λ, κ), Q(λ, κ)⁻¹)
    ///
    /// **Spec compliance note (§3.4.4):** The spec requires that Σ_n⁻¹ and R⁻¹ be
    /// "computed via Cholesky solves, not explicit matrix inversion". We form
    /// explicit matrices here via Cholesky solves (solving Ax=I column by column),
    /// which IS numerically stable. This is acceptable under the spec's exception
    /// "unless demonstrably stable" (§3.3.5). The alternative (Woodbury identity)
    /// would require working with covariance matrices and introduces complexity.
    ///
    /// Note: sigma_n_inv and r_inv are precomputed once before the Gibbs loop
    /// to avoid repeated stack allocations.
    fn sample_delta_given_lambda_kappa(
        &mut self,
        sigma_n_inv: &Matrix9,
        r_inv: &Matrix9,
        delta_obs: &Vector9,
        sigma_n_chol: &Cholesky<f64, nalgebra::Const<9>>,
        lambda: f64,
        kappa: f64,
    ) -> Vector9 {
        let scale_factor = lambda / (self.sigma * self.sigma);

        // v5.6: Q(λ, κ) = κΣ_n⁻¹ + (λ/σ²) R⁻¹
        let q = sigma_n_inv * kappa + r_inv * scale_factor;

        // Cholesky of Q(λ, κ)
        let q_chol = match Cholesky::new(q) {
            Some(c) => c,
            None => {
                // Fallback: add jitter
                let jittered = q + Matrix9::identity() * 1e-8;
                Cholesky::new(jittered).expect("Q(λ, κ) must be SPD")
            }
        };

        // v5.6: Posterior mean: μ = Q⁻¹ κΣ_n⁻¹ Δ
        // First compute κΣ_n⁻¹ Δ
        let sigma_n_inv_delta = sigma_n_chol.solve(delta_obs);
        let kappa_sigma_n_inv_delta = sigma_n_inv_delta * kappa;
        let mu = q_chol.solve(&kappa_sigma_n_inv_delta);

        // Sample: δ = μ + L_Q⁻ᵀ z where z ~ N(0, I₉).
        // Since Q = L_Q L_Qᵀ, Cov(L_Q⁻ᵀ z) = L_Q⁻ᵀ L_Q⁻¹ = Q⁻¹, so solving the
        // upper-triangular system L_Qᵀ x = z yields a draw with the required
        // covariance. (Solving against L_Q itself would be wrong: it produces
        // covariance L_Q⁻¹ L_Q⁻ᵀ ≠ Q⁻¹ in general.)
        let z = self.sample_standard_normal_vector();
        let l_q_inv_t_z = q_chol
            .l()
            .transpose()
            .solve_upper_triangular(&z)
            .unwrap_or(z);

        mu + l_q_inv_t_z
    }

    /// Sample λ | δ from the conditional Gamma.
    ///
    /// q = δᵀ R⁻¹ δ
    /// λ | δ ~ Gamma((ν+9)/2, (ν + q/σ²)/2)
    fn sample_lambda_given_delta(&mut self, delta: &Vector9) -> f64 {
        // Compute q = δᵀ R⁻¹ δ via Cholesky solve:
        // R = L_R L_Rᵀ, so R⁻¹ δ = L_R⁻ᵀ L_R⁻¹ δ and
        // q = δᵀ R⁻¹ δ = ||L_R⁻¹ δ||²
        let y = self.l_r.solve_lower_triangular(delta).unwrap_or(*delta);
        let q = y.dot(&y);

        // Gamma parameters (shape-rate parameterization)
        let shape = (self.nu + 9.0) / 2.0; // (4 + 9) / 2 = 6.5
        let rate = (self.nu + q / (self.sigma * self.sigma)) / 2.0;

        // Sample from Gamma(shape, rate).
        // rand_distr uses shape-scale, so scale = 1/rate.
        let scale = 1.0 / rate;
        let gamma = Gamma::new(shape, scale).unwrap();
        let sample = gamma.sample(&mut self.rng);

        // Clamp to prevent numerical issues
        sample.clamp(LAMBDA_MIN, LAMBDA_MAX)
    }

    /// Sample κ | δ, Δ from the conditional Gamma (spec §3.4.4 Conditional 3 v5.6).
    ///
    /// s = (Δ - δ)ᵀ Σₙ⁻¹ (Δ - δ)
    /// κ | δ, Δ ~ Gamma((ν_ℓ + 9)/2, (ν_ℓ + s)/2)
    ///
    /// This samples the likelihood precision that allows the model to accommodate
    /// data that doesn't match the estimated Σₙ, preventing false certainty.
    fn sample_kappa_given_delta(
        &mut self,
        delta_obs: &Vector9,
        delta: &Vector9,
        sigma_n_chol: &Cholesky<f64, nalgebra::Const<9>>,
    ) -> f64 {
        // Compute residual: r = Δ - δ
        let residual = delta_obs - delta;

        // Compute s = rᵀ Σₙ⁻¹ r via a full Cholesky solve:
        // y = Σₙ⁻¹ r, then s = rᵀ y.
        let y = sigma_n_chol.solve(&residual);
        let s = residual.dot(&y);

        // Gamma parameters (shape-rate, spec §3.4.4)
        let shape = (NU_L + 9.0) / 2.0; // (8 + 9) / 2 = 8.5
        let rate = (NU_L + s) / 2.0;

        // Sample from Gamma(shape, rate).
        // rand_distr uses shape-scale, so scale = 1/rate.
        let scale = 1.0 / rate;
        let gamma = Gamma::new(shape, scale).unwrap();
        let sample = gamma.sample(&mut self.rng);

        // Clamp to prevent numerical issues
        sample.clamp(KAPPA_MIN, KAPPA_MAX)
    }

    /// Compute posterior summaries from retained samples.
    fn compute_summaries(
        &self,
        retained_deltas: &[Vector9],
        retained_lambdas: &[f64],
        retained_kappas: &[f64],
        _sigma_n: &Matrix9,
        theta: f64,
    ) -> GibbsResult {
        let n = retained_deltas.len() as f64;

        // Posterior mean of δ
        let delta_post = {
            let mut sum = Vector9::zeros();
            for delta in retained_deltas {
                sum += delta;
            }
            sum / n
        };

        // Posterior covariance of δ (sample covariance)
        let lambda_post = {
            let mut cov = Matrix9::zeros();
            for delta in retained_deltas {
                let diff = delta - delta_post;
                cov += diff * diff.transpose();
            }
            cov / (n - 1.0) // Unbiased estimator
        };

        // Leak probability: fraction exceeding threshold
        let mut exceed_count = 0;
        let mut max_effects: Vec<f64> = Vec::with_capacity(retained_deltas.len());
        for delta in retained_deltas {
            let max_effect = delta.iter().map(|x| x.abs()).fold(0.0_f64, f64::max);
            max_effects.push(max_effect);
            if max_effect > theta {
                exceed_count += 1;
            }
        }
        let leak_probability = exceed_count as f64 / n;

        // Effect magnitude CI (2.5th and 97.5th percentiles)
        max_effects.sort_by(|a, b| a.partial_cmp(b).unwrap());
        let ci_low = max_effects[(n * 0.025) as usize];
        let ci_high = max_effects[((n * 0.975) as usize).min(max_effects.len() - 1)];

        // Lambda diagnostics
        let lambda_mean = retained_lambdas.iter().sum::<f64>() / n;
        let lambda_var = retained_lambdas
            .iter()
            .map(|&l| math::sq(l - lambda_mean))
            .sum::<f64>()
            / (n - 1.0);
        let lambda_sd = math::sqrt(lambda_var);
        let lambda_cv = if lambda_mean > 0.0 {
            lambda_sd / lambda_mean
        } else {
            0.0
        };
        let lambda_ess = compute_ess(retained_lambdas);
        let lambda_mixing_ok = lambda_cv >= 0.1 && lambda_ess >= 20.0;

        // v5.6: Kappa diagnostics (parallel to lambda)
        let kappa_mean = retained_kappas.iter().sum::<f64>() / n;
        let kappa_var = retained_kappas
            .iter()
            .map(|&k| math::sq(k - kappa_mean))
            .sum::<f64>()
            / (n - 1.0);
        let kappa_sd = math::sqrt(kappa_var);
        let kappa_cv = if kappa_mean > 0.0 {
            kappa_sd / kappa_mean
        } else {
            0.0
        };
        let kappa_ess = compute_ess(retained_kappas);
        let kappa_mixing_ok = kappa_cv >= 0.1 && kappa_ess >= 20.0;

        // Store delta draws for effect estimation
        let delta_draws = retained_deltas.to_vec();

        GibbsResult {
            delta_post,
            lambda_post,
            leak_probability,
            effect_magnitude_ci: (ci_low, ci_high),
            delta_draws,
            lambda_mean,
            lambda_sd,
            lambda_cv,
            lambda_ess,
            lambda_mixing_ok,
            kappa_mean,
            kappa_sd,
            kappa_cv,
            kappa_ess,
            kappa_mixing_ok,
        }
    }

    /// Sample a 9D standard normal vector.
    fn sample_standard_normal_vector(&mut self) -> Vector9 {
        let mut z = Vector9::zeros();
        for i in 0..9 {
            z[i] = self.sample_standard_normal();
        }
        z
    }

    /// Sample from standard normal using Box-Muller transform.
    fn sample_standard_normal(&mut self) -> f64 {
        let u1: f64 = self.rng.random();
        let u2: f64 = self.rng.random();
        math::sqrt(-2.0 * math::ln(u1.max(1e-12))) * math::cos(2.0 * PI * u2)
    }

    /// Invert a matrix via its Cholesky factor (numerically stable).
    ///
    /// Computes A⁻¹ by solving A x_j = e_j for each standard basis vector.
    /// This is numerically stable since it uses forward/backward substitution
    /// on the Cholesky factor, not direct matrix inversion.
    fn invert_via_cholesky(chol: &Cholesky<f64, nalgebra::Const<9>>) -> Matrix9 {
        let mut inv = Matrix9::zeros();
        for j in 0..9 {
            let mut e = Vector9::zeros();
            e[j] = 1.0;
            let col = chol.solve(&e);
            for i in 0..9 {
                inv[(i, j)] = col[i];
            }
        }
        inv
    }

    /// Compute R⁻¹ via Cholesky solves (numerically stable).
    ///
    /// Uses the precomputed Cholesky factor L_R to solve R x_j = e_j for each
    /// basis vector. This is the standard numerically stable approach.
    fn compute_r_inverse(&self) -> Matrix9 {
        let mut r_inv = Matrix9::zeros();
        for j in 0..9 {
            let mut e = Vector9::zeros();
            e[j] = 1.0;
            // Solve L y = e, then Lᵀ x = y
            let y = self.l_r.solve_lower_triangular(&e).unwrap_or(e);
            let x = self.l_r.transpose().solve_upper_triangular(&y).unwrap_or(y);
            for i in 0..9 {
                r_inv[(i, j)] = x[i];
            }
        }
        r_inv
    }
}

/// Estimate condition number of a symmetric positive semi-definite matrix.
///
/// Uses Cholesky factorization when available: for SPD matrices,
/// cond(A) = cond(L)² ≈ (max(L_ii) / min(L_ii))².
/// If the factorization fails, the matrix is not SPD and is treated as
/// singular (condition number ∞).
fn estimate_condition_number(m: &Matrix9) -> f64 {
    // Try Cholesky factorization for an accurate condition number estimate
    if let Some(chol) = Cholesky::new(*m) {
        let l = chol.l();
        let diag: [f64; 9] = core::array::from_fn(|i| l[(i, i)].abs());
        let max_l = diag.iter().cloned().fold(0.0_f64, f64::max);
        let min_l = diag.iter().cloned().fold(f64::INFINITY, f64::min);

        if min_l < 1e-12 {
            return f64::INFINITY;
        }

        // cond(A) = cond(L)² for Cholesky L such that A = LLᵀ
        let cond_l = max_l / min_l;
        return cond_l * cond_l;
    }

    // Cholesky failed: matrix is definitely ill-conditioned
    f64::INFINITY
}

/// Regularize covariance matrix for GLS stability (§3.3.5).
///
/// Applies condition-number-based shrinkage when the matrix is ill-conditioned:
/// Σ ← (1-λ)Σ + λ·diag(Σ) where λ scales with condition severity.
/// For extremely ill-conditioned matrices, uses diagonal-only (OLS) as fallback.
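///
/// For example, at cond = 10⁵ the shrinkage weight below works out to
/// λ = 0.1 + 0.2·ln(10⁵/10⁴) ≈ 0.1 + 0.2·2.303 ≈ 0.56.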
fn regularize_sigma_n(sigma_n: &Matrix9) -> Matrix9 {
    let cond = estimate_condition_number(sigma_n);

    if cond <= CONDITION_NUMBER_THRESHOLD {
        // Well-conditioned and SPD: return as-is. If Cholesky fails despite
        // the condition estimate, fall through to the shrinkage path below.
        if Cholesky::new(*sigma_n).is_some() {
            return *sigma_n;
        }
    }

    // Extract diagonal for shrinkage target
    let diag_sigma = Matrix9::from_diagonal(&sigma_n.diagonal());

    // For extremely ill-conditioned matrices (cond > 10⁶), use diagonal-only (OLS).
    // This gives up on modeling correlations but preserves variance structure,
    // which is critical for correctly weighting quantiles in tail effect estimation.
    if cond > CONDITION_NUMBER_THRESHOLD * 1e2 || cond.is_infinite() {
        return diag_sigma + Matrix9::identity() * 1e-6;
    }

    // Moderately ill-conditioned: use aggressive shrinkage.
    // Scale lambda with the log of the condition number excess.
    let log_excess = (cond / CONDITION_NUMBER_THRESHOLD).ln().max(0.0);
    let lambda = (0.1 + 0.2 * log_excess).min(0.95); // Range: 10% to 95%

    // Shrink toward diagonal: (1-λ)Σ + λ·diag(Σ)
    let regularized = *sigma_n * (1.0 - lambda) + diag_sigma * lambda;

    // Ensure SPD with increasing jitter if needed
    for &eps in &[1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5] {
        let jittered = regularized + Matrix9::identity() * eps;
        if Cholesky::new(jittered).is_some() {
            return jittered;
        }
    }

    // Final fallback: use diagonal only
    diag_sigma + Matrix9::identity() * 1e-6
}

/// Compute effective sample size of a chain accounting for autocorrelation.
///
/// ESS = N / (1 + 2 * Σ_k ρ_k)
/// where ρ_k is the lag-k autocorrelation.
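/// For example, a chain whose only appreciable autocorrelation is ρ₁ = 0.5
/// has ESS = N / (1 + 2·0.5) = N/2.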
fn compute_ess(chain: &[f64]) -> f64 {
    let n = chain.len();
    if n < 2 {
        return n as f64;
    }

    let mean: f64 = chain.iter().sum::<f64>() / n as f64;
    let var: f64 = chain.iter().map(|&x| math::sq(x - mean)).sum::<f64>() / n as f64;

    if var < 1e-12 {
        return n as f64; // No variance, treat as independent
    }

    let mut sum_rho = 0.0;
    for k in 1..=50.min(n / 2) {
        let rho_k = autocorrelation(chain, k, mean, var);
        if rho_k < 0.05 {
            break;
        }
        sum_rho += rho_k;
    }

    n as f64 / (1.0 + 2.0 * sum_rho)
}

/// Compute lag-k autocorrelation.
fn autocorrelation(chain: &[f64], k: usize, mean: f64, var: f64) -> f64 {
    let n = chain.len();
    if k >= n {
        return 0.0;
    }

    let cov: f64 = (0..(n - k))
        .map(|i| (chain[i] - mean) * (chain[i + k] - mean))
        .sum::<f64>()
        / (n - k) as f64;

    cov / var
}

/// Public interface: Run Gibbs inference on observed data.
///
/// # Arguments
/// * `delta` - Observed quantile differences Δ
/// * `sigma_n` - Likelihood covariance Σ_n = Σ_rate / n
/// * `sigma_t` - Calibrated Student's t prior scale
/// * `l_r` - Cholesky factor of correlation matrix R
/// * `theta` - Effect threshold
/// * `seed` - Deterministic RNG seed
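///
/// # Example
///
/// A minimal usage sketch with illustrative values, assuming the crate is
/// `tacet_core` as the module path suggests (not a prescription for real
/// calibration inputs):
///
/// ```ignore
/// use tacet_core::analysis::gibbs::run_gibbs_inference;
/// use tacet_core::types::{Matrix9, Vector9};
///
/// let l_r = Matrix9::identity();            // Cholesky factor of R = I
/// let sigma_n = Matrix9::identity() * 4.0;  // likelihood covariance Σ_n
/// let delta = Vector9::from_row_slice(&[2.0; 9]);
/// let result = run_gibbs_inference(&delta, &sigma_n, 50.0, &l_r, 5.0, 42);
/// assert!((0.0..=1.0).contains(&result.leak_probability));
/// ```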
pub fn run_gibbs_inference(
    delta: &Vector9,
    sigma_n: &Matrix9,
    sigma_t: f64,
    l_r: &Matrix9,
    theta: f64,
    seed: u64,
) -> GibbsResult {
    let mut sampler = GibbsSampler::new(l_r, sigma_t, seed);
    sampler.run(delta, sigma_n, theta)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_gibbs_determinism() {
        let l_r = Matrix9::identity();
        let sigma_n = Matrix9::identity() * 100.0;
        let delta = Vector9::from_row_slice(&[10.0; 9]);
        let sigma_t = 50.0;
        let theta = 5.0;

        let result1 = run_gibbs_inference(&delta, &sigma_n, sigma_t, &l_r, theta, 42);
        let result2 = run_gibbs_inference(&delta, &sigma_n, sigma_t, &l_r, theta, 42);

        assert!(
            (result1.leak_probability - result2.leak_probability).abs() < 1e-10,
            "Same seed should give same result"
        );
    }

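    // Added sanity checks for the condition-number estimate. These rely only on
    // behavior visible in this file: cond(I) = 1 exactly, and a matrix that is
    // not SPD (here -I, whose Cholesky fails) is reported as ∞.
    #[test]
    fn test_condition_number_estimate() {
        let cond_identity = estimate_condition_number(&Matrix9::identity());
        assert!(
            (cond_identity - 1.0).abs() < 1e-12,
            "identity should have condition number 1, got {}",
            cond_identity
        );

        let not_spd = Matrix9::identity() * -1.0;
        assert!(
            estimate_condition_number(&not_spd).is_infinite(),
            "non-SPD matrix should report an infinite condition number"
        );
    }
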
    #[test]
    fn test_lambda_diagnostics() {
        let l_r = Matrix9::identity();
        let sigma_n = Matrix9::identity() * 100.0;
        let delta = Vector9::from_row_slice(&[50.0; 9]);
        let sigma_t = 50.0;
        let theta = 10.0;

        let result = run_gibbs_inference(&delta, &sigma_n, sigma_t, &l_r, theta, 42);

        // Lambda mean should be positive
        assert!(result.lambda_mean > 0.0);

        // Lambda SD should be positive
        assert!(result.lambda_sd > 0.0);

        // ESS should be reasonable (between 1 and N_KEEP)
        assert!(result.lambda_ess >= 1.0);
        assert!(result.lambda_ess <= N_KEEP as f64);
    }

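    // Added invariant checks grounded in compute_summaries: leak_probability is
    // a sample fraction, the CI endpoints come from a sorted vector (so the
    // interval must be ordered), and all post-burn-in draws are retained.
    #[test]
    fn test_output_invariants() {
        let l_r = Matrix9::identity();
        let sigma_n = Matrix9::identity() * 100.0;
        let delta = Vector9::from_row_slice(&[25.0; 9]);

        let result = run_gibbs_inference(&delta, &sigma_n, 50.0, &l_r, 10.0, 7);

        assert!(result.leak_probability >= 0.0 && result.leak_probability <= 1.0);
        let (ci_low, ci_high) = result.effect_magnitude_ci;
        assert!(ci_low <= ci_high, "CI must be ordered: {} > {}", ci_low, ci_high);
        assert_eq!(result.delta_draws.len(), N_KEEP);
    }
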
    #[test]
    fn test_large_effect_detection() {
        // With large effect, leak probability should be high
        let l_r = Matrix9::identity();
        let sigma_n = Matrix9::identity() * 100.0;
        let delta = Vector9::from_row_slice(&[500.0; 9]); // Large effect
        let sigma_t = 50.0;
        let theta = 10.0;

        let result = run_gibbs_inference(&delta, &sigma_n, sigma_t, &l_r, theta, 42);

        assert!(
            result.leak_probability > 0.95,
            "Large effect should give high leak probability, got {}",
            result.leak_probability
        );
    }

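    // Added check for the Cholesky-based inversion helper: (4I)⁻¹ = 0.25·I,
    // which the column-by-column solve should recover to machine precision.
    #[test]
    fn test_invert_via_cholesky() {
        let chol = Cholesky::new(Matrix9::identity() * 4.0).unwrap();
        let inv = GibbsSampler::invert_via_cholesky(&chol);
        let err = (inv - Matrix9::identity() * 0.25).norm();
        assert!(err < 1e-12, "inverse of 4I should be 0.25I, err = {}", err);
    }
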
    #[test]
    fn test_no_effect_low_probability() {
        // With no effect, leak probability should be low
        let l_r = Matrix9::identity();
        let sigma_n = Matrix9::identity() * 100.0;
        let delta = Vector9::zeros(); // No effect
        let sigma_t = 50.0;
        let theta = 100.0;

        let result = run_gibbs_inference(&delta, &sigma_n, sigma_t, &l_r, theta, 42);

        assert!(
            result.leak_probability < 0.5,
            "No effect should give low leak probability, got {}",
            result.leak_probability
        );
    }

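    // Added regression check for the δ conditional: with a tiny Σₙ the
    // likelihood dominates the prior, so the posterior mean should sit close
    // to the observed Δ. The tolerance is deliberately loose, and the test is
    // reproducible because the RNG seed is fixed.
    #[test]
    fn test_posterior_tracks_data_when_noise_small() {
        let l_r = Matrix9::identity();
        let sigma_n = Matrix9::identity() * 1e-4;
        let delta = Vector9::from_row_slice(&[10.0; 9]);

        let result = run_gibbs_inference(&delta, &sigma_n, 50.0, &l_r, 5.0, 42);

        for k in 0..9 {
            assert!(
                (result.delta_post[k] - 10.0).abs() < 0.5,
                "posterior mean should track data, got {} at index {}",
                result.delta_post[k],
                k
            );
        }
    }
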
    #[test]
    fn test_ess_computation() {
        // Test ESS with known autocorrelated sequence
        let chain: Vec<f64> = (0..100).map(|i| (i as f64).sin()).collect();
        let ess = compute_ess(&chain);

        // ESS should be less than N due to autocorrelation
        assert!(ess < 100.0);
        assert!(ess > 0.0);
    }

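    // Added edge-case check for compute_ess: a zero-variance chain takes the
    // var < 1e-12 early return and is treated as N independent samples.
    #[test]
    fn test_ess_constant_chain() {
        let chain = [1.0_f64; 50];
        assert_eq!(compute_ess(&chain), 50.0);
    }
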
    #[test]
    fn test_kappa_diagnostics() {
        // v5.6: Test kappa diagnostics similar to lambda diagnostics
        let l_r = Matrix9::identity();
        let sigma_n = Matrix9::identity() * 100.0;
        let delta = Vector9::from_row_slice(&[50.0; 9]);
        let sigma_t = 50.0;
        let theta = 10.0;

        let result = run_gibbs_inference(&delta, &sigma_n, sigma_t, &l_r, theta, 42);

        // Kappa mean should be positive
        assert!(result.kappa_mean > 0.0, "kappa_mean should be positive");

        // Kappa SD should be positive
        assert!(result.kappa_sd > 0.0, "kappa_sd should be positive");

        // Kappa ESS should be reasonable (between 1 and N_KEEP)
        assert!(
            result.kappa_ess >= 1.0,
            "kappa_ess should be >= 1, got {}",
            result.kappa_ess
        );
        assert!(
            result.kappa_ess <= N_KEEP as f64,
            "kappa_ess should be <= N_KEEP"
        );

        // CV should be positive when SD > 0
        assert!(result.kappa_cv >= 0.0, "kappa_cv should be non-negative");
    }

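    // Added checks for regularize_sigma_n: a well-conditioned SPD matrix passes
    // through untouched, and the output is always SPD (Cholesky succeeds) even
    // for a wildly ill-conditioned input.
    #[test]
    fn test_regularize_sigma_n() {
        let well_conditioned = Matrix9::identity() * 100.0;
        assert_eq!(regularize_sigma_n(&well_conditioned), well_conditioned);

        let mut ill = Matrix9::identity();
        ill[(0, 0)] = 1e12; // condition number 1e12 >> threshold
        let reg = regularize_sigma_n(&ill);
        assert!(
            Cholesky::new(reg).is_some(),
            "regularized matrix must be SPD"
        );
    }
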
    #[test]
    fn test_kappa_responds_to_residual_magnitude() {
        // v5.6: κ should be < 1 when residuals are larger than expected under Σₙ.
        // When the observed data differs significantly from the sampled δ,
        // the Gamma posterior for κ should shift toward smaller values.

        let l_r = Matrix9::identity();
        let sigma_n = Matrix9::identity(); // Small Σₙ
        let sigma_t = 50.0;
        let theta = 10.0;

        // Small effect: residuals small relative to Σₙ → κ ≈ 1
        let delta_small = Vector9::from_row_slice(&[1.0; 9]);
        let result_small = run_gibbs_inference(&delta_small, &sigma_n, sigma_t, &l_r, theta, 42);

        // Large effect: same Σₙ but much larger observed Δ → κ < 1
        // (The observed data is far from what the model expects)
        let delta_large = Vector9::from_row_slice(&[100.0; 9]);
        let result_large = run_gibbs_inference(&delta_large, &sigma_n, sigma_t, &l_r, theta, 42);

        // When residuals are large relative to Σₙ, kappa should be smaller
        // to inflate the likelihood covariance for robustness
        assert!(
            result_large.kappa_mean < result_small.kappa_mean,
            "Large residuals should give smaller kappa: {} vs {}",
            result_large.kappa_mean,
            result_small.kappa_mean
        );
    }
}
785}