1use crate::estimate::EstimationError;
2use crate::estimate::{FitGeometry, UnifiedFitResult};
3use crate::pirls;
4use faer::Mat as FaerMat;
5use faer::linalg::matmul::matmul;
6use faer::prelude::ReborrowMut;
7use faer::{Accum, Par};
8use gam_linalg::faer_ndarray::{FaerArrayView, FaerCholesky};
9use gam_linalg::matrix::{PsdWeightsView, SignedWeightsView};
10use gam_linalg::utils::StableSolver;
11use gam_problem::LinkFunction;
12use ndarray::{Array1, Array2, ArrayView1, ShapeBuilder, s};
13use std::fmt;
14
/// Failure modes of the ALO (approximate leave-one-out) diagnostics in this file.
///
/// Every variant except `InfluenceMatrixFailed` carries a human-readable
/// `reason`; the `Display` impl prints that text verbatim.
#[derive(Debug, Clone)]
pub enum AloError {
    /// A caller-supplied input failed validation (shape, finiteness, missing
    /// geometry, ...).
    InvalidInput { reason: String },
    /// A per-observation weight input failed validation.
    WeightInvalid { reason: String },
    /// The design matrix could not be used, e.g. it could not be densified.
    DesignDegenerate { reason: String },
    /// The penalized Hessian could not be factorized, or the finished
    /// diagnostics contained NaN. The call sites in this file always report
    /// `condition_number` as infinity.
    InfluenceMatrixFailed { condition_number: f64 },
    /// A per-observation leave-one-out quantity could not be computed.
    LooComputationFailed { reason: String },
}
42
43impl fmt::Display for AloError {
44 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
45 match self {
46 AloError::InvalidInput { reason }
47 | AloError::WeightInvalid { reason }
48 | AloError::DesignDegenerate { reason }
49 | AloError::LooComputationFailed { reason } => f.write_str(reason),
50 AloError::InfluenceMatrixFailed { condition_number } => {
51 write!(
52 f,
53 "ALO influence matrix failed (condition number {condition_number:.3e})"
54 )
55 }
56 }
57 }
58}
59
// Empty impl: `AloError` relies on the default `std::error::Error` methods
// and on its `Debug`/`Display` implementations.
impl std::error::Error for AloError {}
61
62impl From<AloError> for EstimationError {
63 fn from(err: AloError) -> EstimationError {
64 match err {
65 AloError::InvalidInput { reason }
66 | AloError::WeightInvalid { reason }
67 | AloError::DesignDegenerate { reason }
68 | AloError::LooComputationFailed { reason } => EstimationError::InvalidInput(reason),
69 AloError::InfluenceMatrixFailed { condition_number } => {
70 EstimationError::ModelIsIllConditioned { condition_number }
71 }
72 }
73 }
74}
75
76impl From<AloError> for String {
77 fn from(err: AloError) -> String {
78 err.to_string()
79 }
80}
81
/// Per-observation ALO diagnostics. All vectors have one entry per design row.
///
/// The field descriptions below follow how `compute_alo_from_input_inner`
/// fills the struct.
#[derive(Debug, Clone)]
pub struct AloDiagnostics {
    /// Leave-one-out linear predictor for each row (one-step update, or the
    /// exact scalar solve when a score/curvature callback is supplied).
    pub eta_tilde: Array1<f64>,
    /// `sqrt(max(phi * x_i' H^{-1} x_i, 0))` for each row.
    pub se_bayes: Array1<f64>,
    /// `sqrt(max(phi * sum_r w_r (x_r' H^{-1} x_i)^2, 0))` for each row, with
    /// `w` the Hessian-side weights.
    pub se_sandwich: Array1<f64>,
    /// Copy of the fitted linear predictor passed in as `eta`.
    pub pred_identity: Array1<f64>,
    /// Leverage `a_ii = max(w_i, 0) * x_i' H^{-1} x_i`.
    pub leverage: Array1<f64>,
    /// Copy of the Hessian-side weights used for the leverage.
    pub fisherweights: Array1<f64>,
}
96
/// One-step leave-one-out update of the linear predictor for a single row.
///
/// Both `eta_hat` and the working response `z` are first centred by `offset`;
/// the correction `x_hinv_x * score_weight * (eta - z) / denom` is then added
/// and the offset restored. `denom` is supplied by the caller (it passes
/// `1 - a_ii`) and is not checked here.
#[inline]
fn alo_eta_updatewith_offset(
    eta_hat: f64,
    z: f64,
    offset: f64,
    x_hinv_x: f64,
    score_weight: f64,
    denom: f64,
) -> f64 {
    let centered_eta = eta_hat - offset;
    let centered_z = z - offset;
    let weighted_residual = score_weight * (centered_eta - centered_z);
    let correction = x_hinv_x * weighted_residual / denom;
    offset + centered_eta + correction
}
113
/// Per-observation callback used by the exact scalar leave-one-out solve.
///
/// Called as `f(i, eta)` for row `i` at trial predictor `eta`; the returned
/// pair is consumed as `(ell_prime, ell_double)`, i.e. the score-like term and
/// its derivative with respect to `eta`. Must be `Sync` because it is invoked
/// from a rayon parallel iterator.
pub type AloScalarScoreCurvature<'a> = dyn Fn(usize, f64) -> (f64, f64) + Sync + 'a;
124
/// Cap on outer Newton iterations in `alo_eta_exact_frozen_curvature`.
const ALO_EXACT_SCALAR_MAX_ITERS: usize = 64;

/// Absolute residual tolerance at which the scalar Newton solve is declared
/// converged.
const ALO_EXACT_SCALAR_TOL: f64 = 1e-12;
136
/// Reasons the scalar Newton solve in `alo_eta_exact_frozen_curvature` can
/// fail. Each variant records the numbers needed to reproduce the failure.
#[derive(Debug, Clone, Copy, PartialEq)]
enum AloExactScalarError {
    /// The score/curvature callback returned a non-finite value at `eta`.
    NonFiniteScoreCurvature {
        eta: f64,
        ell_prime: f64,
        ell_double: f64,
    },
    /// The Newton Jacobian was non-finite or no larger in magnitude than
    /// `ALO_DENOMINATOR_MIN`.
    DegenerateJacobian {
        eta: f64,
        jacobian: f64,
    },
    /// `residual / jacobian` was not finite; `next` is the iterate the full
    /// step would have produced.
    NonFiniteStep {
        eta: f64,
        residual: f64,
        jacobian: f64,
        next: f64,
    },
    /// The solve ended without reaching `ALO_EXACT_SCALAR_TOL`; `residual` and
    /// `eta` describe the last accepted iterate.
    MaxIterations {
        iterations: usize,
        residual: f64,
        eta: f64,
    },
}
180
181impl fmt::Display for AloExactScalarError {
182 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
183 match *self {
184 AloExactScalarError::NonFiniteScoreCurvature {
185 eta,
186 ell_prime,
187 ell_double,
188 } => write!(
189 f,
190 "non-finite score/curvature at eta={eta:.6e}: ell_prime={ell_prime:.6e}, ell_double={ell_double:.6e}"
191 ),
192 AloExactScalarError::DegenerateJacobian { eta, jacobian } => write!(
193 f,
194 "degenerate Newton Jacobian at eta={eta:.6e}: jacobian={jacobian:.6e}, min={ALO_DENOMINATOR_MIN:.1e}"
195 ),
196 AloExactScalarError::NonFiniteStep {
197 eta,
198 residual,
199 jacobian,
200 next,
201 } => write!(
202 f,
203 "non-finite Newton step from eta={eta:.6e}: residual={residual:.6e}, jacobian={jacobian:.6e}, next={next:.6e}"
204 ),
205 AloExactScalarError::MaxIterations {
206 iterations,
207 residual,
208 eta,
209 } => write!(
210 f,
211 "did not converge within {iterations} iterations: residual={residual:.6e}, eta={eta:.6e}, tol={ALO_EXACT_SCALAR_TOL:.1e}"
212 ),
213 }
214 }
215}
216
/// Maximum number of step halvings tried per Newton iteration in
/// `alo_eta_exact_frozen_curvature` before the line search gives up.
const ALO_EXACT_SCALAR_BACKTRACKS: usize = 40;
223
224#[inline]
225fn alo_eta_exact_frozen_curvature(
226 eta_hat: f64,
227 a_ii: f64,
228 score_curvature: &dyn Fn(f64) -> (f64, f64),
229) -> Result<f64, AloExactScalarError> {
230 let residual_and_jac = |eta: f64| -> Result<(f64, f64), AloExactScalarError> {
254 let (ell_prime, ell_double) = score_curvature(eta);
255 if !ell_prime.is_finite() || !ell_double.is_finite() {
256 return Err(AloExactScalarError::NonFiniteScoreCurvature {
257 eta,
258 ell_prime,
259 ell_double,
260 });
261 }
262 Ok((eta - eta_hat - a_ii * ell_prime, 1.0 - a_ii * ell_double))
263 };
264
265 let mut eta = eta_hat;
266 let (mut residual, mut jac) = residual_and_jac(eta)?;
267 for _ in 0..ALO_EXACT_SCALAR_MAX_ITERS {
268 if residual.abs() <= ALO_EXACT_SCALAR_TOL {
269 return Ok(eta);
270 }
271 if jac.abs() <= ALO_DENOMINATOR_MIN || !jac.is_finite() {
272 return Err(AloExactScalarError::DegenerateJacobian { eta, jacobian: jac });
273 }
274 let step = residual / jac;
275 if !step.is_finite() {
276 return Err(AloExactScalarError::NonFiniteStep {
277 eta,
278 residual,
279 jacobian: jac,
280 next: eta - step,
281 });
282 }
283 let mut t = 1.0;
288 let mut advanced = false;
289 for _ in 0..ALO_EXACT_SCALAR_BACKTRACKS {
290 let trial = eta - t * step;
291 if let Ok((r_trial, j_trial)) = residual_and_jac(trial) {
292 if r_trial.abs() < residual.abs() {
293 eta = trial;
294 residual = r_trial;
295 jac = j_trial;
296 advanced = true;
297 break;
298 }
299 }
300 t *= 0.5;
301 }
302 if !advanced {
303 break;
304 }
305 }
306 Err(AloExactScalarError::MaxIterations {
307 iterations: ALO_EXACT_SCALAR_MAX_ITERS,
308 residual,
309 eta,
310 })
311}
312
/// Bayesian variance of the linear predictor for one row: the quadratic form
/// `x' H^{-1} x` scaled by the dispersion `phi`.
#[inline]
fn bayesvar_eta(phi: f64, x_hinv_x: f64) -> f64 {
    phi * x_hinv_x
}
317
/// Sandwich variance of the linear predictor for one row: the already
/// accumulated "meat" quadratic form scaled by the dispersion `phi`.
#[inline]
fn sandwichvar_eta_from_meat(phi: f64, meat_quad: f64) -> f64 {
    phi * meat_quad
}
322
/// How far below zero a variance may fall before it is treated as a genuine
/// failure rather than round-off: `1e-12` times `max(|scale|, 1)`.
#[inline]
fn variance_negative_tolerance(scale: f64) -> f64 {
    let magnitude = scale.abs().max(1.0);
    1e-12 * magnitude
}
328
/// Leverage above this value is counted as "high" in the logged summary.
const LEVERAGE_HIGH_THRESHOLD: f64 = 0.99;
/// Leverage above this value additionally triggers a per-observation warning.
const LEVERAGE_VERY_HIGH_THRESHOLD: f64 = 0.999;
/// Cut-offs for the "share of observations above x" figures in the log output.
const LEVERAGE_RATE_THRESHOLDS: [f64; 3] = [0.90, 0.95, 0.99];
/// Quantiles (median, p95, p99) reported in the leverage log line.
const LEVERAGE_PERCENTILES: [f64; 3] = [0.50, 0.95, 0.99];
/// Smallest admissible magnitude for ALO denominators, Newton Jacobians and
/// `dmu/deta` before the computation is treated as degenerate.
const ALO_DENOMINATOR_MIN: f64 = 1e-12;
/// Memory budget (256 MiB) used when sizing multi-block ALO observation chunks.
const MULTIBLOCK_ALO_MEMORY_BUDGET_BYTES: usize = 256 * 1024 * 1024;

/// Maximum number of observations (right-hand-side columns) solved against the
/// penalized Hessian in one batch.
const ALO_RHS_BLOCK_COLS: usize = 8192;

/// Relative tolerance for the symmetry check on the penalized Hessian; the
/// scale is `max(|a|, |b|, 1)` for each mirrored pair of entries.
const HESSIAN_SYMMETRY_REL_TOL: f64 = 1e-8;

// NOTE(review): not referenced in the visible part of this file; by its name,
// presumably a ridge added to per-observation block systems in the
// multi-block path. Confirm against its use site.
const ALO_LOCAL_BLOCK_RIDGE: f64 = 1e-6;

// NOTE(review): not referenced in the visible part of this file; by its name,
// presumably the pivot magnitude below which a small LU solve is declared
// singular. Confirm against its use site.
const LU_PIVOT_SINGULAR_TOL: f64 = 1e-12;
361
/// Index of the `quantile` order statistic in a sorted sample of
/// `sample_size` elements, using nearest-rank rounding on `0..=len-1`.
///
/// Returns 0 for samples of size 0 or 1; the result is always clamped to the
/// last valid index.
#[inline]
fn percentile_index(sample_size: usize, quantile: f64) -> usize {
    match sample_size.checked_sub(1) {
        // Empty or single-element samples have only one possible index.
        None | Some(0) => 0,
        Some(last) => {
            let position = (quantile * last as f64).round();
            // The float-to-usize cast saturates, so negative or NaN positions
            // become 0 and oversized ones are clamped by `min`.
            (position as usize).min(last)
        }
    }
}
370
371#[inline]
372fn percentile_from_sorted(sorted: &[f64], quantile: f64) -> f64 {
373 if sorted.is_empty() {
374 0.0
375 } else {
376 sorted[percentile_index(sorted.len(), quantile)]
377 }
378}
379
380#[inline]
381fn multiblock_col_offsets(block_designs: &[Array2<f64>]) -> Vec<usize> {
382 let mut offsets = Vec::with_capacity(block_designs.len());
383 let mut off = 0usize;
384 for design in block_designs {
385 offsets.push(off);
386 off += design.ncols();
387 }
388 offsets
389}
390
391#[inline]
392fn multiblock_alo_parallel_leverage_chunk_size(
393 p_tot: usize,
394 n_blocks: usize,
395 n_obs: usize,
396 max_workers: usize,
397) -> usize {
398 if p_tot == 0 || n_blocks == 0 || n_obs == 0 {
399 return 1;
400 }
401
402 let workers = max_workers.max(1);
408 let per_worker_budget = (MULTIBLOCK_ALO_MEMORY_BUDGET_BYTES / workers).max(1);
409 let elem_count_per_obs = p_tot.saturating_mul(n_blocks.saturating_add(1)).max(1);
410 let bytes_per_obs = elem_count_per_obs
411 .saturating_mul(std::mem::size_of::<f64>())
412 .max(1);
413 let budget_obs = (per_worker_budget / bytes_per_obs).max(1);
414 budget_obs.min(n_obs)
415}
416
417fn compute_alo_diagnostics_from_pirls_impl(
418 base: &pirls::PirlsResult,
419 y: ArrayView1<f64>,
420 link: LinkFunction,
421) -> Result<AloDiagnostics, EstimationError> {
422 compute_alo_diagnostics_from_pirls_inner(base, y, link).map_err(EstimationError::from)
423}
424
425fn alo_link_needs_exact_curvature_refinement(likelihood: &gam_problem::GlmLikelihoodSpec) -> bool {
438 use gam_problem::ResponseFamily;
439 matches!(
440 (&likelihood.spec.response, likelihood.link_function()),
441 (ResponseFamily::Binomial, LinkFunction::Logit)
442 | (ResponseFamily::Poisson, LinkFunction::Log)
443 )
444}
445
446fn compute_alo_diagnostics_from_pirls_inner(
447 base: &pirls::PirlsResult,
448 y: ArrayView1<f64>,
449 link: LinkFunction,
450) -> Result<AloDiagnostics, AloError> {
451 let x_dense_arc = base
452 .x_transformed
453 .try_to_dense_arc("ALO diagnostics require dense transformed design")
454 .map_err(|reason| AloError::DesignDegenerate { reason })?;
455 let x_dense = x_dense_arc.as_ref();
456 let n = x_dense.nrows();
457
458 let phi = match link {
460 LinkFunction::Log => 1.0,
461 LinkFunction::Logit
462 | LinkFunction::Probit
463 | LinkFunction::CLogLog
464 | LinkFunction::Sas
465 | LinkFunction::BetaLogistic => 1.0,
466 LinkFunction::Identity => {
467 use rayon::iter::{IntoParallelIterator, ParallelIterator};
468 let rss: f64 = (0..n)
469 .into_par_iter()
470 .map(|i| {
471 let r = y[i] - base.finalmu[i];
472 base.finalweights[i] * r * r
473 })
474 .sum();
475 let n_pos = (0..n).filter(|&i| base.finalweights[i] > 0.0).count();
482 let dof = (n_pos as f64) - base.edf;
483 let denom = dof.max(1.0);
484 rss / denom
485 }
486 };
487
488 let e = &base.reparam_result.e_transformed;
489 let ridge = base.ridge_passport.laplacehessianridge().max(0.0);
490
491 let h_dense_for_alo = base
495 .dense_stabilizedhessian_transformed(
496 "ALO diagnostics require exact dense stabilized penalized Hessian",
497 )
498 .map_err(|e| match e {
499 EstimationError::InvalidInput(reason) => AloError::InvalidInput { reason },
500 other => AloError::InvalidInput {
501 reason: format!("{other:?}"),
502 },
503 })?;
504
505 let canonical_scale: Option<Array1<f64>> =
524 if alo_link_needs_exact_curvature_refinement(&base.likelihood) {
525 let mut c = Array1::<f64>::zeros(n);
526 for i in 0..n {
527 let dmu = base.solve_dmu_deta[i];
528 let w_h = base.finalweights[i];
529 c[i] = if dmu.abs() <= ALO_DENOMINATOR_MIN || !dmu.is_finite() || !w_h.is_finite() {
530 f64::NAN
531 } else {
532 w_h / dmu
533 };
534 }
535 Some(c)
536 } else {
537 None
538 };
539
540 let inv_link_for_closure = base.likelihood.spec.link.clone();
541 let score_curvature_closure = canonical_scale.as_ref().map(|scale| {
542 move |i: usize, eta: f64| -> (f64, f64) {
543 let (mu, dmu) = crate::mixture_link::inverse_link_mu_d1_for_inverse_link(
544 &inv_link_for_closure,
545 eta,
546 )
547 .unwrap_or((f64::NAN, f64::NAN));
548 let c_i = scale[i];
549 (c_i * (mu - y[i]), c_i * dmu)
550 }
551 });
552 let score_curvature_ref: Option<&AloScalarScoreCurvature> = score_curvature_closure
553 .as_ref()
554 .map(|f| f as &AloScalarScoreCurvature);
555
556 let input = AloInput {
558 design: x_dense,
559 penalized_hessian: &h_dense_for_alo,
560 hessian_weights: base.final_weights_signed(),
561 score_weights: base.solve_weights_psd(),
562 working_response: &base.solveworking_response,
563 eta: &base.final_eta,
564 offset: &base.final_offset,
565 link,
566 phi,
567 penalty_root: if e.nrows() > 0 { Some(e) } else { None },
568 ridge,
569 score_curvature: score_curvature_ref,
570 };
571
572 let result = compute_alo_from_input_inner(&input)?;
573
574 log_leverage_diagnostics(&result.leverage, phi);
576
577 let has_nan_pred = result.eta_tilde.iter().any(|&x| x.is_nan());
579 let has_nan_se_bayes = result.se_bayes.iter().any(|&x| x.is_nan());
580 let has_nan_se_sandwich = result.se_sandwich.iter().any(|&x| x.is_nan());
581 let has_nan_leverage = result.leverage.iter().any(|&x| x.is_nan());
582
583 if has_nan_pred || has_nan_se_bayes || has_nan_se_sandwich || has_nan_leverage {
584 log::error!("[GAM ALO] NaN values found in ALO diagnostics:");
585 log::error!(
586 "[GAM ALO] eta_tilde: {} NaN values",
587 result.eta_tilde.iter().filter(|&&x| x.is_nan()).count()
588 );
589 log::error!(
590 "[GAM ALO] se_bayes: {} NaN values",
591 result.se_bayes.iter().filter(|&&x| x.is_nan()).count()
592 );
593 log::error!(
594 "[GAM ALO] se_sandwich: {} NaN values",
595 result.se_sandwich.iter().filter(|&&x| x.is_nan()).count()
596 );
597 log::error!(
598 "[GAM ALO] leverage: {} NaN values",
599 result.leverage.iter().filter(|&&x| x.is_nan()).count()
600 );
601 return Err(AloError::InfluenceMatrixFailed {
602 condition_number: f64::INFINITY,
603 });
604 }
605
606 Ok(result)
607}
608
609fn log_leverage_diagnostics(leverage: &Array1<f64>, phi: f64) {
611 let n = leverage.len();
612 if n == 0 {
613 return;
614 }
615
616 let mut invalid_count = 0usize;
617 let mut high_leverage_count = 0usize;
618 let mut threshold_counts = [0usize; LEVERAGE_RATE_THRESHOLDS.len()];
619 let mut finite_leverage = Vec::with_capacity(n);
620
621 for (obs, &ai) in leverage.iter().enumerate() {
622 if ai.is_finite() {
623 finite_leverage.push(ai);
624 }
625
626 if !(0.0..=1.0).contains(&ai) || !ai.is_finite() {
627 invalid_count += 1;
628 log::warn!("[GAM ALO] invalid leverage at i={}, a_ii={:.6e}", obs, ai);
629 } else if ai > LEVERAGE_HIGH_THRESHOLD {
630 high_leverage_count += 1;
631 if ai > LEVERAGE_VERY_HIGH_THRESHOLD {
632 log::warn!("[GAM ALO] very high leverage at i={}, a_ii={:.6e}", obs, ai);
633 }
634 }
635
636 for (idx, threshold) in LEVERAGE_RATE_THRESHOLDS.iter().enumerate() {
637 if ai > *threshold {
638 threshold_counts[idx] += 1;
639 }
640 }
641 }
642
643 if invalid_count > 0 || high_leverage_count > 0 {
644 log::warn!(
645 "[GAM ALO] leverage diagnostics: {} invalid values, {} high values (>0.99)",
646 invalid_count,
647 high_leverage_count
648 );
649 }
650
651 finite_leverage.sort_by(f64::total_cmp);
652
653 let finite_n = finite_leverage.len();
654 let a_mean = if finite_n > 0 {
655 finite_leverage.iter().copied().sum::<f64>() / finite_n as f64
656 } else {
657 0.0
658 };
659 let a_median = percentile_from_sorted(&finite_leverage, LEVERAGE_PERCENTILES[0]);
660 let a_p95 = percentile_from_sorted(&finite_leverage, LEVERAGE_PERCENTILES[1]);
661 let a_p99 = percentile_from_sorted(&finite_leverage, LEVERAGE_PERCENTILES[2]);
662 let a_max = finite_leverage.last().copied().unwrap_or(0.0);
663
664 log::info!(
673 "[GAM ALO] leverage: n={}, mean={:.3e}, median={:.3e}, p95={:.3e}, p99={:.3e}, max={:.3e}",
674 n,
675 a_mean,
676 a_median,
677 a_p95,
678 a_p99,
679 a_max
680 );
681 log::info!(
682 "[GAM ALO] high-leverage: a>0.90: {:.2}%, a>0.95: {:.2}%, a>0.99: {:.2}%, dispersion phi={:.3e}",
683 100.0 * (threshold_counts[0] as f64) / n as f64,
684 100.0 * (threshold_counts[1] as f64) / n as f64,
685 100.0 * (threshold_counts[2] as f64) / n as f64,
686 phi
687 );
688}
689
/// Borrowed inputs for a single-predictor ALO computation.
///
/// With `n` design rows and `p` columns, `validate_alo_solve_setup` requires
/// every vector to have length `n` and the Hessian to be `p x p`.
pub struct AloInput<'a> {
    /// Dense design matrix (`n x p`).
    pub design: &'a Array2<f64>,
    /// Dense penalized Hessian (`p x p`), finite and symmetric.
    pub penalized_hessian: &'a Array2<f64>,
    /// Weights used for the leverage and the sandwich "meat"; may be signed.
    pub hessian_weights: SignedWeightsView<'a>,
    /// Weights applied to the working residual in the one-step update.
    pub score_weights: PsdWeightsView<'a>,
    /// Working response `z`.
    pub working_response: &'a Array1<f64>,
    /// Fitted linear predictor.
    pub eta: &'a Array1<f64>,
    /// Offset contained in `eta` and `working_response`.
    pub offset: &'a Array1<f64>,
    /// Link function of the fit.
    pub link: LinkFunction,
    /// Dispersion; must be finite and strictly positive.
    pub phi: f64,
    /// Optional penalty root with `p` columns; only validated in the visible
    /// code.
    pub penalty_root: Option<&'a Array2<f64>>,
    /// Ridge associated with the Hessian; must be finite and non-negative.
    pub ridge: f64,
    /// When present, switches `eta_tilde` from the one-step update to the
    /// exact scalar solve driven by this callback.
    pub score_curvature: Option<&'a AloScalarScoreCurvature<'a>>,
}
740
741impl<'a> AloInput<'a> {
742 pub fn from_geometry(
744 geom: &'a FitGeometry,
745 design: &'a Array2<f64>,
746 eta: &'a Array1<f64>,
747 offset: &'a Array1<f64>,
748 link: LinkFunction,
749 phi: f64,
750 ) -> Self {
751 let psd_w = PsdWeightsView::from_view_unchecked(geom.working_weights.view());
758 Self {
759 design,
760 penalized_hessian: &geom.penalized_hessian,
761 hessian_weights: psd_w.as_signed(),
762 score_weights: psd_w,
763 working_response: &geom.working_response,
764 eta,
765 offset,
766 link,
767 phi,
768 penalty_root: None,
769 ridge: 0.0,
770 score_curvature: None,
771 }
772 }
773}
774
775pub fn compute_alo_from_input(input: &AloInput) -> Result<AloDiagnostics, EstimationError> {
781 compute_alo_from_input_inner(input).map_err(EstimationError::from)
782}
783
784fn compute_alo_from_input_inner(input: &AloInput) -> Result<AloDiagnostics, AloError> {
785 let x_dense = input.design;
786 let n = x_dense.nrows();
787 let p = x_dense.ncols();
788 let w_h = input.hessian_weights.view();
792 let w_s = input.score_weights.view();
793
794 validate_alo_solve_setup(input, n, p)?;
795
796 let factor = StableSolver::new("alo penalized hessian")
797 .factorize(input.penalized_hessian)
798 .map_err(|_| AloError::InfluenceMatrixFailed {
799 condition_number: f64::INFINITY,
800 })?;
801
802 let xt = x_dense.t();
803 let phi = input.phi;
804
805 let mut aii = Array1::<f64>::zeros(n);
806 let mut x_hinv_x_diag = Array1::<f64>::zeros(n);
807 let mut se_bayes = Array1::<f64>::zeros(n);
808 let mut se_sandwich = Array1::<f64>::zeros(n);
809
810 let block_cols = ALO_RHS_BLOCK_COLS;
811 let mut rhs_chunk_buf = Array2::<f64>::zeros((p, block_cols).f());
816 let mut xs_chunk_storage = FaerMat::<f64>::zeros(n, block_cols);
823 let x_dense_view = FaerArrayView::new(x_dense);
824
825 for chunk_start in (0..n).step_by(block_cols) {
826 let chunk_end = (chunk_start + block_cols).min(n);
827 let width = chunk_end - chunk_start;
828
829 rhs_chunk_buf
830 .slice_mut(s![.., ..width])
831 .assign(&xt.slice(s![.., chunk_start..chunk_end]));
832
833 let rhs_chunkview = rhs_chunk_buf.slice(s![.., ..width]);
834 let rhs_chunk = FaerArrayView::new(&rhs_chunkview);
835 let s_chunk = factor.solve(rhs_chunk.as_ref());
839
840 let mut xs_target = xs_chunk_storage.as_mut().subcols_mut(0, width);
841 matmul(
842 xs_target.rb_mut(),
843 Accum::Replace,
844 x_dense_view.as_ref(),
845 s_chunk.as_ref(),
846 1.0,
847 Par::Seq,
848 );
849
850 let rhs_view = rhs_chunk_buf.slice(s![.., ..width]);
851
852 for local_col in 0..width {
853 let obs = chunk_start + local_col;
854 let rhs_col = rhs_view.column(local_col);
858 let rhs_slice = rhs_col.as_slice().expect("column-major col contiguous");
859 let s_slice = s_chunk.col_as_slice(local_col);
860
861 let mut x_hinv_x = 0.0f64;
862 for k in 0..p {
864 let sval = s_slice[k];
865 let xval = rhs_slice[k];
866 x_hinv_x = sval.mul_add(xval, x_hinv_x);
867 }
868 let ai = w_h[obs].max(0.0) * x_hinv_x;
869 aii[obs] = ai;
870 x_hinv_x_diag[obs] = x_hinv_x;
871
872 let var_bayes = bayesvar_eta(phi, x_hinv_x);
873 let xs_slice = xs_chunk_storage.col_as_slice(local_col);
874 let mut meat_quad = 0.0f64;
875 for row in 0..n {
876 let xs = xs_slice[row];
877 meat_quad += w_h[row] * xs * xs;
878 }
879 let var_sandwich = sandwichvar_eta_from_meat(phi, meat_quad);
880
881 if !var_bayes.is_finite() || !var_sandwich.is_finite() {
882 return Err(AloError::LooComputationFailed {
883 reason: format!(
884 "ALO variance is not finite at row {obs}: bayes={var_bayes:.6e}, sandwich={var_sandwich:.6e}"
885 ),
886 });
887 }
888 let bayes_tol = variance_negative_tolerance(phi * x_hinv_x.abs());
889 if var_bayes < -bayes_tol {
890 return Err(AloError::LooComputationFailed {
891 reason: format!(
892 "ALO Bayesian variance is materially negative at row {obs}: var={var_bayes:.6e}, tol={bayes_tol:.6e}"
893 ),
894 });
895 }
896 let sandwich_scale = phi * meat_quad.abs().max(x_hinv_x.abs());
897 let sandwich_tol = variance_negative_tolerance(sandwich_scale);
898 if var_sandwich < -sandwich_tol {
899 return Err(AloError::LooComputationFailed {
900 reason: format!(
901 "ALO sandwich variance is materially negative at row {obs}: var={var_sandwich:.6e}, tol={sandwich_tol:.6e}"
902 ),
903 });
904 }
905
906 se_bayes[obs] = var_bayes.max(0.0).sqrt();
907 se_sandwich[obs] = var_sandwich.max(0.0).sqrt();
908 }
909 }
910
911 let eta_hat = input.eta;
912 let z = input.working_response;
913 let offset = input.offset;
914
915 use rayon::prelude::*;
916 let eta_tilde_vec: Vec<f64> = (0..n)
917 .into_par_iter()
918 .map(|i| {
919 let denom_raw = 1.0 - aii[i];
920 if denom_raw <= ALO_DENOMINATOR_MIN || !denom_raw.is_finite() {
921 return Err(AloError::LooComputationFailed {
922 reason: format!(
923 "ALO denominator is too small at row {i}: a_ii={:.6e}, 1-a_ii={:.6e}, min={:.1e}",
924 aii[i], denom_raw, ALO_DENOMINATOR_MIN
925 ),
926 });
927 }
928 let one_step = alo_eta_updatewith_offset(
929 eta_hat[i],
930 z[i],
931 offset[i],
932 x_hinv_x_diag[i],
933 w_s[i],
934 denom_raw,
935 );
936 let v = if let Some(score_curvature) = input.score_curvature {
944 alo_eta_exact_frozen_curvature(
945 eta_hat[i],
946 x_hinv_x_diag[i],
947 &|eta| score_curvature(i, eta),
948 )
949 .map_err(|err| AloError::LooComputationFailed {
950 reason: format!(
951 "ALO exact frozen-curvature solve failed at row {i}: {err}"
952 ),
953 })?
954 } else {
955 one_step
956 };
957 if !v.is_finite() {
958 return Err(AloError::LooComputationFailed {
959 reason: format!("ALO eta_tilde is not finite at row {i}: eta_tilde={v}"),
960 });
961 }
962 Ok(v)
963 })
964 .collect::<Result<_, _>>()?;
965 let eta_tilde = Array1::from(eta_tilde_vec);
966
967 Ok(AloDiagnostics {
968 eta_tilde,
969 se_bayes,
970 se_sandwich,
971 pred_identity: eta_hat.clone(),
972 leverage: aii,
973 fisherweights: w_h.to_owned(),
974 })
975}
976
977fn validate_alo_solve_setup(input: &AloInput, n: usize, p: usize) -> Result<(), AloError> {
978 let h = input.penalized_hessian;
979 if h.nrows() != p || h.ncols() != p {
980 return Err(AloError::InvalidInput {
981 reason: format!(
982 "ALO diagnostics require a dense exact penalized Hessian with shape {p}x{p}; got {}x{}",
983 h.nrows(),
984 h.ncols()
985 ),
986 });
987 }
988 if h.iter().any(|v| !v.is_finite()) {
989 return Err(AloError::InvalidInput {
990 reason: "ALO diagnostics require a finite dense exact penalized Hessian".to_string(),
991 });
992 }
993 for i in 0..p {
994 for j in 0..i {
995 let a = h[[i, j]];
996 let b = h[[j, i]];
997 let scale = a.abs().max(b.abs()).max(1.0);
998 if (a - b).abs() > HESSIAN_SYMMETRY_REL_TOL * scale {
999 return Err(AloError::InvalidInput {
1000 reason: format!(
1001 "ALO diagnostics require a symmetric dense exact penalized Hessian; entries ({i},{j}) and ({j},{i}) differ by {:.3e}",
1002 (a - b).abs()
1003 ),
1004 });
1005 }
1006 }
1007 }
1008
1009 let vector_lengths = [
1010 ("hessian_weights", input.hessian_weights.len()),
1011 ("score_weights", input.score_weights.len()),
1012 ("working_response", input.working_response.len()),
1013 ("eta", input.eta.len()),
1014 ("offset", input.offset.len()),
1015 ];
1016 for (name, len) in vector_lengths {
1017 if len != n {
1018 return Err(AloError::InvalidInput {
1019 reason: format!("ALO diagnostics require {name} length {n}; got {len}"),
1020 });
1021 }
1022 }
1023 if input.hessian_weights.view().iter().any(|v| !v.is_finite()) {
1024 return Err(AloError::WeightInvalid {
1025 reason: "ALO diagnostics require finite Hessian-side weights".to_string(),
1026 });
1027 }
1028 if input.score_weights.view().iter().any(|v| !v.is_finite()) {
1029 return Err(AloError::WeightInvalid {
1030 reason: "ALO diagnostics require finite score-side weights".to_string(),
1031 });
1032 }
1033 if input.working_response.iter().any(|v| !v.is_finite()) {
1034 return Err(AloError::WeightInvalid {
1035 reason: "ALO diagnostics require finite working responses".to_string(),
1036 });
1037 }
1038 if input.eta.iter().any(|v| !v.is_finite()) || input.offset.iter().any(|v| !v.is_finite()) {
1039 return Err(AloError::InvalidInput {
1040 reason: "ALO diagnostics require finite linear predictors and offsets".to_string(),
1041 });
1042 }
1043 if !input.phi.is_finite() || input.phi <= 0.0 {
1044 return Err(AloError::InvalidInput {
1045 reason: format!(
1046 "ALO diagnostics require positive finite dispersion phi; got {}",
1047 input.phi
1048 ),
1049 });
1050 }
1051 if !input.ridge.is_finite() || input.ridge < 0.0 {
1052 return Err(AloError::InvalidInput {
1053 reason: format!(
1054 "ALO diagnostics require a finite non-negative Hessian ridge; got {}",
1055 input.ridge
1056 ),
1057 });
1058 }
1059 if let Some(e) = input.penalty_root {
1060 if e.ncols() != p {
1061 return Err(AloError::InvalidInput {
1062 reason: format!(
1063 "ALO diagnostics require penalty root to have {p} columns; got {}",
1064 e.ncols()
1065 ),
1066 });
1067 }
1068 if e.iter().any(|v| !v.is_finite()) {
1069 return Err(AloError::InvalidInput {
1070 reason: "ALO diagnostics require finite penalty-root entries".to_string(),
1071 });
1072 }
1073 }
1074 Ok(())
1075}
1076
1077pub fn compute_alo_diagnostics_from_fit(
1079 fit: &UnifiedFitResult,
1080 y: ArrayView1<f64>,
1081 link: LinkFunction,
1082) -> Result<AloDiagnostics, EstimationError> {
1083 let pirls = fit
1084 .artifacts
1085 .pirls
1086 .as_ref()
1087 .ok_or_else(|| AloError::InvalidInput {
1088 reason:
1089 "ALO diagnostics require a PIRLS-backed fit; this fit does not expose PIRLS geometry"
1090 .to_string(),
1091 })
1092 .map_err(EstimationError::from)?;
1093 compute_alo_diagnostics_from_pirls_impl(pirls, y, link)
1094}
1095
1096pub fn compute_alo_diagnostics_from_unified(
1102 unified: &UnifiedFitResult,
1103 design: &Array2<f64>,
1104 eta: &Array1<f64>,
1105 offset: &Array1<f64>,
1106 link: LinkFunction,
1107 phi: f64,
1108) -> Result<AloDiagnostics, EstimationError> {
1109 let geom = unified
1110 .geometry
1111 .as_ref()
1112 .ok_or_else(|| AloError::InvalidInput {
1113 reason: "UnifiedFitResult does not contain working-set geometry; \
1114 ALO diagnostics require geometry at convergence"
1115 .to_string(),
1116 })
1117 .map_err(EstimationError::from)?;
1118 let input = AloInput::from_geometry(geom, design, eta, offset, link, phi);
1119 compute_alo_from_input(&input)
1120}
1121
1122pub fn compute_alo_diagnostics_from_pirls(
1124 base: &pirls::PirlsResult,
1125 y: ArrayView1<f64>,
1126 link: LinkFunction,
1127) -> Result<AloDiagnostics, EstimationError> {
1128 compute_alo_diagnostics_from_pirls_impl(base, y, link)
1129}
1130
1131pub fn compute_case_deletion_from_pirls(
1150 base: &pirls::PirlsResult,
1151 y: ArrayView1<f64>,
1152 link: LinkFunction,
1153) -> Result<Option<crate::sensitivity::CaseDeletionInfluence>, EstimationError> {
1154 let x_dense_arc = base
1155 .x_transformed
1156 .try_to_dense_arc("case-deletion diagnostics require dense transformed design")
1157 .map_err(|reason| EstimationError::InvalidInput(reason))?;
1158 let x_dense = x_dense_arc.as_ref();
1159 let n = x_dense.nrows();
1160 let p = x_dense.ncols();
1161 if n == 0 || p == 0 {
1162 return Ok(None);
1163 }
1164
1165 let phi = match link {
1168 LinkFunction::Identity => {
1169 use rayon::iter::{IntoParallelIterator, ParallelIterator};
1170 let rss: f64 = (0..n)
1171 .into_par_iter()
1172 .map(|i| {
1173 let r = y[i] - base.finalmu[i];
1174 base.finalweights[i] * r * r
1175 })
1176 .sum();
1177 let dof = (n as f64) - base.edf;
1178 rss / dof.max(1.0)
1179 }
1180 _ => 1.0,
1181 };
1182 if !(phi.is_finite() && phi > 0.0) {
1183 return Ok(None);
1184 }
1185
1186 let h_dense = base
1189 .dense_stabilizedhessian_transformed(
1190 "case-deletion diagnostics require exact dense stabilized penalized Hessian",
1191 )
1192 .map_err(|e| match e {
1193 EstimationError::InvalidInput(reason) => EstimationError::InvalidInput(reason),
1194 other => EstimationError::InvalidInput(format!("{other:?}")),
1195 })?;
1196
1197 let factor = match h_dense.cholesky(faer::Side::Lower) {
1198 Ok(f) => f,
1199 Err(_) => return Ok(None),
1203 };
1204
1205 let working_weights = base.finalweights.clone();
1209 let working_residual = &base.solveworking_response - &base.final_eta;
1210
1211 let sensitivity = crate::sensitivity::FitSensitivity::from_faer_cholesky(&factor, p);
1212 Ok(sensitivity.case_deletion(
1213 x_dense,
1214 working_weights.view(),
1215 working_residual.view(),
1216 phi,
1217 ))
1218}
1219
/// ALO diagnostics for a model with several linear-predictor blocks.
///
/// `compute_multiblock_alo_inner` fills `leverage` and `cook_distance` with one
/// value per observation and appends per-chunk entries to `eta_tilde` and
/// `alo_variance` in observation order.
#[derive(Debug, Clone)]
pub struct MultiBlockAloDiagnostics {
    // NOTE(review): presumably one vector of per-block leave-one-out
    // predictors per observation — confirm against the chunk worker.
    pub eta_tilde: Vec<Array1<f64>>,
    /// One leverage value per observation.
    pub leverage: Array1<f64>,
    // NOTE(review): presumably one vector of per-block ALO variances per
    // observation — confirm against the chunk worker.
    pub alo_variance: Vec<Array1<f64>>,
    /// One Cook's-distance value per observation.
    pub cook_distance: Array1<f64>,
}
1239
/// Inputs for the multi-block ALO computation.
pub struct MultiBlockAloInput<'a> {
    /// Number of observations to process.
    pub n_obs: usize,
    /// Number of linear-predictor blocks; must equal `block_designs.len()`.
    pub n_blocks: usize,
    /// One design matrix per block. Their column counts must sum to the size
    /// of `penalized_hessian_inv`, and rows are sliced by observation index.
    pub block_designs: &'a [Array2<f64>],
    /// Inverse of the joint penalized Hessian over all stacked block
    /// coefficients.
    pub penalized_hessian_inv: &'a Array2<f64>,
    // NOTE(review): layout is not visible in this part of the file;
    // presumably one small block-by-block weight matrix per observation.
    pub block_weights: Vec<Array2<f64>>,
    // NOTE(review): layout is not visible here; presumably per-observation
    // score vectors with one entry per block.
    pub scores: Vec<Array1<f64>>,
    // NOTE(review): layout is not visible here; presumably the fitted
    // per-block linear predictors.
    pub eta_hat: Vec<Array1<f64>>,
}
1288
1289pub fn compute_multiblock_alo(
1308 input: &MultiBlockAloInput,
1309) -> Result<MultiBlockAloDiagnostics, EstimationError> {
1310 compute_multiblock_alo_inner(input).map_err(EstimationError::from)
1311}
1312
1313fn compute_multiblock_alo_inner(
1314 input: &MultiBlockAloInput,
1315) -> Result<MultiBlockAloDiagnostics, AloError> {
1316 use rayon::prelude::*;
1317
1318 let n = input.n_obs;
1319 let b = input.n_blocks;
1320 let p_tot = input.penalized_hessian_inv.nrows();
1321
1322 if input.block_designs.len() != b {
1324 return Err(AloError::InvalidInput {
1325 reason: format!(
1326 "MultiBlockAloInput: expected {} block designs, got {}",
1327 b,
1328 input.block_designs.len()
1329 ),
1330 });
1331 }
1332
1333 let col_sum: usize = input.block_designs.iter().map(|d| d.ncols()).sum();
1335 if col_sum != p_tot {
1336 return Err(AloError::InvalidInput {
1337 reason: format!(
1338 "MultiBlockAloInput: total design columns ({}) != penalized_hessian_inv size ({})",
1339 col_sum, p_tot
1340 ),
1341 });
1342 }
1343
1344 let col_offsets = multiblock_col_offsets(input.block_designs);
1345 let (chunk_size, max_concurrent_chunks) = multiblock_alo_parallel_plan(p_tot, b, n);
1346 let chunk_starts: Vec<usize> = (0..n).step_by(chunk_size).collect();
1347
1348 let mut chunk_results: Vec<Result<MultiBlockAloChunkDiagnostics, AloError>> =
1354 Vec::with_capacity(chunk_starts.len());
1355 for chunk_wave in chunk_starts.chunks(max_concurrent_chunks) {
1356 let mut wave_results: Vec<Result<MultiBlockAloChunkDiagnostics, AloError>> = chunk_wave
1357 .par_iter()
1358 .map_init(
1359 || MultiBlockAloScratch::new(b),
1360 |scratch, &chunk_start| {
1361 let chunk_end = (chunk_start + chunk_size).min(n);
1362 compute_multiblock_alo_chunk(
1363 input,
1364 &col_offsets,
1365 chunk_start,
1366 chunk_end,
1367 scratch,
1368 )
1369 },
1370 )
1371 .collect();
1372 chunk_results.append(&mut wave_results);
1373 }
1374
1375 let mut eta_tilde = Vec::with_capacity(n);
1376 let mut leverage = Array1::<f64>::zeros(n);
1377 let mut alo_variance = Vec::with_capacity(n);
1378 let mut cook_distance = Array1::<f64>::zeros(n);
1379
1380 let mut chunks = Vec::with_capacity(chunk_results.len());
1381 for result in chunk_results {
1382 chunks.push(result?);
1383 }
1384 chunks.sort_unstable_by_key(|chunk| chunk.chunk_start);
1385
1386 for chunk in chunks {
1387 let chunk_start = chunk.chunk_start;
1388 eta_tilde.extend(chunk.eta_tilde);
1389 alo_variance.extend(chunk.alo_variance);
1390 for (local_i, lev) in chunk.leverage.into_iter().enumerate() {
1391 leverage[chunk_start + local_i] = lev;
1392 }
1393 for (local_i, cook) in chunk.cook_distance.into_iter().enumerate() {
1394 cook_distance[chunk_start + local_i] = cook;
1395 }
1396 }
1397
1398 Ok(MultiBlockAloDiagnostics {
1399 eta_tilde,
1400 leverage,
1401 alo_variance,
1402 cook_distance,
1403 })
1404}
1405
1406#[inline]
1407fn multiblock_alo_parallel_plan(p_tot: usize, n_blocks: usize, n_obs: usize) -> (usize, usize) {
1408 if p_tot == 0 || n_blocks == 0 || n_obs == 0 {
1409 return (1, 1);
1410 }
1411 let bytes_per_obs = (p_tot * n_blocks * std::mem::size_of::<f64>()).max(1);
1412 let workers = rayon::current_num_threads().max(1);
1413 let max_concurrent_chunks = (MULTIBLOCK_ALO_MEMORY_BUDGET_BYTES / bytes_per_obs)
1414 .max(1)
1415 .min(workers);
1416 let per_worker_budget =
1417 (MULTIBLOCK_ALO_MEMORY_BUDGET_BYTES / max_concurrent_chunks).max(bytes_per_obs);
1418 let budget_obs = (per_worker_budget / bytes_per_obs).max(1);
1419 (budget_obs.min(n_obs), max_concurrent_chunks)
1420}
1421
/// Reusable work buffers for `compute_multiblock_alo_chunk`.
///
/// All square buffers are `b x b` matrices stored row-major in a flat `Vec`
/// (`entry (r, c)` lives at `r * b + c`); all vector buffers have length `b`.
/// Every buffer is overwritten per observation, so one instance can be reused
/// across observations and chunks.
struct MultiBlockAloScratch {
    /// Per-observation matrix `A_i`.
    a_i: Vec<f64>,
    /// Product `W_i * A_i`.
    wa: Vec<f64>,
    /// Product `A_i * W_i`.
    aw: Vec<f64>,
    /// `I - W_i A_i`, overwritten in place by its LU factors.
    imwa: Vec<f64>,
    /// `I - A_i W_i`, overwritten in place by its LU factors.
    imaw: Vec<f64>,
    /// Row permutation of the `imwa` factorization.
    perm_imwa: Vec<usize>,
    /// Row permutation of the `imaw` factorization.
    perm_imaw: Vec<usize>,
    /// Correction added to the fitted linear predictor.
    delta_eta: Vec<f64>,
    /// Right-hand side / solution buffer for the LU solves.
    rhs_buf: Vec<f64>,
    /// Holds `W_i * u` and then the result of the second variance solve.
    w_u: Vec<f64>,
    /// Per-block variance values before they are copied into the output array.
    var_diag_buf: Vec<f64>,
    /// Row-major copy of the observation's weight matrix `W_i`.
    w_flat: Vec<f64>,
    /// Forward-substitution workspace handed to `lu_solve_in_place`.
    lu_scratch: Vec<f64>,
}

impl MultiBlockAloScratch {
    /// Allocates zero-filled buffers sized for `b` blocks.
    fn new(b: usize) -> Self {
        // Two shapes only: flattened b x b matrices and length-b vectors.
        let square = || vec![0.0f64; b * b];
        let column = || vec![0.0f64; b];
        let pivots = || vec![0usize; b];

        Self {
            a_i: square(),
            wa: square(),
            aw: square(),
            imwa: square(),
            imaw: square(),
            perm_imwa: pivots(),
            perm_imaw: pivots(),
            delta_eta: column(),
            rhs_buf: column(),
            w_u: column(),
            var_diag_buf: column(),
            w_flat: square(),
            lu_scratch: column(),
        }
    }
}
1458
/// ALO diagnostics for one contiguous chunk of observations, as produced by
/// `compute_multiblock_alo_chunk`.
///
/// Every per-observation container holds one entry per row of the chunk, in row
/// order, so entry `k` belongs to observation `chunk_start + k`.
struct MultiBlockAloChunkDiagnostics {
    /// Index of the first observation of the chunk; used to order chunks and to
    /// place per-chunk values into the full-length outputs.
    chunk_start: usize,
    /// Corrected linear predictor per observation (`eta_hat + delta_eta`), one
    /// value per block.
    eta_tilde: Vec<Array1<f64>>,
    /// Leverage per observation, computed as the trace of `A_i * W_i`.
    leverage: Vec<f64>,
    /// Per-block variance values per observation, clamped below at zero.
    alo_variance: Vec<Array1<f64>>,
    /// Quadratic form `delta_eta^T W_i delta_eta` per observation.
    cook_distance: Vec<f64>,
}
1466
1467fn compute_multiblock_alo_chunk(
1468 input: &MultiBlockAloInput,
1469 col_offsets: &[usize],
1470 chunk_start: usize,
1471 chunk_end: usize,
1472 scratch: &mut MultiBlockAloScratch,
1473) -> Result<MultiBlockAloChunkDiagnostics, AloError> {
1474 let b = input.n_blocks;
1475 let chunk_len = chunk_end - chunk_start;
1476
1477 let mut q_blocks = Vec::with_capacity(b);
1478 for blk in 0..b {
1479 let x_chunk_t = input.block_designs[blk]
1480 .slice(s![chunk_start..chunk_end, ..])
1481 .t()
1482 .to_owned();
1483 let off_b = col_offsets[blk];
1484 let h_slice = input
1485 .penalized_hessian_inv
1486 .slice(s![.., off_b..off_b + x_chunk_t.nrows()])
1487 .to_owned();
1488 q_blocks.push(h_slice.dot(&x_chunk_t));
1489 }
1490
1491 let mut eta_tilde = Vec::with_capacity(chunk_len);
1492 let mut leverage = vec![0.0f64; chunk_len];
1493 let mut alo_variance = Vec::with_capacity(chunk_len);
1494 let mut cook_distance = vec![0.0f64; chunk_len];
1495
1496 for local_i in 0..chunk_len {
1497 let i = chunk_start + local_i;
1498 let w_i = &input.block_weights[i];
1499
1500 for r in 0..b {
1502 for c in 0..b {
1503 scratch.w_flat[r * b + c] = w_i[(r, c)];
1504 }
1505 }
1506
1507 for a in 0..b {
1509 let x_a = &input.block_designs[a];
1510 let p_a = x_a.ncols();
1511 let off_a = col_offsets[a];
1512 let xa_row = x_a.row(i);
1513 for bb in 0..b {
1514 let q_bb = &q_blocks[bb];
1515 let mut dot = 0.0f64;
1516 for k in 0..p_a {
1517 dot += xa_row[k] * q_bb[(off_a + k, local_i)];
1518 }
1519 scratch.a_i[a * b + bb] = dot;
1520 }
1521 }
1522
1523 mat_mul_flat(&scratch.w_flat, &scratch.a_i, &mut scratch.wa, b);
1525 mat_mul_flat(&scratch.a_i, &scratch.w_flat, &mut scratch.aw, b);
1527
1528 let mut tr = 0.0f64;
1531 for d in 0..b {
1532 tr += scratch.aw[d * b + d];
1533 }
1534 leverage[local_i] = tr;
1535
1536 for r in 0..b {
1538 for c in 0..b {
1539 let idx = r * b + c;
1540 let id = if r == c { 1.0 } else { 0.0 };
1541 scratch.imwa[idx] = id - scratch.wa[idx];
1542 scratch.imaw[idx] = id - scratch.aw[idx];
1543 }
1544 }
1545
1546 if !lu_factor_in_place(&mut scratch.imwa, &mut scratch.perm_imwa, b) {
1552 for r in 0..b {
1553 for c in 0..b {
1554 let idx = r * b + c;
1555 let id = if r == c { 1.0 } else { 0.0 };
1556 scratch.imwa[idx] = id - scratch.wa[idx];
1557 }
1558 }
1559 for d in 0..b {
1560 scratch.imwa[d * b + d] += ALO_LOCAL_BLOCK_RIDGE;
1561 }
1562 let refactored = lu_factor_in_place(&mut scratch.imwa, &mut scratch.perm_imwa, b);
1563 assert!(
1564 refactored,
1565 "ALO local block remained singular after ridge regularization"
1566 );
1567 }
1568 if !lu_factor_in_place(&mut scratch.imaw, &mut scratch.perm_imaw, b) {
1569 for r in 0..b {
1570 for c in 0..b {
1571 let idx = r * b + c;
1572 let id = if r == c { 1.0 } else { 0.0 };
1573 scratch.imaw[idx] = id - scratch.aw[idx];
1574 }
1575 }
1576 for d in 0..b {
1577 scratch.imaw[d * b + d] += ALO_LOCAL_BLOCK_RIDGE;
1578 }
1579 let refactored = lu_factor_in_place(&mut scratch.imaw, &mut scratch.perm_imaw, b);
1580 assert!(
1581 refactored,
1582 "ALO local variance block remained singular after ridge regularization"
1583 );
1584 }
1585
1586 let s_i = &input.scores[i];
1588 for k in 0..b {
1589 scratch.rhs_buf[k] = s_i[k];
1590 }
1591 lu_solve_in_place(
1592 &scratch.imwa,
1593 &scratch.perm_imwa,
1594 &mut scratch.rhs_buf,
1595 &mut scratch.lu_scratch,
1596 b,
1597 );
1598 for r in 0..b {
1600 let mut acc = 0.0f64;
1601 let row_off = r * b;
1602 for k in 0..b {
1603 acc += scratch.a_i[row_off + k] * scratch.rhs_buf[k];
1604 }
1605 scratch.delta_eta[r] = acc;
1606 }
1607
1608 let eta_i = &input.eta_hat[i];
1609 let mut corrected = Array1::<f64>::zeros(b);
1610 for d in 0..b {
1611 corrected[d] = eta_i[d] + scratch.delta_eta[d];
1612 }
1613 eta_tilde.push(corrected);
1614
1615 let mut cook = 0.0f64;
1617 for r in 0..b {
1618 let mut w_delta_r = 0.0f64;
1619 let row_off = r * b;
1620 for k in 0..b {
1621 w_delta_r += scratch.w_flat[row_off + k] * scratch.delta_eta[k];
1622 }
1623 cook += scratch.delta_eta[r] * w_delta_r;
1624 }
1625 cook_distance[local_i] = cook;
1626
1627 for d in 0..b {
1633 let row_off = d * b;
1634 for k in 0..b {
1636 scratch.rhs_buf[k] = scratch.a_i[row_off + k];
1637 }
1638 lu_solve_in_place(
1639 &scratch.imaw,
1640 &scratch.perm_imaw,
1641 &mut scratch.rhs_buf,
1642 &mut scratch.lu_scratch,
1643 b,
1644 );
1645 for r in 0..b {
1647 let mut acc = 0.0f64;
1648 let wr = r * b;
1649 for k in 0..b {
1650 acc += scratch.w_flat[wr + k] * scratch.rhs_buf[k];
1651 }
1652 scratch.w_u[r] = acc;
1653 }
1654 lu_solve_in_place(
1656 &scratch.imwa,
1657 &scratch.perm_imwa,
1658 &mut scratch.w_u,
1659 &mut scratch.lu_scratch,
1660 b,
1661 );
1662 let mut v_dd = 0.0f64;
1664 for k in 0..b {
1665 v_dd += scratch.a_i[row_off + k] * scratch.w_u[k];
1666 }
1667 scratch.var_diag_buf[d] = v_dd.max(0.0);
1668 }
1669 let mut var_diag = Array1::<f64>::zeros(b);
1670 for d in 0..b {
1671 var_diag[d] = scratch.var_diag_buf[d];
1672 }
1673 alo_variance.push(var_diag);
1674 }
1675
1676 Ok(MultiBlockAloChunkDiagnostics {
1677 chunk_start,
1678 eta_tilde,
1679 leverage,
1680 alo_variance,
1681 cook_distance,
1682 })
1683}
1684
/// Multiplies two `b x b` matrices stored row-major in flat slices: `out = a * b_mat`.
///
/// Entry `(r, c)` of every matrix lives at index `r * b + c`. Each output entry is
/// accumulated left to right over the inner index, starting from `0.0`.
///
/// # Panics
///
/// Panics if any slice holds fewer than `b * b` elements.
#[inline]
fn mat_mul_flat(a: &[f64], b_mat: &[f64], out: &mut [f64], b: usize) {
    for r in 0..b {
        let base = r * b;
        let lhs_row = &a[base..base + b];
        let dst_row = &mut out[base..base + b];
        for (c, dst) in dst_row.iter_mut().enumerate() {
            // Dot product of row `r` of `a` with column `c` of `b_mat`.
            *dst = lhs_row
                .iter()
                .enumerate()
                .fold(0.0f64, |acc, (k, &lhs)| acc + lhs * b_mat[k * b + c]);
        }
    }
}
1700
1701fn lu_factor_in_place(m: &mut [f64], perm: &mut [usize], b: usize) -> bool {
1708 for i in 0..b {
1709 perm[i] = i;
1710 }
1711 for col in 0..b {
1712 let mut max_val = m[col * b + col].abs();
1714 let mut max_idx = col;
1715 for row in (col + 1)..b {
1716 let v = m[row * b + col].abs();
1717 if v > max_val {
1718 max_val = v;
1719 max_idx = row;
1720 }
1721 }
1722 if max_val < LU_PIVOT_SINGULAR_TOL {
1723 return false;
1724 }
1725 if max_idx != col {
1726 for k in 0..b {
1728 m.swap(col * b + k, max_idx * b + k);
1729 }
1730 perm.swap(col, max_idx);
1731 }
1732 let pivot = m[col * b + col];
1733 for row in (col + 1)..b {
1734 let factor = m[row * b + col] / pivot;
1735 m[row * b + col] = factor; for k in (col + 1)..b {
1737 let upd = factor * m[col * b + k];
1738 m[row * b + k] -= upd;
1739 }
1740 }
1741 }
1742 true
1743}
1744
/// Solves a `b x b` linear system from packed LU factors, overwriting `rhs` with the
/// solution.
///
/// `m` and `perm` use the layout written by `lu_factor_in_place`: multipliers of the
/// unit-lower factor below the diagonal, the upper factor on and above it, and
/// `perm[k]` naming the right-hand-side entry that belongs to row `k`.
/// `scratch` provides at least `b` elements of workspace for the forward pass.
fn lu_solve_in_place(m: &[f64], perm: &[usize], rhs: &mut [f64], scratch: &mut [f64], b: usize) {
    let forward = &mut scratch[..b];

    // Forward substitution with the unit-lower factor on the permuted right-hand side.
    for (row, &source) in perm[..b].iter().enumerate() {
        let multipliers = &m[row * b..row * b + row];
        let (solved, pending) = forward.split_at_mut(row);
        pending[0] = multipliers
            .iter()
            .zip(solved.iter())
            .fold(rhs[source], |acc, (&l, &y)| acc - l * y);
    }

    // Back substitution with the upper factor; solved entries are written into `rhs`.
    for row in (0..b).rev() {
        let base = row * b;
        let numerator = m[base + row + 1..base + b]
            .iter()
            .zip(&rhs[row + 1..b])
            .fold(forward[row], |acc, (&u, &x)| acc - u * x);
        rhs[row] = numerator / m[base + row];
    }
}
1766
1767pub fn compute_multiblock_alo_leverages(
1775 n_obs: usize,
1776 n_blocks: usize,
1777 block_designs: &[Array2<f64>],
1778 penalized_hessian_inv: &Array2<f64>,
1779 block_weights: &[Array2<f64>],
1780) -> Result<Array1<f64>, EstimationError> {
1781 use rayon::prelude::*;
1782
1783 let n = n_obs;
1784 let b = n_blocks;
1785 let p_tot = penalized_hessian_inv.nrows();
1786
1787 let col_offsets = multiblock_col_offsets(block_designs);
1788 let max_workers = rayon::current_num_threads();
1789 let chunk_size = multiblock_alo_parallel_leverage_chunk_size(p_tot, b, n, max_workers);
1790
1791 let mut leverage = Array1::<f64>::zeros(n);
1792
1793 let block_widths: Vec<usize> = block_designs.iter().map(|d| d.ncols()).collect();
1797 let mut h_stripes: Vec<FaerMat<f64>> = block_widths
1798 .iter()
1799 .map(|&p_blk| FaerMat::<f64>::zeros(p_tot, p_blk))
1800 .collect();
1801 for blk in 0..b {
1804 let off_b = col_offsets[blk];
1805 let p_blk = block_widths[blk];
1806 let stripe = &mut h_stripes[blk];
1807 for c in 0..p_blk {
1808 for r in 0..p_tot {
1809 stripe[(r, c)] = penalized_hessian_inv[(r, off_b + c)];
1810 }
1811 }
1812 }
1813
1814 leverage
1815 .as_slice_mut()
1816 .expect("newly allocated Array1 is contiguous")
1817 .par_chunks_mut(chunk_size)
1818 .enumerate()
1819 .for_each(|(chunk_idx, leverage_chunk)| {
1820 let chunk_start = chunk_idx * chunk_size;
1821 let chunk_len = leverage_chunk.len();
1822 let chunk_end = chunk_start + chunk_len;
1823
1824 let bb_sz = b * b;
1828 let mut a_i = vec![0.0f64; bb_sz];
1829 let mut aw = vec![0.0f64; bb_sz];
1830 let mut w_flat = vec![0.0f64; bb_sz];
1831
1832 let mut q_storage: Vec<FaerMat<f64>> = block_widths
1836 .iter()
1837 .map(|_| FaerMat::<f64>::zeros(p_tot, chunk_len))
1838 .collect();
1839
1840 let mut xt_storage: Vec<FaerMat<f64>> = block_widths
1844 .iter()
1845 .map(|&p_blk| FaerMat::<f64>::zeros(p_blk, chunk_len))
1846 .collect();
1847
1848 for blk in 0..b {
1853 let p_blk = block_widths[blk];
1854
1855 let x_chunk = block_designs[blk].slice(s![chunk_start..chunk_end, ..]);
1856 let xt = &mut xt_storage[blk];
1857 for local_i in 0..chunk_len {
1858 let row = x_chunk.row(local_i);
1859 for j in 0..p_blk {
1860 xt[(j, local_i)] = row[j];
1861 }
1862 }
1863
1864 matmul(
1865 q_storage[blk].as_mut(),
1866 Accum::Replace,
1867 h_stripes[blk].as_ref(),
1868 xt_storage[blk].as_ref(),
1869 1.0,
1870 Par::Seq,
1871 );
1872 }
1873
1874 for local_i in 0..chunk_len {
1875 let i = chunk_start + local_i;
1876 let w_i = &block_weights[i];
1877
1878 for r in 0..b {
1880 for c in 0..b {
1881 w_flat[r * b + c] = w_i[(r, c)];
1882 }
1883 }
1884
1885 for r in 0..bb_sz {
1889 a_i[r] = 0.0;
1890 }
1891 for k in 0..b {
1892 let q_k = &q_storage[k];
1893 let q_col = q_k.col_as_slice(local_i);
1894 for a in 0..b {
1895 let p_a = block_widths[a];
1896 let off_a = col_offsets[a];
1897 let xa_row = block_designs[a].row(i);
1898 let mut dot = 0.0f64;
1899 for j in 0..p_a {
1900 dot = xa_row[j].mul_add(q_col[off_a + j], dot);
1901 }
1902 a_i[a * b + k] = dot;
1903 }
1904 }
1905
1906 mat_mul_flat(&a_i, &w_flat, &mut aw, b);
1908 let mut tr = 0.0f64;
1909 for d in 0..b {
1910 tr += aw[d * b + d];
1911 }
1912 leverage_chunk[local_i] = tr;
1913 }
1914 });
1915
1916 Ok(leverage)
1917}
1918
// Unit tests for the scalar ALO helpers, the percentile helpers and the multi-block
// ALO routines of this file.
#[cfg(test)]
mod tests {
    use super::{
        ALO_EXACT_SCALAR_MAX_ITERS, AloExactScalarError, AloInput, alo_eta_exact_frozen_curvature,
        alo_eta_updatewith_offset, bayesvar_eta, compute_alo_from_input_inner,
        percentile_from_sorted, percentile_index, sandwichvar_eta_from_meat,
    };
    use gam_linalg::matrix::{PsdWeightsView, SignedWeightsView};
    use gam_problem::LinkFunction;

    // With unit score and Hessian weights the offset-aware update must equal the
    // classic leave-one-out formula applied to the offset-centered quantities.
    #[test]
    fn alo_offset_update_matches_centered_algebra() {
        let eta_hat = 11.0;
        let z = 13.0;
        let offset = 10.0;
        let x_hinv_x = 0.2;
        let hessian_weight = 1.0;
        let score_weight = 1.0;
        let leverage = hessian_weight * x_hinv_x;
        // Reference value: centre by the offset, apply the classic update, shift back.
        let expected = offset + ((eta_hat - offset) - leverage * (z - offset)) / (1.0 - leverage);
        let got =
            alo_eta_updatewith_offset(eta_hat, z, offset, x_hinv_x, score_weight, 1.0 - leverage);
        assert!((got - expected).abs() < 1e-12);
    }

    // A zero offset must reproduce the classic update (eta_hat - h z) / (1 - h).
    #[test]
    fn alo_offset_update_reduces_to_classicwhen_offsetzero() {
        let eta_hat = 1.25;
        let z = -0.5;
        let x_hinv_x = 0.35;
        let hessian_weight = 1.0;
        let score_weight = 1.0;
        let leverage = hessian_weight * x_hinv_x;
        let expected = (eta_hat - leverage * z) / (1.0 - leverage);
        let got =
            alo_eta_updatewith_offset(eta_hat, z, 0.0, x_hinv_x, score_weight, 1.0 - leverage);
        assert!((got - expected).abs() < 1e-12);
    }

    // The score weight scales the numerator of the correction while the Hessian
    // weight only enters through the denominator 1 - hessian_weight * x_hinv_x.
    #[test]
    fn alo_offset_update_uses_distinct_score_and_hessian_weights() {
        let eta_hat = 1.7;
        let z = 0.4;
        let offset = -0.2;
        let x_hinv_x = 0.15;
        let hessian_weight = 3.0;
        let score_weight = 5.0;
        let expected = offset
            + (eta_hat - offset)
            + x_hinv_x * score_weight * ((eta_hat - offset) - (z - offset))
                / (1.0 - hessian_weight * x_hinv_x);
        let got = alo_eta_updatewith_offset(
            eta_hat,
            z,
            offset,
            x_hinv_x,
            score_weight,
            1.0 - hessian_weight * x_hinv_x,
        );
        assert!((got - expected).abs() < 1e-12);
    }

    // A zero Hessian weight makes the denominator exactly 1; the score-weighted
    // correction must still be applied.
    #[test]
    fn alo_offset_update_handles_zero_hessian_weight() {
        let eta_hat = 0.8;
        let z = -0.3;
        let offset = 0.1;
        let x_hinv_x = 0.4;
        let hessian_weight = 0.0;
        let score_weight = 2.5;
        let expected = offset
            + (eta_hat - offset)
            + x_hinv_x * score_weight * ((eta_hat - offset) - (z - offset));
        let got = alo_eta_updatewith_offset(
            eta_hat,
            z,
            offset,
            x_hinv_x,
            score_weight,
            1.0 - hessian_weight * x_hinv_x,
        );
        assert!((got - expected).abs() < 1e-12);
    }

    // The closure returns the pair (0.5 * (eta - 2), 0.5). The expected value 0.75 is
    // consistent with solving eta = eta_hat + a_ii * 0.5 * (eta - 2) for eta_hat = 1,
    // a_ii = 0.4 (NOTE(review): fixed-point form inferred from the numbers; confirm
    // against alo_eta_exact_frozen_curvature).
    #[test]
    fn alo_exact_frozen_curvature_converges_to_fixed_point() {
        let eta_hat = 1.0;
        let a_ii = 0.4;
        let got = alo_eta_exact_frozen_curvature(eta_hat, a_ii, &|eta| (0.5 * (eta - 2.0), 0.5))
            .expect("linear scalar fixed point should converge in one Newton step");
        assert!((got - 0.75).abs() < 1e-12);
    }

    // A closure whose second component is always zero must make the solver give up
    // with MaxIterations and report the full iteration budget.
    #[test]
    fn alo_exact_frozen_curvature_reports_nonconvergence() {
        let err = alo_eta_exact_frozen_curvature(0.0, 1.0, &|eta| (eta + 1.0, 0.0))
            .expect_err("constant residual should exhaust the scalar iteration budget");
        let AloExactScalarError::MaxIterations { iterations, .. } = err else {
            panic!("constant residual must report MaxIterations, got {err:?}");
        };
        assert_eq!(
            iterations, ALO_EXACT_SCALAR_MAX_ITERS,
            "non-convergence must report the full scalar iteration budget"
        );
    }

    // Drives the same non-converging closure through a 1 x 1 AloInput and checks that
    // the resulting error message names the failing row and the cause.
    #[test]
    fn alo_input_reports_exact_scalar_nonconvergence_with_row_context() {
        let design = Array2::from_elem((1, 1), 1.0);
        let penalized_hessian = Array2::from_elem((1, 1), 1.0);
        let hessian_weights = Array1::from_vec(vec![0.0]);
        let score_weights = Array1::from_vec(vec![0.0]);
        let working_response = Array1::from_vec(vec![0.0]);
        let eta = Array1::from_vec(vec![0.0]);
        let offset = Array1::from_vec(vec![0.0]);
        let score_curvature = |_: usize, eta: f64| (eta + 1.0, 0.0);
        let input = AloInput {
            design: &design,
            penalized_hessian: &penalized_hessian,
            hessian_weights: SignedWeightsView::from_array(&hessian_weights),
            score_weights: PsdWeightsView::try_from_array(&score_weights).expect("psd weights"),
            working_response: &working_response,
            eta: &eta,
            offset: &offset,
            link: LinkFunction::Logit,
            phi: 1.0,
            penalty_root: None,
            ridge: 0.0,
            score_curvature: Some(&score_curvature),
        };

        let err =
            compute_alo_from_input_inner(&input).expect_err("non-converged exact ALO must error");
        let msg = err.to_string();
        assert!(
            msg.contains("ALO exact frozen-curvature solve failed at row 0"),
            "missing row context in exact ALO error: {msg}"
        );
        assert!(
            msg.contains("did not converge within"),
            "missing non-convergence cause in exact ALO error: {msg}"
        );
    }

    // Feeding the same quadratic form to both variance helpers must give the same
    // value.
    #[test]
    fn gaussian_unpenalized_direct_sandwich_equals_bayes() {
        let phi = 2.5;
        let x_hinv_x = 0.3;
        let vb = bayesvar_eta(phi, x_hinv_x);
        let vs = sandwichvar_eta_from_meat(phi, x_hinv_x);
        assert!((vb - vs).abs() < 1e-12);
    }

    // The sandwich variance is the meat quadratic form multiplied by phi.
    #[test]
    fn sandwich_from_direct_meat_scales_by_phi() {
        let phi = 1.7;
        let meat_quad = 0.358;
        let got = sandwichvar_eta_from_meat(phi, meat_quad);
        let expected = phi * meat_quad;
        assert!((got - expected).abs() < 1e-12);
    }

    // Pins the index chosen for empty, single-element and ten-element inputs.
    #[test]
    fn percentile_index_matches_expected_rounding() {
        assert_eq!(percentile_index(0, 0.95), 0);
        assert_eq!(percentile_index(1, 0.95), 0);
        assert_eq!(percentile_index(10, 0.50), 5);
        assert_eq!(percentile_index(10, 0.95), 9);
    }

    // The percentile of a sorted slice is one of its elements; an empty slice
    // yields 0.0.
    #[test]
    fn percentile_from_sorted_returns_order_statistic() {
        let values = [1.0, 2.0, 3.0, 4.0, 5.0];
        assert_eq!(percentile_from_sorted(&values, 0.50), 3.0);
        assert_eq!(percentile_from_sorted(&values, 0.95), 5.0);
        assert_eq!(percentile_from_sorted(&[], 0.95), 0.0);
    }

    // Imports for the multi-block tests below (module-level `use` items apply to the
    // whole module regardless of their position).
    use super::{MultiBlockAloInput, compute_multiblock_alo, compute_multiblock_alo_leverages};
    use ndarray::{Array1, Array2};

    // With a single block the multi-block leverage must equal the scalar leverage
    // w_i * x_i^T H^{-1} x_i.
    #[test]
    fn multiblock_b1_matches_scalar_leverage() {
        let n = 3;
        let p = 2;
        let x = Array2::from_shape_vec((n, p), vec![1.0, 0.5, 0.8, -0.3, 0.2, 1.1]).unwrap();
        let w = [1.0, 2.0, 0.5];
        // H = I + sum_i w_i x_i x_i^T
        let mut h = Array2::<f64>::eye(p);
        for i in 0..n {
            for r in 0..p {
                for c in 0..p {
                    h[(r, c)] += w[i] * x[(i, r)] * x[(i, c)];
                }
            }
        }
        // Closed-form inverse of the 2 x 2 matrix H.
        let det = h[(0, 0)] * h[(1, 1)] - h[(0, 1)] * h[(1, 0)];
        let mut h_inv = Array2::<f64>::zeros((p, p));
        h_inv[(0, 0)] = h[(1, 1)] / det;
        h_inv[(1, 1)] = h[(0, 0)] / det;
        h_inv[(0, 1)] = -h[(0, 1)] / det;
        h_inv[(1, 0)] = -h[(1, 0)] / det;

        // Reference leverages computed directly from the scalar formula.
        let mut scalar_lev = vec![0.0f64; n];
        for i in 0..n {
            let mut xhx = 0.0;
            for r in 0..p {
                for c in 0..p {
                    xhx += x[(i, r)] * h_inv[(r, c)] * x[(i, c)];
                }
            }
            scalar_lev[i] = w[i] * xhx;
        }

        // Same problem expressed as a one-block multi-block input.
        let block_designs = vec![x.clone()];
        let block_weights: Vec<Array2<f64>> =
            w.iter().map(|&wi| Array2::from_elem((1, 1), wi)).collect();
        let scores: Vec<Array1<f64>> = (0..n).map(|_| Array1::from_vec(vec![0.1])).collect();
        let eta_hat: Vec<Array1<f64>> = (0..n).map(|i| Array1::from_vec(vec![i as f64])).collect();

        let input = MultiBlockAloInput {
            n_obs: n,
            n_blocks: 1,
            block_designs: &block_designs,
            penalized_hessian_inv: &h_inv,
            block_weights,
            scores,
            eta_hat,
        };

        let result = compute_multiblock_alo(&input).unwrap();
        for i in 0..n {
            assert!(
                (result.leverage[i] - scalar_lev[i]).abs() < 1e-10,
                "leverage mismatch at i={}: got {}, expected {}",
                i,
                result.leverage[i],
                scalar_lev[i]
            );
        }
    }

    // The leverage-only routine must agree with the leverages of the full ALO
    // computation on a two-block problem with non-diagonal weights.
    #[test]
    fn multiblock_leverage_only_matches_full() {
        let n = 4;
        let p1 = 2;
        let p2 = 3;
        let x1 = Array2::from_shape_fn((n, p1), |(i, j)| (i + j + 1) as f64 * 0.3);
        let x2 = Array2::from_shape_fn((n, p2), |(i, j)| (i * 2 + j) as f64 * 0.2 - 0.1);
        let p_tot = p1 + p2;
        // Identity stands in for the inverse penalized Hessian.
        let h_inv = Array2::<f64>::eye(p_tot);
        // Symmetric 2 x 2 weight matrix per observation with a small off-diagonal term.
        let block_weights: Vec<Array2<f64>> = (0..n)
            .map(|i| {
                let v = (i + 1) as f64;
                Array2::from_shape_vec((2, 2), vec![v, 0.1, 0.1, v * 0.5]).unwrap()
            })
            .collect();
        let scores: Vec<Array1<f64>> = (0..n).map(|_| Array1::from_vec(vec![0.0, 0.0])).collect();
        let eta_hat: Vec<Array1<f64>> = (0..n).map(|_| Array1::from_vec(vec![0.0, 0.0])).collect();
        let block_designs = vec![x1.clone(), x2.clone()];

        let input = MultiBlockAloInput {
            n_obs: n,
            n_blocks: 2,
            block_designs: &block_designs,
            penalized_hessian_inv: &h_inv,
            block_weights: block_weights.clone(),
            scores,
            eta_hat,
        };
        let full = compute_multiblock_alo(&input).unwrap();
        let lev_only =
            compute_multiblock_alo_leverages(n, 2, &block_designs, &h_inv, &block_weights).unwrap();

        for i in 0..n {
            assert!(
                (full.leverage[i] - lev_only[i]).abs() < 1e-12,
                "leverage mismatch at i={}: full={}, lev_only={}",
                i,
                full.leverage[i],
                lev_only[i]
            );
        }
    }

    // With W = 0 both I - W A and I - A W are the identity, so the correction is
    // A * s = (1^2 + 0.5^2) * 1 = 1.25 while the Cook distance and the variance,
    // which both carry a factor W, are zero.
    #[test]
    fn multiblock_singular_weight_still_corrects() {
        let n = 1;
        let p = 2;
        let x = Array2::from_shape_vec((1, p), vec![1.0, 0.5]).unwrap();
        let h_inv = Array2::eye(p);
        let block_designs = vec![x.clone()];
        let block_weights = vec![Array2::from_elem((1, 1), 0.0)];
        let scores = vec![Array1::from_vec(vec![1.0])];
        let eta_hat = vec![Array1::from_vec(vec![std::f64::consts::PI])];

        let input = MultiBlockAloInput {
            n_obs: n,
            n_blocks: 1,
            block_designs: &block_designs,
            penalized_hessian_inv: &h_inv,
            block_weights,
            scores,
            eta_hat,
        };
        let result = compute_multiblock_alo(&input).unwrap();
        let expected = std::f64::consts::PI + 1.25;
        assert!(
            (result.eta_tilde[0][0] - expected).abs() < 1e-12,
            "expected {}, got {}",
            expected,
            result.eta_tilde[0][0]
        );
        assert!(result.cook_distance[0].abs() < 1e-14);
        assert!(result.alo_variance[0][0].abs() < 1e-14);
    }

    // Here A = 1 * 0.5 * 1 = 0.5 and W = 2, so W * A = 1 and I - W A is exactly zero.
    // Presumably this exercises the ridge fallback of the local solve (assuming the
    // pivot tolerance is positive); the test only requires finite outputs.
    #[test]
    fn multiblock_cook_and_variance_basic() {
        let n = 1;
        let x = Array2::from_elem((1, 1), 1.0);
        let h_inv = Array2::from_elem((1, 1), 0.5);
        let block_designs = vec![x.clone()];
        let w_val = 2.0;
        let s_val = 0.4;
        let block_weights = vec![Array2::from_elem((1, 1), w_val)];
        let scores = vec![Array1::from_vec(vec![s_val])];
        let eta_hat = vec![Array1::from_vec(vec![1.0])];

        let input = MultiBlockAloInput {
            n_obs: n,
            n_blocks: 1,
            block_designs: &block_designs,
            penalized_hessian_inv: &h_inv,
            block_weights,
            scores,
            eta_hat,
        };
        let result = compute_multiblock_alo(&input).unwrap();

        assert!(result.eta_tilde[0][0].is_finite());
        assert!(result.cook_distance[0].is_finite());
        assert!(result.alo_variance[0][0].is_finite());
    }
}