1use crate::estimate::EstimationError;
2use crate::estimate::{FitGeometry, UnifiedFitResult};
3use crate::pirls;
4use gam_linalg::faer_ndarray::{FaerArrayView, FaerCholesky};
5use gam_linalg::matrix::{PsdWeightsView, SignedWeightsView};
6use gam_linalg::utils::StableSolver;
7use gam_problem::LinkFunction;
8use faer::Mat as FaerMat;
9use faer::linalg::matmul::matmul;
10use faer::prelude::ReborrowMut;
11use faer::{Accum, Par};
12use ndarray::{Array1, Array2, ArrayView1, ShapeBuilder, s};
13use std::fmt;
14
/// Errors raised while computing ALO (leave-one-out style) diagnostics.
///
/// Every variant except `InfluenceMatrixFailed` carries a human-readable
/// `reason` that the `Display` implementation prints verbatim.
#[derive(Debug, Clone)]
pub enum AloError {
    /// An input was rejected (shape mismatch, non-finite entries, bad scalar, ...).
    InvalidInput { reason: String },
    /// A weight-side vector failed validation.
    WeightInvalid { reason: String },
    /// The design matrix could not be used (e.g. no dense form was available).
    DesignDegenerate { reason: String },
    /// The penalized Hessian could not be factorized, or the results contained
    /// NaN. The raise sites visible in this part of the file pass `f64::INFINITY`.
    InfluenceMatrixFailed { condition_number: f64 },
    /// A per-row leave-one-out quantity (variance, denominator, predictor) was unusable.
    LooComputationFailed { reason: String },
}
42
43impl fmt::Display for AloError {
44 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
45 match self {
46 AloError::InvalidInput { reason }
47 | AloError::WeightInvalid { reason }
48 | AloError::DesignDegenerate { reason }
49 | AloError::LooComputationFailed { reason } => f.write_str(reason),
50 AloError::InfluenceMatrixFailed { condition_number } => {
51 write!(
52 f,
53 "ALO influence matrix failed (condition number {condition_number:.3e})"
54 )
55 }
56 }
57 }
58}
59
// Marker impl with default methods only: no `source()` chain is exposed, so
// callers see just the `Debug`/`Display` output of `AloError`.
impl std::error::Error for AloError {}
61
62impl From<AloError> for EstimationError {
63 fn from(err: AloError) -> EstimationError {
64 match err {
65 AloError::InvalidInput { reason }
66 | AloError::WeightInvalid { reason }
67 | AloError::DesignDegenerate { reason }
68 | AloError::LooComputationFailed { reason } => EstimationError::InvalidInput(reason),
69 AloError::InfluenceMatrixFailed { condition_number } => {
70 EstimationError::ModelIsIllConditioned { condition_number }
71 }
72 }
73 }
74}
75
76impl From<AloError> for String {
77 fn from(err: AloError) -> String {
78 err.to_string()
79 }
80}
81
/// Per-observation ALO diagnostics; every array has one entry per design row.
#[derive(Debug, Clone)]
pub struct AloDiagnostics {
    /// Leave-one-out style linear predictor for each row.
    pub eta_tilde: Array1<f64>,
    /// Square root of `phi * x_i' H^{-1} x_i` (clamped at zero before the root).
    pub se_bayes: Array1<f64>,
    /// Square root of the sandwich variance; identical to `se_bayes` when no
    /// penalty root with at least one row is supplied.
    pub se_sandwich: Array1<f64>,
    /// Copy of the fitted linear predictor that was passed in.
    pub pred_identity: Array1<f64>,
    /// Leverage `max(w_i, 0) * x_i' H^{-1} x_i` for each row.
    pub leverage: Array1<f64>,
    /// Copy of the Hessian-side weights used for the leverage.
    pub fisherweights: Array1<f64>,
}
96
/// One-step leave-one-out update of a single linear predictor.
///
/// Both `eta_hat` and the working response `z` are centred by `offset`; the
/// weighted difference is scaled by `x_hinv_x / denom` and added back onto the
/// fitted predictor. The caller is responsible for passing a usable `denom`.
#[inline]
fn alo_eta_updatewith_offset(
    eta_hat: f64,
    z: f64,
    offset: f64,
    x_hinv_x: f64,
    score_weight: f64,
    denom: f64,
) -> f64 {
    let fitted = eta_hat - offset;
    let working = z - offset;
    let weighted_gap = score_weight * (fitted - working);
    let correction = x_hinv_x * weighted_gap / denom;
    offset + fitted + correction
}
113
/// Per-row score/curvature callback: called as `f(row, eta)` and expected to
/// return the pair `(ell_prime, ell_double)` evaluated at `eta`.
pub type AloScalarScoreCurvature<'a> = dyn Fn(usize, f64) -> (f64, f64) + Sync + 'a;

/// Maximum number of outer Newton passes in the exact scalar solve.
const ALO_EXACT_SCALAR_MAX_ITERS: usize = 64;

/// Absolute residual tolerance at which the exact scalar solve is declared converged.
const ALO_EXACT_SCALAR_TOL: f64 = 1e-12;
136
/// Failure modes of the scalar Newton solve used for the exact ALO predictor.
#[derive(Debug, Clone, Copy, PartialEq)]
enum AloExactScalarError {
    /// The score/curvature callback returned a non-finite value at `eta`.
    NonFiniteScoreCurvature {
        eta: f64,
        ell_prime: f64,
        ell_double: f64,
    },
    /// The Newton Jacobian at `eta` was non-finite or no larger in magnitude
    /// than `ALO_DENOMINATOR_MIN`.
    DegenerateJacobian {
        eta: f64,
        jacobian: f64,
    },
    /// `residual / jacobian` was not finite; `next` is the iterate that would
    /// have resulted from taking the step.
    NonFiniteStep {
        eta: f64,
        residual: f64,
        jacobian: f64,
        next: f64,
    },
    /// The solve stopped without meeting the tolerance; `residual` and `eta`
    /// describe the last accepted iterate.
    MaxIterations {
        iterations: usize,
        residual: f64,
        eta: f64,
    },
}
180
181impl fmt::Display for AloExactScalarError {
182 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
183 match *self {
184 AloExactScalarError::NonFiniteScoreCurvature {
185 eta,
186 ell_prime,
187 ell_double,
188 } => write!(
189 f,
190 "non-finite score/curvature at eta={eta:.6e}: ell_prime={ell_prime:.6e}, ell_double={ell_double:.6e}"
191 ),
192 AloExactScalarError::DegenerateJacobian { eta, jacobian } => write!(
193 f,
194 "degenerate Newton Jacobian at eta={eta:.6e}: jacobian={jacobian:.6e}, min={ALO_DENOMINATOR_MIN:.1e}"
195 ),
196 AloExactScalarError::NonFiniteStep {
197 eta,
198 residual,
199 jacobian,
200 next,
201 } => write!(
202 f,
203 "non-finite Newton step from eta={eta:.6e}: residual={residual:.6e}, jacobian={jacobian:.6e}, next={next:.6e}"
204 ),
205 AloExactScalarError::MaxIterations {
206 iterations,
207 residual,
208 eta,
209 } => write!(
210 f,
211 "did not converge within {iterations} iterations: residual={residual:.6e}, eta={eta:.6e}, tol={ALO_EXACT_SCALAR_TOL:.1e}"
212 ),
213 }
214 }
215}
216
/// Maximum number of step halvings tried per Newton pass of the exact scalar
/// solve before the pass is abandoned.
const ALO_EXACT_SCALAR_BACKTRACKS: usize = 40;
223
224#[inline]
225fn alo_eta_exact_frozen_curvature(
226 eta_hat: f64,
227 a_ii: f64,
228 score_curvature: &dyn Fn(f64) -> (f64, f64),
229) -> Result<f64, AloExactScalarError> {
230 let residual_and_jac = |eta: f64| -> Result<(f64, f64), AloExactScalarError> {
254 let (ell_prime, ell_double) = score_curvature(eta);
255 if !ell_prime.is_finite() || !ell_double.is_finite() {
256 return Err(AloExactScalarError::NonFiniteScoreCurvature {
257 eta,
258 ell_prime,
259 ell_double,
260 });
261 }
262 Ok((eta - eta_hat - a_ii * ell_prime, 1.0 - a_ii * ell_double))
263 };
264
265 let mut eta = eta_hat;
266 let (mut residual, mut jac) = residual_and_jac(eta)?;
267 for _ in 0..ALO_EXACT_SCALAR_MAX_ITERS {
268 if residual.abs() <= ALO_EXACT_SCALAR_TOL {
269 return Ok(eta);
270 }
271 if jac.abs() <= ALO_DENOMINATOR_MIN || !jac.is_finite() {
272 return Err(AloExactScalarError::DegenerateJacobian { eta, jacobian: jac });
273 }
274 let step = residual / jac;
275 if !step.is_finite() {
276 return Err(AloExactScalarError::NonFiniteStep {
277 eta,
278 residual,
279 jacobian: jac,
280 next: eta - step,
281 });
282 }
283 let mut t = 1.0;
288 let mut advanced = false;
289 for _ in 0..ALO_EXACT_SCALAR_BACKTRACKS {
290 let trial = eta - t * step;
291 if let Ok((r_trial, j_trial)) = residual_and_jac(trial) {
292 if r_trial.abs() < residual.abs() {
293 eta = trial;
294 residual = r_trial;
295 jac = j_trial;
296 advanced = true;
297 break;
298 }
299 }
300 t *= 0.5;
301 }
302 if !advanced {
303 break;
304 }
305 }
306 Err(AloExactScalarError::MaxIterations {
307 iterations: ALO_EXACT_SCALAR_MAX_ITERS,
308 residual,
309 eta,
310 })
311}
312
/// Bayesian variance of a linear predictor: the dispersion `phi` times the
/// quadratic form `x' H^{-1} x`.
#[inline]
fn bayesvar_eta(phi: f64, x_hinv_x: f64) -> f64 {
    let variance = phi * x_hinv_x;
    variance
}
317
/// Sandwich variance of a linear predictor: `phi` times the quadratic form
/// `x_hinv_x` with the penalty term `es_norm2` and the ridge term
/// `ridge * s_norm2` subtracted.
#[inline]
fn sandwichvar_eta(phi: f64, x_hinv_x: f64, es_norm2: f64, ridge: f64, s_norm2: f64) -> f64 {
    let ridge_term = ridge * s_norm2;
    let unscaled = x_hinv_x - es_norm2 - ridge_term;
    phi * unscaled
}
325
/// Slack allowed for a slightly negative variance: `1e-12` relative to
/// `|scale|`, with the scale floored at one.
#[inline]
fn variance_negative_tolerance(scale: f64) -> f64 {
    let magnitude = f64::max(scale.abs(), 1.0);
    1e-12 * magnitude
}
331
/// Leverage above this value is counted as "high" in the leverage log summary.
const LEVERAGE_HIGH_THRESHOLD: f64 = 0.99;
/// Leverage above this value additionally triggers a per-row warning.
const LEVERAGE_VERY_HIGH_THRESHOLD: f64 = 0.999;
/// Cut-offs for the "share of rows above threshold" figures in the log summary.
const LEVERAGE_RATE_THRESHOLDS: [f64; 3] = [0.90, 0.95, 0.99];
/// Quantiles reported in the log summary (median, p95, p99).
const LEVERAGE_PERCENTILES: [f64; 3] = [0.50, 0.95, 0.99];
/// Smallest denominator / Jacobian magnitude accepted before a row is rejected.
const ALO_DENOMINATOR_MIN: f64 = 1e-12;
/// Memory budget (256 MiB) used when sizing multi-block ALO chunks.
const MULTIBLOCK_ALO_MEMORY_BUDGET_BYTES: usize = 256 * 1024 * 1024;

/// Number of right-hand-side columns (observations) solved per batch.
const ALO_RHS_BLOCK_COLS: usize = 8192;

/// Relative tolerance for the symmetry check on the penalized Hessian.
const HESSIAN_SYMMETRY_REL_TOL: f64 = 1e-8;

// NOTE(review): not referenced in the part of the file visible here;
// presumably a ridge added to small per-row block systems. Confirm at use site.
const ALO_LOCAL_BLOCK_RIDGE: f64 = 1e-6;

// NOTE(review): not referenced in the part of the file visible here;
// presumably the pivot magnitude below which an LU factor is treated as
// singular. Confirm at use site.
const LU_PIVOT_SINGULAR_TOL: f64 = 1e-12;
364
/// Index of the `quantile` position in a sorted sample of `sample_size`
/// elements: `quantile * (sample_size - 1)` rounded to nearest and clamped to
/// the last valid index. Samples with fewer than two elements map to index 0.
#[inline]
fn percentile_index(sample_size: usize, quantile: f64) -> usize {
    match sample_size.checked_sub(1) {
        None | Some(0) => 0,
        Some(last) => {
            let rounded = (quantile * last as f64).round() as usize;
            rounded.min(last)
        }
    }
}
373
374#[inline]
375fn percentile_from_sorted(sorted: &[f64], quantile: f64) -> f64 {
376 if sorted.is_empty() {
377 0.0
378 } else {
379 sorted[percentile_index(sorted.len(), quantile)]
380 }
381}
382
383#[inline]
384fn multiblock_col_offsets(block_designs: &[Array2<f64>]) -> Vec<usize> {
385 let mut offsets = Vec::with_capacity(block_designs.len());
386 let mut off = 0usize;
387 for design in block_designs {
388 offsets.push(off);
389 off += design.ncols();
390 }
391 offsets
392}
393
394#[inline]
395fn multiblock_alo_parallel_leverage_chunk_size(
396 p_tot: usize,
397 n_blocks: usize,
398 n_obs: usize,
399 max_workers: usize,
400) -> usize {
401 if p_tot == 0 || n_blocks == 0 || n_obs == 0 {
402 return 1;
403 }
404
405 let workers = max_workers.max(1);
411 let per_worker_budget = (MULTIBLOCK_ALO_MEMORY_BUDGET_BYTES / workers).max(1);
412 let elem_count_per_obs = p_tot.saturating_mul(n_blocks.saturating_add(1)).max(1);
413 let bytes_per_obs = elem_count_per_obs
414 .saturating_mul(std::mem::size_of::<f64>())
415 .max(1);
416 let budget_obs = (per_worker_budget / bytes_per_obs).max(1);
417 budget_obs.min(n_obs)
418}
419
420fn compute_alo_diagnostics_from_pirls_impl(
421 base: &pirls::PirlsResult,
422 y: ArrayView1<f64>,
423 link: LinkFunction,
424) -> Result<AloDiagnostics, EstimationError> {
425 compute_alo_diagnostics_from_pirls_inner(base, y, link).map_err(EstimationError::from)
426}
427
428fn alo_link_needs_exact_curvature_refinement(likelihood: &gam_problem::GlmLikelihoodSpec) -> bool {
441 use gam_problem::ResponseFamily;
442 matches!(
443 (&likelihood.spec.response, likelihood.link_function()),
444 (ResponseFamily::Binomial, LinkFunction::Logit)
445 | (ResponseFamily::Poisson, LinkFunction::Log)
446 )
447}
448
449fn compute_alo_diagnostics_from_pirls_inner(
450 base: &pirls::PirlsResult,
451 y: ArrayView1<f64>,
452 link: LinkFunction,
453) -> Result<AloDiagnostics, AloError> {
454 let x_dense_arc = base
455 .x_transformed
456 .try_to_dense_arc("ALO diagnostics require dense transformed design")
457 .map_err(|reason| AloError::DesignDegenerate { reason })?;
458 let x_dense = x_dense_arc.as_ref();
459 let n = x_dense.nrows();
460
461 let phi = match link {
463 LinkFunction::Log => 1.0,
464 LinkFunction::Logit
465 | LinkFunction::Probit
466 | LinkFunction::CLogLog
467 | LinkFunction::Sas
468 | LinkFunction::BetaLogistic => 1.0,
469 LinkFunction::Identity => {
470 use rayon::iter::{IntoParallelIterator, ParallelIterator};
471 let rss: f64 = (0..n)
472 .into_par_iter()
473 .map(|i| {
474 let r = y[i] - base.finalmu[i];
475 base.finalweights[i] * r * r
476 })
477 .sum();
478 let n_pos = (0..n).filter(|&i| base.finalweights[i] > 0.0).count();
485 let dof = (n_pos as f64) - base.edf;
486 let denom = dof.max(1.0);
487 rss / denom
488 }
489 };
490
491 let e = &base.reparam_result.e_transformed;
492 let ridge = base.ridge_passport.laplacehessianridge().max(0.0);
493
494 let h_dense_for_alo = base
498 .dense_stabilizedhessian_transformed(
499 "ALO diagnostics require exact dense stabilized penalized Hessian",
500 )
501 .map_err(|e| match e {
502 EstimationError::InvalidInput(reason) => AloError::InvalidInput { reason },
503 other => AloError::InvalidInput {
504 reason: format!("{other:?}"),
505 },
506 })?;
507
508 let canonical_scale: Option<Array1<f64>> =
527 if alo_link_needs_exact_curvature_refinement(&base.likelihood) {
528 let mut c = Array1::<f64>::zeros(n);
529 for i in 0..n {
530 let dmu = base.solve_dmu_deta[i];
531 let w_h = base.finalweights[i];
532 c[i] = if dmu.abs() <= ALO_DENOMINATOR_MIN || !dmu.is_finite() || !w_h.is_finite() {
533 f64::NAN
534 } else {
535 w_h / dmu
536 };
537 }
538 Some(c)
539 } else {
540 None
541 };
542
543 let inv_link_for_closure = base.likelihood.spec.link.clone();
544 let score_curvature_closure = canonical_scale.as_ref().map(|scale| {
545 move |i: usize, eta: f64| -> (f64, f64) {
546 let (mu, dmu) = crate::mixture_link::inverse_link_mu_d1_for_inverse_link(
547 &inv_link_for_closure,
548 eta,
549 )
550 .unwrap_or((f64::NAN, f64::NAN));
551 let c_i = scale[i];
552 (c_i * (mu - y[i]), c_i * dmu)
553 }
554 });
555 let score_curvature_ref: Option<&AloScalarScoreCurvature> = score_curvature_closure
556 .as_ref()
557 .map(|f| f as &AloScalarScoreCurvature);
558
559 let input = AloInput {
561 design: x_dense,
562 penalized_hessian: &h_dense_for_alo,
563 hessian_weights: base.final_weights_signed(),
564 score_weights: base.solve_weights_psd(),
565 working_response: &base.solveworking_response,
566 eta: &base.final_eta,
567 offset: &base.final_offset,
568 link,
569 phi,
570 penalty_root: if e.nrows() > 0 { Some(e) } else { None },
571 ridge,
572 score_curvature: score_curvature_ref,
573 };
574
575 let result = compute_alo_from_input_inner(&input)?;
576
577 log_leverage_diagnostics(&result.leverage, phi);
579
580 let has_nan_pred = result.eta_tilde.iter().any(|&x| x.is_nan());
582 let has_nan_se_bayes = result.se_bayes.iter().any(|&x| x.is_nan());
583 let has_nan_se_sandwich = result.se_sandwich.iter().any(|&x| x.is_nan());
584 let has_nan_leverage = result.leverage.iter().any(|&x| x.is_nan());
585
586 if has_nan_pred || has_nan_se_bayes || has_nan_se_sandwich || has_nan_leverage {
587 log::error!("[GAM ALO] NaN values found in ALO diagnostics:");
588 log::error!(
589 "[GAM ALO] eta_tilde: {} NaN values",
590 result.eta_tilde.iter().filter(|&&x| x.is_nan()).count()
591 );
592 log::error!(
593 "[GAM ALO] se_bayes: {} NaN values",
594 result.se_bayes.iter().filter(|&&x| x.is_nan()).count()
595 );
596 log::error!(
597 "[GAM ALO] se_sandwich: {} NaN values",
598 result.se_sandwich.iter().filter(|&&x| x.is_nan()).count()
599 );
600 log::error!(
601 "[GAM ALO] leverage: {} NaN values",
602 result.leverage.iter().filter(|&&x| x.is_nan()).count()
603 );
604 return Err(AloError::InfluenceMatrixFailed {
605 condition_number: f64::INFINITY,
606 });
607 }
608
609 Ok(result)
610}
611
612fn log_leverage_diagnostics(leverage: &Array1<f64>, phi: f64) {
614 let n = leverage.len();
615 if n == 0 {
616 return;
617 }
618
619 let mut invalid_count = 0usize;
620 let mut high_leverage_count = 0usize;
621 let mut threshold_counts = [0usize; LEVERAGE_RATE_THRESHOLDS.len()];
622 let mut finite_leverage = Vec::with_capacity(n);
623
624 for (obs, &ai) in leverage.iter().enumerate() {
625 if ai.is_finite() {
626 finite_leverage.push(ai);
627 }
628
629 if !(0.0..=1.0).contains(&ai) || !ai.is_finite() {
630 invalid_count += 1;
631 log::warn!("[GAM ALO] invalid leverage at i={}, a_ii={:.6e}", obs, ai);
632 } else if ai > LEVERAGE_HIGH_THRESHOLD {
633 high_leverage_count += 1;
634 if ai > LEVERAGE_VERY_HIGH_THRESHOLD {
635 log::warn!("[GAM ALO] very high leverage at i={}, a_ii={:.6e}", obs, ai);
636 }
637 }
638
639 for (idx, threshold) in LEVERAGE_RATE_THRESHOLDS.iter().enumerate() {
640 if ai > *threshold {
641 threshold_counts[idx] += 1;
642 }
643 }
644 }
645
646 if invalid_count > 0 || high_leverage_count > 0 {
647 log::warn!(
648 "[GAM ALO] leverage diagnostics: {} invalid values, {} high values (>0.99)",
649 invalid_count,
650 high_leverage_count
651 );
652 }
653
654 finite_leverage.sort_by(f64::total_cmp);
655
656 let finite_n = finite_leverage.len();
657 let a_mean = if finite_n > 0 {
658 finite_leverage.iter().copied().sum::<f64>() / finite_n as f64
659 } else {
660 0.0
661 };
662 let a_median = percentile_from_sorted(&finite_leverage, LEVERAGE_PERCENTILES[0]);
663 let a_p95 = percentile_from_sorted(&finite_leverage, LEVERAGE_PERCENTILES[1]);
664 let a_p99 = percentile_from_sorted(&finite_leverage, LEVERAGE_PERCENTILES[2]);
665 let a_max = finite_leverage.last().copied().unwrap_or(0.0);
666
667 log::warn!(
668 "[GAM ALO] leverage: n={}, mean={:.3e}, median={:.3e}, p95={:.3e}, p99={:.3e}, max={:.3e}",
669 n,
670 a_mean,
671 a_median,
672 a_p95,
673 a_p99,
674 a_max
675 );
676 log::warn!(
677 "[GAM ALO] high-leverage: a>0.90: {:.2}%, a>0.95: {:.2}%, a>0.99: {:.2}%, dispersion phi={:.3e}",
678 100.0 * (threshold_counts[0] as f64) / n as f64,
679 100.0 * (threshold_counts[1] as f64) / n as f64,
680 100.0 * (threshold_counts[2] as f64) / n as f64,
681 phi
682 );
683}
684
/// Borrowed inputs for the shared ALO solve. With `n` design rows and `p`
/// design columns, validation requires every per-row vector to have length `n`
/// and the Hessian to be `p x p`.
pub struct AloInput<'a> {
    /// Dense design matrix (`n x p`).
    pub design: &'a Array2<f64>,
    /// Dense penalized Hessian; must be finite and symmetric within tolerance.
    pub penalized_hessian: &'a Array2<f64>,
    /// Hessian-side weights; clamped at zero when forming the leverage and
    /// returned unchanged as `fisherweights`.
    pub hessian_weights: SignedWeightsView<'a>,
    /// Score-side weights used by the one-step predictor update.
    pub score_weights: PsdWeightsView<'a>,
    /// Working response `z` used by the one-step predictor update.
    pub working_response: &'a Array1<f64>,
    /// Fitted linear predictor.
    pub eta: &'a Array1<f64>,
    /// Offset included in `eta`.
    pub offset: &'a Array1<f64>,
    /// Link function of the fit.
    /// NOTE(review): not read by the solve/validation code visible here.
    pub link: LinkFunction,
    /// Dispersion; must be finite and strictly positive.
    pub phi: f64,
    /// Optional penalty root with `p` columns; when it has at least one row
    /// the sandwich variance is computed, otherwise it equals the Bayesian one.
    pub penalty_root: Option<&'a Array2<f64>>,
    /// Finite, non-negative ridge that enters the sandwich variance.
    pub ridge: f64,
    /// Optional per-row score/curvature callback; when present the predictor
    /// comes from the exact scalar solve instead of the one-step update.
    pub score_curvature: Option<&'a AloScalarScoreCurvature<'a>>,
}
735
736impl<'a> AloInput<'a> {
737 pub fn from_geometry(
739 geom: &'a FitGeometry,
740 design: &'a Array2<f64>,
741 eta: &'a Array1<f64>,
742 offset: &'a Array1<f64>,
743 link: LinkFunction,
744 phi: f64,
745 ) -> Self {
746 let psd_w = PsdWeightsView::from_view_unchecked(geom.working_weights.view());
753 Self {
754 design,
755 penalized_hessian: &geom.penalized_hessian,
756 hessian_weights: psd_w.as_signed(),
757 score_weights: psd_w,
758 working_response: &geom.working_response,
759 eta,
760 offset,
761 link,
762 phi,
763 penalty_root: None,
764 ridge: 0.0,
765 score_curvature: None,
766 }
767 }
768}
769
770pub fn compute_alo_from_input(input: &AloInput) -> Result<AloDiagnostics, EstimationError> {
776 compute_alo_from_input_inner(input).map_err(EstimationError::from)
777}
778
779fn compute_alo_from_input_inner(input: &AloInput) -> Result<AloDiagnostics, AloError> {
780 let x_dense = input.design;
781 let n = x_dense.nrows();
782 let p = x_dense.ncols();
783 let w_h = input.hessian_weights.view();
787 let w_s = input.score_weights.view();
788
789 validate_alo_solve_setup(input, n, p)?;
790
791 let factor = StableSolver::new("alo penalized hessian")
792 .factorize(input.penalized_hessian)
793 .map_err(|_| AloError::InfluenceMatrixFailed {
794 condition_number: f64::INFINITY,
795 })?;
796
797 let xt = x_dense.t();
798 let phi = input.phi;
799 let ridge = input.ridge;
800
801 let e_rank = input.penalty_root.map(|e| e.nrows()).unwrap_or(0);
802
803 let mut aii = Array1::<f64>::zeros(n);
804 let mut x_hinv_x_diag = Array1::<f64>::zeros(n);
805 let mut se_bayes = Array1::<f64>::zeros(n);
806 let mut se_sandwich = Array1::<f64>::zeros(n);
807
808 let block_cols = ALO_RHS_BLOCK_COLS;
809 let mut rhs_chunk_buf = Array2::<f64>::zeros((p, block_cols).f());
814 let mut es_chunk_storage = if e_rank > 0 {
818 FaerMat::<f64>::zeros(e_rank, block_cols)
819 } else {
820 FaerMat::<f64>::zeros(0, 0)
821 };
822
823 for chunk_start in (0..n).step_by(block_cols) {
824 let chunk_end = (chunk_start + block_cols).min(n);
825 let width = chunk_end - chunk_start;
826
827 rhs_chunk_buf
828 .slice_mut(s![.., ..width])
829 .assign(&xt.slice(s![.., chunk_start..chunk_end]));
830
831 let rhs_chunkview = rhs_chunk_buf.slice(s![.., ..width]);
832 let rhs_chunk = FaerArrayView::new(&rhs_chunkview);
833 let s_chunk = factor.solve(rhs_chunk.as_ref());
837
838 if e_rank > 0
839 && let Some(e) = input.penalty_root
840 {
841 let eview = FaerArrayView::new(e);
842 let mut es_target = es_chunk_storage.as_mut().subcols_mut(0, width);
845 matmul(
846 es_target.rb_mut(),
847 Accum::Replace,
848 eview.as_ref(),
849 s_chunk.as_ref(),
850 1.0,
851 Par::Seq,
852 );
853 }
854
855 let rhs_view = rhs_chunk_buf.slice(s![.., ..width]);
856
857 for local_col in 0..width {
858 let obs = chunk_start + local_col;
859 let rhs_col = rhs_view.column(local_col);
863 let rhs_slice = rhs_col.as_slice().expect("column-major col contiguous");
864 let s_slice = s_chunk.col_as_slice(local_col);
865
866 let mut x_hinv_x = 0.0f64;
867 let mut s_norm2 = 0.0f64;
868 for k in 0..p {
870 let sval = s_slice[k];
871 let xval = rhs_slice[k];
872 x_hinv_x = sval.mul_add(xval, x_hinv_x);
873 s_norm2 = sval.mul_add(sval, s_norm2);
874 }
875 let ai = w_h[obs].max(0.0) * x_hinv_x;
876 let mut es_norm2 = 0.0f64;
877 if e_rank > 0 {
878 let es_slice = es_chunk_storage.col_as_slice(local_col);
879 for r in 0..e_rank {
880 let v = es_slice[r];
881 es_norm2 = v.mul_add(v, es_norm2);
882 }
883 }
884 aii[obs] = ai;
885 x_hinv_x_diag[obs] = x_hinv_x;
886
887 let var_bayes = bayesvar_eta(phi, x_hinv_x);
888 let var_sandwich = if e_rank > 0 {
889 sandwichvar_eta(phi, x_hinv_x, es_norm2, ridge, s_norm2)
890 } else {
891 var_bayes
892 };
893
894 if !var_bayes.is_finite() || !var_sandwich.is_finite() {
895 return Err(AloError::LooComputationFailed {
896 reason: format!(
897 "ALO variance is not finite at row {obs}: bayes={var_bayes:.6e}, sandwich={var_sandwich:.6e}"
898 ),
899 });
900 }
901 let bayes_tol = variance_negative_tolerance(phi * x_hinv_x.abs());
902 if var_bayes < -bayes_tol {
903 return Err(AloError::LooComputationFailed {
904 reason: format!(
905 "ALO Bayesian variance is materially negative at row {obs}: var={var_bayes:.6e}, tol={bayes_tol:.6e}"
906 ),
907 });
908 }
909 if e_rank > 0 {
910 let sandwich_scale =
911 phi * (x_hinv_x.abs() + es_norm2.abs() + (ridge * s_norm2).abs());
912 let sandwich_tol = variance_negative_tolerance(sandwich_scale);
913 if var_sandwich < -sandwich_tol {
914 return Err(AloError::LooComputationFailed {
915 reason: format!(
916 "ALO sandwich variance is materially negative at row {obs}: var={var_sandwich:.6e}, tol={sandwich_tol:.6e}"
917 ),
918 });
919 }
920 }
921
922 se_bayes[obs] = var_bayes.max(0.0).sqrt();
923 se_sandwich[obs] = var_sandwich.max(0.0).sqrt();
924 }
925 }
926
927 let eta_hat = input.eta;
928 let z = input.working_response;
929 let offset = input.offset;
930
931 use rayon::prelude::*;
932 let eta_tilde_vec: Vec<f64> = (0..n)
933 .into_par_iter()
934 .map(|i| {
935 let denom_raw = 1.0 - aii[i];
936 if denom_raw <= ALO_DENOMINATOR_MIN || !denom_raw.is_finite() {
937 return Err(AloError::LooComputationFailed {
938 reason: format!(
939 "ALO denominator is too small at row {i}: a_ii={:.6e}, 1-a_ii={:.6e}, min={:.1e}",
940 aii[i], denom_raw, ALO_DENOMINATOR_MIN
941 ),
942 });
943 }
944 let one_step = alo_eta_updatewith_offset(
945 eta_hat[i],
946 z[i],
947 offset[i],
948 x_hinv_x_diag[i],
949 w_s[i],
950 denom_raw,
951 );
952 let v = if let Some(score_curvature) = input.score_curvature {
960 alo_eta_exact_frozen_curvature(
961 eta_hat[i],
962 x_hinv_x_diag[i],
963 &|eta| score_curvature(i, eta),
964 )
965 .map_err(|err| AloError::LooComputationFailed {
966 reason: format!(
967 "ALO exact frozen-curvature solve failed at row {i}: {err}"
968 ),
969 })?
970 } else {
971 one_step
972 };
973 if !v.is_finite() {
974 return Err(AloError::LooComputationFailed {
975 reason: format!("ALO eta_tilde is not finite at row {i}: eta_tilde={v}"),
976 });
977 }
978 Ok(v)
979 })
980 .collect::<Result<_, _>>()?;
981 let eta_tilde = Array1::from(eta_tilde_vec);
982
983 Ok(AloDiagnostics {
984 eta_tilde,
985 se_bayes,
986 se_sandwich,
987 pred_identity: eta_hat.clone(),
988 leverage: aii,
989 fisherweights: w_h.to_owned(),
990 })
991}
992
993fn validate_alo_solve_setup(input: &AloInput, n: usize, p: usize) -> Result<(), AloError> {
994 let h = input.penalized_hessian;
995 if h.nrows() != p || h.ncols() != p {
996 return Err(AloError::InvalidInput {
997 reason: format!(
998 "ALO diagnostics require a dense exact penalized Hessian with shape {p}x{p}; got {}x{}",
999 h.nrows(),
1000 h.ncols()
1001 ),
1002 });
1003 }
1004 if h.iter().any(|v| !v.is_finite()) {
1005 return Err(AloError::InvalidInput {
1006 reason: "ALO diagnostics require a finite dense exact penalized Hessian".to_string(),
1007 });
1008 }
1009 for i in 0..p {
1010 for j in 0..i {
1011 let a = h[[i, j]];
1012 let b = h[[j, i]];
1013 let scale = a.abs().max(b.abs()).max(1.0);
1014 if (a - b).abs() > HESSIAN_SYMMETRY_REL_TOL * scale {
1015 return Err(AloError::InvalidInput {
1016 reason: format!(
1017 "ALO diagnostics require a symmetric dense exact penalized Hessian; entries ({i},{j}) and ({j},{i}) differ by {:.3e}",
1018 (a - b).abs()
1019 ),
1020 });
1021 }
1022 }
1023 }
1024
1025 let vector_lengths = [
1026 ("hessian_weights", input.hessian_weights.len()),
1027 ("score_weights", input.score_weights.len()),
1028 ("working_response", input.working_response.len()),
1029 ("eta", input.eta.len()),
1030 ("offset", input.offset.len()),
1031 ];
1032 for (name, len) in vector_lengths {
1033 if len != n {
1034 return Err(AloError::InvalidInput {
1035 reason: format!("ALO diagnostics require {name} length {n}; got {len}"),
1036 });
1037 }
1038 }
1039 if input.hessian_weights.view().iter().any(|v| !v.is_finite()) {
1040 return Err(AloError::WeightInvalid {
1041 reason: "ALO diagnostics require finite Hessian-side weights".to_string(),
1042 });
1043 }
1044 if input.score_weights.view().iter().any(|v| !v.is_finite()) {
1045 return Err(AloError::WeightInvalid {
1046 reason: "ALO diagnostics require finite score-side weights".to_string(),
1047 });
1048 }
1049 if input.working_response.iter().any(|v| !v.is_finite()) {
1050 return Err(AloError::WeightInvalid {
1051 reason: "ALO diagnostics require finite working responses".to_string(),
1052 });
1053 }
1054 if input.eta.iter().any(|v| !v.is_finite()) || input.offset.iter().any(|v| !v.is_finite()) {
1055 return Err(AloError::InvalidInput {
1056 reason: "ALO diagnostics require finite linear predictors and offsets".to_string(),
1057 });
1058 }
1059 if !input.phi.is_finite() || input.phi <= 0.0 {
1060 return Err(AloError::InvalidInput {
1061 reason: format!(
1062 "ALO diagnostics require positive finite dispersion phi; got {}",
1063 input.phi
1064 ),
1065 });
1066 }
1067 if !input.ridge.is_finite() || input.ridge < 0.0 {
1068 return Err(AloError::InvalidInput {
1069 reason: format!(
1070 "ALO diagnostics require a finite non-negative Hessian ridge; got {}",
1071 input.ridge
1072 ),
1073 });
1074 }
1075 if let Some(e) = input.penalty_root {
1076 if e.ncols() != p {
1077 return Err(AloError::InvalidInput {
1078 reason: format!(
1079 "ALO diagnostics require penalty root to have {p} columns; got {}",
1080 e.ncols()
1081 ),
1082 });
1083 }
1084 if e.iter().any(|v| !v.is_finite()) {
1085 return Err(AloError::InvalidInput {
1086 reason: "ALO diagnostics require finite penalty-root entries".to_string(),
1087 });
1088 }
1089 }
1090 Ok(())
1091}
1092
1093pub fn compute_alo_diagnostics_from_fit(
1095 fit: &UnifiedFitResult,
1096 y: ArrayView1<f64>,
1097 link: LinkFunction,
1098) -> Result<AloDiagnostics, EstimationError> {
1099 let pirls = fit
1100 .artifacts
1101 .pirls
1102 .as_ref()
1103 .ok_or_else(|| AloError::InvalidInput {
1104 reason:
1105 "ALO diagnostics require a PIRLS-backed fit; this fit does not expose PIRLS geometry"
1106 .to_string(),
1107 })
1108 .map_err(EstimationError::from)?;
1109 compute_alo_diagnostics_from_pirls_impl(pirls, y, link)
1110}
1111
1112pub fn compute_alo_diagnostics_from_unified(
1118 unified: &UnifiedFitResult,
1119 design: &Array2<f64>,
1120 eta: &Array1<f64>,
1121 offset: &Array1<f64>,
1122 link: LinkFunction,
1123 phi: f64,
1124) -> Result<AloDiagnostics, EstimationError> {
1125 let geom = unified
1126 .geometry
1127 .as_ref()
1128 .ok_or_else(|| AloError::InvalidInput {
1129 reason: "UnifiedFitResult does not contain working-set geometry; \
1130 ALO diagnostics require geometry at convergence"
1131 .to_string(),
1132 })
1133 .map_err(EstimationError::from)?;
1134 let input = AloInput::from_geometry(geom, design, eta, offset, link, phi);
1135 compute_alo_from_input(&input)
1136}
1137
1138pub fn compute_alo_diagnostics_from_pirls(
1140 base: &pirls::PirlsResult,
1141 y: ArrayView1<f64>,
1142 link: LinkFunction,
1143) -> Result<AloDiagnostics, EstimationError> {
1144 compute_alo_diagnostics_from_pirls_impl(base, y, link)
1145}
1146
1147pub fn compute_case_deletion_from_pirls(
1166 base: &pirls::PirlsResult,
1167 y: ArrayView1<f64>,
1168 link: LinkFunction,
1169) -> Result<Option<crate::sensitivity::CaseDeletionInfluence>, EstimationError> {
1170 let x_dense_arc = base
1171 .x_transformed
1172 .try_to_dense_arc("case-deletion diagnostics require dense transformed design")
1173 .map_err(|reason| EstimationError::InvalidInput(reason))?;
1174 let x_dense = x_dense_arc.as_ref();
1175 let n = x_dense.nrows();
1176 let p = x_dense.ncols();
1177 if n == 0 || p == 0 {
1178 return Ok(None);
1179 }
1180
1181 let phi = match link {
1184 LinkFunction::Identity => {
1185 use rayon::iter::{IntoParallelIterator, ParallelIterator};
1186 let rss: f64 = (0..n)
1187 .into_par_iter()
1188 .map(|i| {
1189 let r = y[i] - base.finalmu[i];
1190 base.finalweights[i] * r * r
1191 })
1192 .sum();
1193 let dof = (n as f64) - base.edf;
1194 rss / dof.max(1.0)
1195 }
1196 _ => 1.0,
1197 };
1198 if !(phi.is_finite() && phi > 0.0) {
1199 return Ok(None);
1200 }
1201
1202 let h_dense = base
1205 .dense_stabilizedhessian_transformed(
1206 "case-deletion diagnostics require exact dense stabilized penalized Hessian",
1207 )
1208 .map_err(|e| match e {
1209 EstimationError::InvalidInput(reason) => EstimationError::InvalidInput(reason),
1210 other => EstimationError::InvalidInput(format!("{other:?}")),
1211 })?;
1212
1213 let factor = match h_dense.cholesky(faer::Side::Lower) {
1214 Ok(f) => f,
1215 Err(_) => return Ok(None),
1219 };
1220
1221 let working_weights = base.finalweights.clone();
1225 let working_residual = &base.solveworking_response - &base.final_eta;
1226
1227 let sensitivity = crate::sensitivity::FitSensitivity::from_faer_cholesky(&factor, p);
1228 Ok(sensitivity.case_deletion(
1229 x_dense,
1230 working_weights.view(),
1231 working_residual.view(),
1232 phi,
1233 ))
1234}
1235
/// Multi-block ALO results, assembled from per-chunk results in ascending
/// observation order.
#[derive(Debug, Clone)]
pub struct MultiBlockAloDiagnostics {
    /// Leave-one-out predictor arrays, concatenated across chunks.
    /// NOTE(review): presumably one array of length `n_blocks` per
    /// observation; confirm against the chunk computation.
    pub eta_tilde: Vec<Array1<f64>>,
    /// Leverage, one entry per observation (length `n_obs`).
    pub leverage: Array1<f64>,
    /// Variance arrays, concatenated across chunks in the same order as `eta_tilde`.
    pub alo_variance: Vec<Array1<f64>>,
    /// Cook's distance, one entry per observation (length `n_obs`).
    pub cook_distance: Array1<f64>,
}
1255
/// Inputs for the multi-block ALO computation.
pub struct MultiBlockAloInput<'a> {
    /// Number of observations.
    pub n_obs: usize,
    /// Number of blocks; must equal `block_designs.len()`.
    pub n_blocks: usize,
    /// One design matrix per block; their column counts must sum to the size
    /// of `penalized_hessian_inv`.
    pub block_designs: &'a [Array2<f64>],
    /// Inverse of the joint penalized Hessian; its row count defines the total
    /// coefficient count.
    pub penalized_hessian_inv: &'a Array2<f64>,
    /// NOTE(review): presumably per-observation block weight matrices
    /// (`n_blocks x n_blocks`); the consumer is outside the visible source.
    pub block_weights: Vec<Array2<f64>>,
    /// NOTE(review): presumably per-observation score vectors; the consumer is
    /// outside the visible source.
    pub scores: Vec<Array1<f64>>,
    /// NOTE(review): presumably fitted predictors per observation or block;
    /// the consumer is outside the visible source.
    pub eta_hat: Vec<Array1<f64>>,
}
1304
1305pub fn compute_multiblock_alo(
1324 input: &MultiBlockAloInput,
1325) -> Result<MultiBlockAloDiagnostics, EstimationError> {
1326 compute_multiblock_alo_inner(input).map_err(EstimationError::from)
1327}
1328
1329fn compute_multiblock_alo_inner(
1330 input: &MultiBlockAloInput,
1331) -> Result<MultiBlockAloDiagnostics, AloError> {
1332 use rayon::prelude::*;
1333
1334 let n = input.n_obs;
1335 let b = input.n_blocks;
1336 let p_tot = input.penalized_hessian_inv.nrows();
1337
1338 if input.block_designs.len() != b {
1340 return Err(AloError::InvalidInput {
1341 reason: format!(
1342 "MultiBlockAloInput: expected {} block designs, got {}",
1343 b,
1344 input.block_designs.len()
1345 ),
1346 });
1347 }
1348
1349 let col_sum: usize = input.block_designs.iter().map(|d| d.ncols()).sum();
1351 if col_sum != p_tot {
1352 return Err(AloError::InvalidInput {
1353 reason: format!(
1354 "MultiBlockAloInput: total design columns ({}) != penalized_hessian_inv size ({})",
1355 col_sum, p_tot
1356 ),
1357 });
1358 }
1359
1360 let col_offsets = multiblock_col_offsets(input.block_designs);
1361 let (chunk_size, max_concurrent_chunks) = multiblock_alo_parallel_plan(p_tot, b, n);
1362 let chunk_starts: Vec<usize> = (0..n).step_by(chunk_size).collect();
1363
1364 let mut chunk_results: Vec<Result<MultiBlockAloChunkDiagnostics, AloError>> =
1370 Vec::with_capacity(chunk_starts.len());
1371 for chunk_wave in chunk_starts.chunks(max_concurrent_chunks) {
1372 let mut wave_results: Vec<Result<MultiBlockAloChunkDiagnostics, AloError>> = chunk_wave
1373 .par_iter()
1374 .map_init(
1375 || MultiBlockAloScratch::new(b),
1376 |scratch, &chunk_start| {
1377 let chunk_end = (chunk_start + chunk_size).min(n);
1378 compute_multiblock_alo_chunk(
1379 input,
1380 &col_offsets,
1381 chunk_start,
1382 chunk_end,
1383 scratch,
1384 )
1385 },
1386 )
1387 .collect();
1388 chunk_results.append(&mut wave_results);
1389 }
1390
1391 let mut eta_tilde = Vec::with_capacity(n);
1392 let mut leverage = Array1::<f64>::zeros(n);
1393 let mut alo_variance = Vec::with_capacity(n);
1394 let mut cook_distance = Array1::<f64>::zeros(n);
1395
1396 let mut chunks = Vec::with_capacity(chunk_results.len());
1397 for result in chunk_results {
1398 chunks.push(result?);
1399 }
1400 chunks.sort_unstable_by_key(|chunk| chunk.chunk_start);
1401
1402 for chunk in chunks {
1403 let chunk_start = chunk.chunk_start;
1404 eta_tilde.extend(chunk.eta_tilde);
1405 alo_variance.extend(chunk.alo_variance);
1406 for (local_i, lev) in chunk.leverage.into_iter().enumerate() {
1407 leverage[chunk_start + local_i] = lev;
1408 }
1409 for (local_i, cook) in chunk.cook_distance.into_iter().enumerate() {
1410 cook_distance[chunk_start + local_i] = cook;
1411 }
1412 }
1413
1414 Ok(MultiBlockAloDiagnostics {
1415 eta_tilde,
1416 leverage,
1417 alo_variance,
1418 cook_distance,
1419 })
1420}
1421
1422#[inline]
1423fn multiblock_alo_parallel_plan(p_tot: usize, n_blocks: usize, n_obs: usize) -> (usize, usize) {
1424 if p_tot == 0 || n_blocks == 0 || n_obs == 0 {
1425 return (1, 1);
1426 }
1427 let bytes_per_obs = (p_tot * n_blocks * std::mem::size_of::<f64>()).max(1);
1428 let workers = rayon::current_num_threads().max(1);
1429 let max_concurrent_chunks = (MULTIBLOCK_ALO_MEMORY_BUDGET_BYTES / bytes_per_obs)
1430 .max(1)
1431 .min(workers);
1432 let per_worker_budget =
1433 (MULTIBLOCK_ALO_MEMORY_BUDGET_BYTES / max_concurrent_chunks).max(bytes_per_obs);
1434 let budget_obs = (per_worker_budget / bytes_per_obs).max(1);
1435 (budget_obs.min(n_obs), max_concurrent_chunks)
1436}
1437
/// Reusable per-worker buffers for the per-observation `b × b` algebra in
/// `compute_multiblock_alo_chunk`. All matrices are stored row-major.
struct MultiBlockAloScratch {
    /// `b × b` local matrix `A_i`.
    a_i: Vec<f64>,
    /// Product `W_i · A_i`.
    wa: Vec<f64>,
    /// Product `A_i · W_i`.
    aw: Vec<f64>,
    /// `I - W_i · A_i`, overwritten in place by its LU factors.
    imwa: Vec<f64>,
    /// `I - A_i · W_i`, overwritten in place by its LU factors.
    imaw: Vec<f64>,
    /// Row permutation recorded while factoring `imwa`.
    perm_imwa: Vec<usize>,
    /// Row permutation recorded while factoring `imaw`.
    perm_imaw: Vec<usize>,
    /// Leave-one-out correction added to the fitted predictor.
    delta_eta: Vec<f64>,
    /// Right-hand side / solution buffer for the LU solves.
    rhs_buf: Vec<f64>,
    /// Intermediate `W_i · u` vector used in the variance computation.
    w_u: Vec<f64>,
    /// Per-block variance diagonal for the current observation.
    var_diag_buf: Vec<f64>,
    /// Flattened copy of the observation's `b × b` weight matrix.
    w_flat: Vec<f64>,
    /// Forward-substitution workspace handed to `lu_solve_in_place`.
    lu_scratch: Vec<f64>,
}

impl MultiBlockAloScratch {
    /// Allocates zero-filled buffers for `b` blocks: `b * b` entries for each
    /// matrix and `b` entries for each vector and permutation.
    fn new(b: usize) -> Self {
        let square = || vec![0.0f64; b * b];
        let column = || vec![0.0f64; b];
        let permutation = || vec![0usize; b];

        Self {
            a_i: square(),
            wa: square(),
            aw: square(),
            imwa: square(),
            imaw: square(),
            perm_imwa: permutation(),
            perm_imaw: permutation(),
            delta_eta: column(),
            rhs_buf: column(),
            w_u: column(),
            var_diag_buf: column(),
            w_flat: square(),
            lu_scratch: column(),
        }
    }
}
1474
/// Diagnostics produced by `compute_multiblock_alo_chunk` for one contiguous
/// range of observations. Every per-observation container is indexed by the
/// position inside the chunk (`0..chunk_end - chunk_start`).
struct MultiBlockAloChunkDiagnostics {
    /// Global index of the first observation covered by this chunk.
    chunk_start: usize,
    /// Corrected linear predictor per observation (one entry per block).
    eta_tilde: Vec<Array1<f64>>,
    /// Leverage per observation, computed as `trace(A_i · W_i)`.
    leverage: Vec<f64>,
    /// Per-block variance diagonal per observation, clamped to be non-negative.
    alo_variance: Vec<Array1<f64>>,
    /// Quadratic form `Δη_iᵀ · W_i · Δη_i` per observation.
    cook_distance: Vec<f64>,
}
1482
1483fn compute_multiblock_alo_chunk(
1484 input: &MultiBlockAloInput,
1485 col_offsets: &[usize],
1486 chunk_start: usize,
1487 chunk_end: usize,
1488 scratch: &mut MultiBlockAloScratch,
1489) -> Result<MultiBlockAloChunkDiagnostics, AloError> {
1490 let b = input.n_blocks;
1491 let chunk_len = chunk_end - chunk_start;
1492
1493 let mut q_blocks = Vec::with_capacity(b);
1494 for blk in 0..b {
1495 let x_chunk_t = input.block_designs[blk]
1496 .slice(s![chunk_start..chunk_end, ..])
1497 .t()
1498 .to_owned();
1499 let off_b = col_offsets[blk];
1500 let h_slice = input
1501 .penalized_hessian_inv
1502 .slice(s![.., off_b..off_b + x_chunk_t.nrows()])
1503 .to_owned();
1504 q_blocks.push(h_slice.dot(&x_chunk_t));
1505 }
1506
1507 let mut eta_tilde = Vec::with_capacity(chunk_len);
1508 let mut leverage = vec![0.0f64; chunk_len];
1509 let mut alo_variance = Vec::with_capacity(chunk_len);
1510 let mut cook_distance = vec![0.0f64; chunk_len];
1511
1512 for local_i in 0..chunk_len {
1513 let i = chunk_start + local_i;
1514 let w_i = &input.block_weights[i];
1515
1516 for r in 0..b {
1518 for c in 0..b {
1519 scratch.w_flat[r * b + c] = w_i[(r, c)];
1520 }
1521 }
1522
1523 for a in 0..b {
1525 let x_a = &input.block_designs[a];
1526 let p_a = x_a.ncols();
1527 let off_a = col_offsets[a];
1528 let xa_row = x_a.row(i);
1529 for bb in 0..b {
1530 let q_bb = &q_blocks[bb];
1531 let mut dot = 0.0f64;
1532 for k in 0..p_a {
1533 dot += xa_row[k] * q_bb[(off_a + k, local_i)];
1534 }
1535 scratch.a_i[a * b + bb] = dot;
1536 }
1537 }
1538
1539 mat_mul_flat(&scratch.w_flat, &scratch.a_i, &mut scratch.wa, b);
1541 mat_mul_flat(&scratch.a_i, &scratch.w_flat, &mut scratch.aw, b);
1543
1544 let mut tr = 0.0f64;
1547 for d in 0..b {
1548 tr += scratch.aw[d * b + d];
1549 }
1550 leverage[local_i] = tr;
1551
1552 for r in 0..b {
1554 for c in 0..b {
1555 let idx = r * b + c;
1556 let id = if r == c { 1.0 } else { 0.0 };
1557 scratch.imwa[idx] = id - scratch.wa[idx];
1558 scratch.imaw[idx] = id - scratch.aw[idx];
1559 }
1560 }
1561
1562 if !lu_factor_in_place(&mut scratch.imwa, &mut scratch.perm_imwa, b) {
1568 for r in 0..b {
1569 for c in 0..b {
1570 let idx = r * b + c;
1571 let id = if r == c { 1.0 } else { 0.0 };
1572 scratch.imwa[idx] = id - scratch.wa[idx];
1573 }
1574 }
1575 for d in 0..b {
1576 scratch.imwa[d * b + d] += ALO_LOCAL_BLOCK_RIDGE;
1577 }
1578 let refactored = lu_factor_in_place(&mut scratch.imwa, &mut scratch.perm_imwa, b);
1579 assert!(
1580 refactored,
1581 "ALO local block remained singular after ridge regularization"
1582 );
1583 }
1584 if !lu_factor_in_place(&mut scratch.imaw, &mut scratch.perm_imaw, b) {
1585 for r in 0..b {
1586 for c in 0..b {
1587 let idx = r * b + c;
1588 let id = if r == c { 1.0 } else { 0.0 };
1589 scratch.imaw[idx] = id - scratch.aw[idx];
1590 }
1591 }
1592 for d in 0..b {
1593 scratch.imaw[d * b + d] += ALO_LOCAL_BLOCK_RIDGE;
1594 }
1595 let refactored = lu_factor_in_place(&mut scratch.imaw, &mut scratch.perm_imaw, b);
1596 assert!(
1597 refactored,
1598 "ALO local variance block remained singular after ridge regularization"
1599 );
1600 }
1601
1602 let s_i = &input.scores[i];
1604 for k in 0..b {
1605 scratch.rhs_buf[k] = s_i[k];
1606 }
1607 lu_solve_in_place(
1608 &scratch.imwa,
1609 &scratch.perm_imwa,
1610 &mut scratch.rhs_buf,
1611 &mut scratch.lu_scratch,
1612 b,
1613 );
1614 for r in 0..b {
1616 let mut acc = 0.0f64;
1617 let row_off = r * b;
1618 for k in 0..b {
1619 acc += scratch.a_i[row_off + k] * scratch.rhs_buf[k];
1620 }
1621 scratch.delta_eta[r] = acc;
1622 }
1623
1624 let eta_i = &input.eta_hat[i];
1625 let mut corrected = Array1::<f64>::zeros(b);
1626 for d in 0..b {
1627 corrected[d] = eta_i[d] + scratch.delta_eta[d];
1628 }
1629 eta_tilde.push(corrected);
1630
1631 let mut cook = 0.0f64;
1633 for r in 0..b {
1634 let mut w_delta_r = 0.0f64;
1635 let row_off = r * b;
1636 for k in 0..b {
1637 w_delta_r += scratch.w_flat[row_off + k] * scratch.delta_eta[k];
1638 }
1639 cook += scratch.delta_eta[r] * w_delta_r;
1640 }
1641 cook_distance[local_i] = cook;
1642
1643 for d in 0..b {
1649 let row_off = d * b;
1650 for k in 0..b {
1652 scratch.rhs_buf[k] = scratch.a_i[row_off + k];
1653 }
1654 lu_solve_in_place(
1655 &scratch.imaw,
1656 &scratch.perm_imaw,
1657 &mut scratch.rhs_buf,
1658 &mut scratch.lu_scratch,
1659 b,
1660 );
1661 for r in 0..b {
1663 let mut acc = 0.0f64;
1664 let wr = r * b;
1665 for k in 0..b {
1666 acc += scratch.w_flat[wr + k] * scratch.rhs_buf[k];
1667 }
1668 scratch.w_u[r] = acc;
1669 }
1670 lu_solve_in_place(
1672 &scratch.imwa,
1673 &scratch.perm_imwa,
1674 &mut scratch.w_u,
1675 &mut scratch.lu_scratch,
1676 b,
1677 );
1678 let mut v_dd = 0.0f64;
1680 for k in 0..b {
1681 v_dd += scratch.a_i[row_off + k] * scratch.w_u[k];
1682 }
1683 scratch.var_diag_buf[d] = v_dd.max(0.0);
1684 }
1685 let mut var_diag = Array1::<f64>::zeros(b);
1686 for d in 0..b {
1687 var_diag[d] = scratch.var_diag_buf[d];
1688 }
1689 alo_variance.push(var_diag);
1690 }
1691
1692 Ok(MultiBlockAloChunkDiagnostics {
1693 chunk_start,
1694 eta_tilde,
1695 leverage,
1696 alo_variance,
1697 cook_distance,
1698 })
1699}
1700
/// Multiplies two row-major `b × b` matrices: `out = a · b_mat`.
///
/// Every entry of the leading `b * b` elements of `out` is overwritten. Each
/// dot product is accumulated left to right starting from `0.0`.
#[inline]
fn mat_mul_flat(a: &[f64], b_mat: &[f64], out: &mut [f64], b: usize) {
    for row in 0..b {
        let row_base = row * b;
        for col in 0..b {
            out[row_base + col] = (0..b).fold(0.0f64, |partial, inner| {
                partial + a[row_base + inner] * b_mat[inner * b + col]
            });
        }
    }
}
1716
1717fn lu_factor_in_place(m: &mut [f64], perm: &mut [usize], b: usize) -> bool {
1724 for i in 0..b {
1725 perm[i] = i;
1726 }
1727 for col in 0..b {
1728 let mut max_val = m[col * b + col].abs();
1730 let mut max_idx = col;
1731 for row in (col + 1)..b {
1732 let v = m[row * b + col].abs();
1733 if v > max_val {
1734 max_val = v;
1735 max_idx = row;
1736 }
1737 }
1738 if max_val < LU_PIVOT_SINGULAR_TOL {
1739 return false;
1740 }
1741 if max_idx != col {
1742 for k in 0..b {
1744 m.swap(col * b + k, max_idx * b + k);
1745 }
1746 perm.swap(col, max_idx);
1747 }
1748 let pivot = m[col * b + col];
1749 for row in (col + 1)..b {
1750 let factor = m[row * b + col] / pivot;
1751 m[row * b + col] = factor; for k in (col + 1)..b {
1753 let upd = factor * m[col * b + k];
1754 m[row * b + k] -= upd;
1755 }
1756 }
1757 }
1758 true
1759}
1760
/// Solves a linear system using the packed LU factors `m` (row-major `b × b`)
/// and the row permutation `perm`, overwriting `rhs` with the solution.
///
/// Forward substitution reads `rhs` through `perm` and stores the intermediate
/// vector in `scratch[..b]` (the lower factor has an implicit unit diagonal).
/// Back substitution then writes the solution into `rhs`, dividing by the
/// diagonal of the upper factor.
fn lu_solve_in_place(m: &[f64], perm: &[usize], rhs: &mut [f64], scratch: &mut [f64], b: usize) {
    let y = &mut scratch[..b];

    // Forward pass: L · y = P · rhs.
    for row in 0..b {
        let reduced = (0..row).fold(rhs[perm[row]], |acc, k| acc - m[row * b + k] * y[k]);
        y[row] = reduced;
    }

    // Backward pass: U · x = y, filling `rhs` from the last row upwards.
    for row in (0..b).rev() {
        let reduced = ((row + 1)..b).fold(y[row], |acc, k| acc - m[row * b + k] * rhs[k]);
        rhs[row] = reduced / m[row * b + row];
    }
}
1782
1783pub fn compute_multiblock_alo_leverages(
1791 n_obs: usize,
1792 n_blocks: usize,
1793 block_designs: &[Array2<f64>],
1794 penalized_hessian_inv: &Array2<f64>,
1795 block_weights: &[Array2<f64>],
1796) -> Result<Array1<f64>, EstimationError> {
1797 use rayon::prelude::*;
1798
1799 let n = n_obs;
1800 let b = n_blocks;
1801 let p_tot = penalized_hessian_inv.nrows();
1802
1803 let col_offsets = multiblock_col_offsets(block_designs);
1804 let max_workers = rayon::current_num_threads();
1805 let chunk_size = multiblock_alo_parallel_leverage_chunk_size(p_tot, b, n, max_workers);
1806
1807 let mut leverage = Array1::<f64>::zeros(n);
1808
1809 let block_widths: Vec<usize> = block_designs.iter().map(|d| d.ncols()).collect();
1813 let mut h_stripes: Vec<FaerMat<f64>> = block_widths
1814 .iter()
1815 .map(|&p_blk| FaerMat::<f64>::zeros(p_tot, p_blk))
1816 .collect();
1817 for blk in 0..b {
1820 let off_b = col_offsets[blk];
1821 let p_blk = block_widths[blk];
1822 let stripe = &mut h_stripes[blk];
1823 for c in 0..p_blk {
1824 for r in 0..p_tot {
1825 stripe[(r, c)] = penalized_hessian_inv[(r, off_b + c)];
1826 }
1827 }
1828 }
1829
1830 leverage
1831 .as_slice_mut()
1832 .expect("newly allocated Array1 is contiguous")
1833 .par_chunks_mut(chunk_size)
1834 .enumerate()
1835 .for_each(|(chunk_idx, leverage_chunk)| {
1836 let chunk_start = chunk_idx * chunk_size;
1837 let chunk_len = leverage_chunk.len();
1838 let chunk_end = chunk_start + chunk_len;
1839
1840 let bb_sz = b * b;
1844 let mut a_i = vec![0.0f64; bb_sz];
1845 let mut aw = vec![0.0f64; bb_sz];
1846 let mut w_flat = vec![0.0f64; bb_sz];
1847
1848 let mut q_storage: Vec<FaerMat<f64>> = block_widths
1852 .iter()
1853 .map(|_| FaerMat::<f64>::zeros(p_tot, chunk_len))
1854 .collect();
1855
1856 let mut xt_storage: Vec<FaerMat<f64>> = block_widths
1860 .iter()
1861 .map(|&p_blk| FaerMat::<f64>::zeros(p_blk, chunk_len))
1862 .collect();
1863
1864 for blk in 0..b {
1869 let p_blk = block_widths[blk];
1870
1871 let x_chunk = block_designs[blk].slice(s![chunk_start..chunk_end, ..]);
1872 let xt = &mut xt_storage[blk];
1873 for local_i in 0..chunk_len {
1874 let row = x_chunk.row(local_i);
1875 for j in 0..p_blk {
1876 xt[(j, local_i)] = row[j];
1877 }
1878 }
1879
1880 matmul(
1881 q_storage[blk].as_mut(),
1882 Accum::Replace,
1883 h_stripes[blk].as_ref(),
1884 xt_storage[blk].as_ref(),
1885 1.0,
1886 Par::Seq,
1887 );
1888 }
1889
1890 for local_i in 0..chunk_len {
1891 let i = chunk_start + local_i;
1892 let w_i = &block_weights[i];
1893
1894 for r in 0..b {
1896 for c in 0..b {
1897 w_flat[r * b + c] = w_i[(r, c)];
1898 }
1899 }
1900
1901 for r in 0..bb_sz {
1905 a_i[r] = 0.0;
1906 }
1907 for k in 0..b {
1908 let q_k = &q_storage[k];
1909 let q_col = q_k.col_as_slice(local_i);
1910 for a in 0..b {
1911 let p_a = block_widths[a];
1912 let off_a = col_offsets[a];
1913 let xa_row = block_designs[a].row(i);
1914 let mut dot = 0.0f64;
1915 for j in 0..p_a {
1916 dot = xa_row[j].mul_add(q_col[off_a + j], dot);
1917 }
1918 a_i[a * b + k] = dot;
1919 }
1920 }
1921
1922 mat_mul_flat(&a_i, &w_flat, &mut aw, b);
1924 let mut tr = 0.0f64;
1925 for d in 0..b {
1926 tr += aw[d * b + d];
1927 }
1928 leverage_chunk[local_i] = tr;
1929 }
1930 });
1931
1932 Ok(leverage)
1933}
1934
// Unit tests for the scalar ALO helpers, the percentile helpers, and the
// multi-block ALO entry points.
#[cfg(test)]
mod tests {
    use super::{
        ALO_EXACT_SCALAR_MAX_ITERS, AloExactScalarError, AloInput, alo_eta_exact_frozen_curvature,
        alo_eta_updatewith_offset, bayesvar_eta, compute_alo_from_input_inner,
        percentile_from_sorted, percentile_index, sandwichvar_eta,
    };
    use gam_linalg::matrix::{PsdWeightsView, SignedWeightsView};
    use gam_problem::LinkFunction;

    /// With unit weights, the offset-aware update must equal the classic
    /// leave-one-out formula applied to offset-centered quantities.
    #[test]
    fn alo_offset_update_matches_centered_algebra() {
        let eta_hat = 11.0;
        let z = 13.0;
        let offset = 10.0;
        let x_hinv_x = 0.2;
        let hessian_weight = 1.0;
        let score_weight = 1.0;
        let leverage = hessian_weight * x_hinv_x;
        // Closed form on centered values, shifted back by the offset.
        let expected = offset + ((eta_hat - offset) - leverage * (z - offset)) / (1.0 - leverage);
        let got =
            alo_eta_updatewith_offset(eta_hat, z, offset, x_hinv_x, score_weight, 1.0 - leverage);
        assert!((got - expected).abs() < 1e-12);
    }

    /// With a zero offset the update must reduce to
    /// `(eta_hat - leverage * z) / (1 - leverage)`.
    #[test]
    fn alo_offset_update_reduces_to_classicwhen_offsetzero() {
        let eta_hat = 1.25;
        let z = -0.5;
        let x_hinv_x = 0.35;
        let hessian_weight = 1.0;
        let score_weight = 1.0;
        let leverage = hessian_weight * x_hinv_x;
        let expected = (eta_hat - leverage * z) / (1.0 - leverage);
        let got =
            alo_eta_updatewith_offset(eta_hat, z, 0.0, x_hinv_x, score_weight, 1.0 - leverage);
        assert!((got - expected).abs() < 1e-12);
    }

    /// The score weight scales the numerator while the Hessian weight only
    /// enters through the denominator `1 - hessian_weight * x_hinv_x`.
    #[test]
    fn alo_offset_update_uses_distinct_score_and_hessian_weights() {
        let eta_hat = 1.7;
        let z = 0.4;
        let offset = -0.2;
        let x_hinv_x = 0.15;
        let hessian_weight = 3.0;
        let score_weight = 5.0;
        let expected = offset
            + (eta_hat - offset)
            + x_hinv_x * score_weight * ((eta_hat - offset) - (z - offset))
                / (1.0 - hessian_weight * x_hinv_x);
        let got = alo_eta_updatewith_offset(
            eta_hat,
            z,
            offset,
            x_hinv_x,
            score_weight,
            1.0 - hessian_weight * x_hinv_x,
        );
        assert!((got - expected).abs() < 1e-12);
    }

    /// A zero Hessian weight makes the denominator exactly 1, so the
    /// correction is just the score-weighted numerator.
    #[test]
    fn alo_offset_update_handles_zero_hessian_weight() {
        let eta_hat = 0.8;
        let z = -0.3;
        let offset = 0.1;
        let x_hinv_x = 0.4;
        let hessian_weight = 0.0;
        let score_weight = 2.5;
        let expected = offset
            + (eta_hat - offset)
            + x_hinv_x * score_weight * ((eta_hat - offset) - (z - offset));
        let got = alo_eta_updatewith_offset(
            eta_hat,
            z,
            offset,
            x_hinv_x,
            score_weight,
            1.0 - hessian_weight * x_hinv_x,
        );
        assert!((got - expected).abs() < 1e-12);
    }

    /// A closure whose first component is linear in `eta` and whose second
    /// component is constant must converge; the expected solution is 0.75.
    #[test]
    fn alo_exact_frozen_curvature_converges_to_fixed_point() {
        let eta_hat = 1.0;
        let a_ii = 0.4;
        let got = alo_eta_exact_frozen_curvature(eta_hat, a_ii, &|eta| (0.5 * (eta - 2.0), 0.5))
            .expect("linear scalar fixed point should converge in one Newton step");
        assert!((got - 0.75).abs() < 1e-12);
    }

    /// A closure returning `(eta + 1, 0)` must exhaust the iteration budget
    /// and report it through `AloExactScalarError::MaxIterations`.
    #[test]
    fn alo_exact_frozen_curvature_reports_nonconvergence() {
        let err = alo_eta_exact_frozen_curvature(0.0, 1.0, &|eta| (eta + 1.0, 0.0))
            .expect_err("constant residual should exhaust the scalar iteration budget");
        let AloExactScalarError::MaxIterations { iterations, .. } = err else {
            panic!("constant residual must report MaxIterations, got {err:?}");
        };
        assert_eq!(
            iterations, ALO_EXACT_SCALAR_MAX_ITERS,
            "non-convergence must report the full scalar iteration budget"
        );
    }

    /// A non-converging scalar solve reached through `AloInput` must surface
    /// an error whose message names the failing row and the cause.
    #[test]
    fn alo_input_reports_exact_scalar_nonconvergence_with_row_context() {
        // Minimal 1×1 problem; only the `score_curvature` callback matters here.
        let design = Array2::from_elem((1, 1), 1.0);
        let penalized_hessian = Array2::from_elem((1, 1), 1.0);
        let hessian_weights = Array1::from_vec(vec![0.0]);
        let score_weights = Array1::from_vec(vec![0.0]);
        let working_response = Array1::from_vec(vec![0.0]);
        let eta = Array1::from_vec(vec![0.0]);
        let offset = Array1::from_vec(vec![0.0]);
        // Same non-converging closure as in the scalar non-convergence test,
        // with an extra (ignored) row index argument.
        let score_curvature = |_: usize, eta: f64| (eta + 1.0, 0.0);
        let input = AloInput {
            design: &design,
            penalized_hessian: &penalized_hessian,
            hessian_weights: SignedWeightsView::from_array(&hessian_weights),
            score_weights: PsdWeightsView::try_from_array(&score_weights).expect("psd weights"),
            working_response: &working_response,
            eta: &eta,
            offset: &offset,
            link: LinkFunction::Logit,
            phi: 1.0,
            penalty_root: None,
            ridge: 0.0,
            score_curvature: Some(&score_curvature),
        };

        let err =
            compute_alo_from_input_inner(&input).expect_err("non-converged exact ALO must error");
        let msg = err.to_string();
        assert!(
            msg.contains("ALO exact frozen-curvature solve failed at row 0"),
            "missing row context in exact ALO error: {msg}"
        );
        assert!(
            msg.contains("did not converge within"),
            "missing non-convergence cause in exact ALO error: {msg}"
        );
    }

    /// With the three penalty-related inputs set to zero, the sandwich
    /// variance must coincide with the Bayesian variance.
    #[test]
    fn gaussian_unpenalized_sandwich_equals_bayes() {
        let phi = 2.5;
        let x_hinv_x = 0.3;
        let es_norm2 = 0.0;
        let ridge = 0.0;
        let s_norm2 = 0.0;
        let vb = bayesvar_eta(phi, x_hinv_x);
        let vs = sandwichvar_eta(phi, x_hinv_x, es_norm2, ridge, s_norm2);
        assert!((vb - vs).abs() < 1e-12);
    }

    /// Pins `sandwichvar_eta` to `phi * (x_hinv_x - es_norm2 - ridge * s_norm2)`.
    #[test]
    fn sandwich_matches_direct_linear_gaussian_formula() {
        let phi = 1.7;
        let x_hinv_x = 0.41;
        let es_norm2 = 0.05;
        let ridge = 1e-3;
        let s_norm2 = 2.0;
        let got = sandwichvar_eta(phi, x_hinv_x, es_norm2, ridge, s_norm2);
        let expected = phi * (x_hinv_x - es_norm2 - ridge * s_norm2);
        assert!((got - expected).abs() < 1e-12);
    }

    /// Pins the index chosen for a few `(len, quantile)` pairs, including the
    /// empty and single-element cases.
    #[test]
    fn percentile_index_matches_expected_rounding() {
        assert_eq!(percentile_index(0, 0.95), 0);
        assert_eq!(percentile_index(1, 0.95), 0);
        assert_eq!(percentile_index(10, 0.50), 5);
        assert_eq!(percentile_index(10, 0.95), 9);
    }

    /// The percentile of a sorted slice is one of its elements; an empty
    /// slice yields 0.0.
    #[test]
    fn percentile_from_sorted_returns_order_statistic() {
        let values = [1.0, 2.0, 3.0, 4.0, 5.0];
        assert_eq!(percentile_from_sorted(&values, 0.50), 3.0);
        assert_eq!(percentile_from_sorted(&values, 0.95), 5.0);
        assert_eq!(percentile_from_sorted(&[], 0.95), 0.0);
    }

    use super::{MultiBlockAloInput, compute_multiblock_alo, compute_multiblock_alo_leverages};
    use ndarray::{Array1, Array2};

    /// With a single block, the multi-block leverage must equal the scalar
    /// leverage `w_i · x_iᵀ H⁻¹ x_i`.
    #[test]
    fn multiblock_b1_matches_scalar_leverage() {
        let n = 3;
        let p = 2;
        let x = Array2::from_shape_vec((n, p), vec![1.0, 0.5, 0.8, -0.3, 0.2, 1.1]).unwrap();
        let w = [1.0, 2.0, 0.5];
        // H = I + Σ_i w_i · x_i x_iᵀ.
        let mut h = Array2::<f64>::eye(p);
        for i in 0..n {
            for r in 0..p {
                for c in 0..p {
                    h[(r, c)] += w[i] * x[(i, r)] * x[(i, c)];
                }
            }
        }
        // Closed-form 2×2 inverse via the adjugate.
        let det = h[(0, 0)] * h[(1, 1)] - h[(0, 1)] * h[(1, 0)];
        let mut h_inv = Array2::<f64>::zeros((p, p));
        h_inv[(0, 0)] = h[(1, 1)] / det;
        h_inv[(1, 1)] = h[(0, 0)] / det;
        h_inv[(0, 1)] = -h[(0, 1)] / det;
        h_inv[(1, 0)] = -h[(1, 0)] / det;

        // Reference scalar leverages.
        let mut scalar_lev = vec![0.0f64; n];
        for i in 0..n {
            let mut xhx = 0.0;
            for r in 0..p {
                for c in 0..p {
                    xhx += x[(i, r)] * h_inv[(r, c)] * x[(i, c)];
                }
            }
            scalar_lev[i] = w[i] * xhx;
        }

        // Same problem expressed as a one-block multi-block input.
        let block_designs = vec![x.clone()];
        let block_weights: Vec<Array2<f64>> =
            w.iter().map(|&wi| Array2::from_elem((1, 1), wi)).collect();
        let scores: Vec<Array1<f64>> = (0..n).map(|_| Array1::from_vec(vec![0.1])).collect();
        let eta_hat: Vec<Array1<f64>> = (0..n).map(|i| Array1::from_vec(vec![i as f64])).collect();

        let input = MultiBlockAloInput {
            n_obs: n,
            n_blocks: 1,
            block_designs: &block_designs,
            penalized_hessian_inv: &h_inv,
            block_weights,
            scores,
            eta_hat,
        };

        let result = compute_multiblock_alo(&input).unwrap();
        for i in 0..n {
            assert!(
                (result.leverage[i] - scalar_lev[i]).abs() < 1e-10,
                "leverage mismatch at i={}: got {}, expected {}",
                i,
                result.leverage[i],
                scalar_lev[i]
            );
        }
    }

    /// The leverage-only entry point must agree with the leverages returned
    /// by the full multi-block ALO computation.
    #[test]
    fn multiblock_leverage_only_matches_full() {
        let n = 4;
        // Two blocks of different widths.
        let p1 = 2;
        let p2 = 3;
        let x1 = Array2::from_shape_fn((n, p1), |(i, j)| (i + j + 1) as f64 * 0.3);
        let x2 = Array2::from_shape_fn((n, p2), |(i, j)| (i * 2 + j) as f64 * 0.2 - 0.1);
        let p_tot = p1 + p2;
        let h_inv = Array2::<f64>::eye(p_tot);
        // Symmetric 2×2 weights with a small off-diagonal coupling.
        let block_weights: Vec<Array2<f64>> = (0..n)
            .map(|i| {
                let v = (i + 1) as f64;
                Array2::from_shape_vec((2, 2), vec![v, 0.1, 0.1, v * 0.5]).unwrap()
            })
            .collect();
        let scores: Vec<Array1<f64>> = (0..n).map(|_| Array1::from_vec(vec![0.0, 0.0])).collect();
        let eta_hat: Vec<Array1<f64>> = (0..n).map(|_| Array1::from_vec(vec![0.0, 0.0])).collect();
        let block_designs = vec![x1.clone(), x2.clone()];

        let input = MultiBlockAloInput {
            n_obs: n,
            n_blocks: 2,
            block_designs: &block_designs,
            penalized_hessian_inv: &h_inv,
            block_weights: block_weights.clone(),
            scores,
            eta_hat,
        };
        let full = compute_multiblock_alo(&input).unwrap();
        let lev_only =
            compute_multiblock_alo_leverages(n, 2, &block_designs, &h_inv, &block_weights).unwrap();

        for i in 0..n {
            assert!(
                (full.leverage[i] - lev_only[i]).abs() < 1e-12,
                "leverage mismatch at i={}: full={}, lev_only={}",
                i,
                full.leverage[i],
                lev_only[i]
            );
        }
    }

    /// A zero weight must still produce the score-driven correction
    /// `eta_hat + A · s`, with zero Cook distance and zero variance.
    #[test]
    fn multiblock_singular_weight_still_corrects() {
        let n = 1;
        let p = 2;
        let x = Array2::from_shape_vec((1, p), vec![1.0, 0.5]).unwrap();
        let h_inv = Array2::eye(p);
        let block_designs = vec![x.clone()];
        // W = 0, so I - W·A is the identity.
        let block_weights = vec![Array2::from_elem((1, 1), 0.0)];
        let scores = vec![Array1::from_vec(vec![1.0])];
        let eta_hat = vec![Array1::from_vec(vec![std::f64::consts::PI])];

        let input = MultiBlockAloInput {
            n_obs: n,
            n_blocks: 1,
            block_designs: &block_designs,
            penalized_hessian_inv: &h_inv,
            block_weights,
            scores,
            eta_hat,
        };
        let result = compute_multiblock_alo(&input).unwrap();
        // A = xᵀ I x = 1² + 0.5² = 1.25, and the score is 1.
        let expected = std::f64::consts::PI + 1.25;
        assert!(
            (result.eta_tilde[0][0] - expected).abs() < 1e-12,
            "expected {}, got {}",
            expected,
            result.eta_tilde[0][0]
        );
        // Both quantities carry a factor of W, which is zero.
        assert!(result.cook_distance[0].abs() < 1e-14);
        assert!(result.alo_variance[0][0].abs() < 1e-14);
    }

    /// Smoke test: a 1×1 problem with `W · A = 2 · 0.5 = 1` (so `I - W·A` is
    /// exactly zero) must still return finite diagnostics.
    #[test]
    fn multiblock_cook_and_variance_basic() {
        let n = 1;
        let x = Array2::from_elem((1, 1), 1.0);
        // A = x · H⁻¹ · x = 0.5.
        let h_inv = Array2::from_elem((1, 1), 0.5);
        let block_designs = vec![x.clone()];
        let w_val = 2.0;
        let s_val = 0.4;
        let block_weights = vec![Array2::from_elem((1, 1), w_val)];
        let scores = vec![Array1::from_vec(vec![s_val])];
        let eta_hat = vec![Array1::from_vec(vec![1.0])];

        let input = MultiBlockAloInput {
            n_obs: n,
            n_blocks: 1,
            block_designs: &block_designs,
            penalized_hessian_inv: &h_inv,
            block_weights,
            scores,
            eta_hat,
        };
        let result = compute_multiblock_alo(&input).unwrap();

        // NOTE(review): presumably this exercises the ridge fallback in the
        // local LU factorization — confirm against LU_PIVOT_SINGULAR_TOL.
        assert!(result.eta_tilde[0][0].is_finite());
        assert!(result.cook_distance[0].is_finite());
        assert!(result.alo_variance[0][0].is_finite());
    }
}