1use crate::estimate::EstimationError;
2use crate::estimate::{FitGeometry, UnifiedFitResult};
3use crate::pirls;
4use gam_linalg::faer_ndarray::{FaerArrayView, FaerCholesky};
5use gam_linalg::matrix::{PsdWeightsView, SignedWeightsView};
6use gam_linalg::utils::StableSolver;
7use gam_problem::LinkFunction;
8use faer::Mat as FaerMat;
9use faer::linalg::matmul::matmul;
10use faer::prelude::ReborrowMut;
11use faer::{Accum, Par};
12use ndarray::{Array1, Array2, ArrayView1, ShapeBuilder, s};
13use std::fmt;
14
/// Errors raised while computing ALO diagnostics in this module.
///
/// Every variant except [`AloError::InfluenceMatrixFailed`] carries a
/// ready-to-print `reason`; the `Display` impl emits that string verbatim.
#[derive(Debug, Clone)]
pub enum AloError {
    /// An input failed validation (shape, finiteness, symmetry, missing geometry, ...).
    InvalidInput { reason: String },
    /// A weight vector was unusable. In this file the variant is also returned
    /// for a non-finite working response.
    WeightInvalid { reason: String },
    /// The design matrix could not be used (e.g. no dense form was available).
    DesignDegenerate { reason: String },
    /// Factorizing the penalized Hessian failed, or the finished diagnostics
    /// contained NaN. Converted to `EstimationError::ModelIsIllConditioned`.
    InfluenceMatrixFailed { condition_number: f64 },
    /// A per-observation quantity (variance, `1 - a_ii`, `eta_tilde`, exact
    /// scalar solve) was unusable.
    LooComputationFailed { reason: String },
}
42
43impl fmt::Display for AloError {
44 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
45 match self {
46 AloError::InvalidInput { reason }
47 | AloError::WeightInvalid { reason }
48 | AloError::DesignDegenerate { reason }
49 | AloError::LooComputationFailed { reason } => f.write_str(reason),
50 AloError::InfluenceMatrixFailed { condition_number } => {
51 write!(
52 f,
53 "ALO influence matrix failed (condition number {condition_number:.3e})"
54 )
55 }
56 }
57 }
58}
59
// Marker impl: no methods are overridden, so the `std::error::Error` defaults apply.
impl std::error::Error for AloError {}
61
62impl From<AloError> for EstimationError {
63 fn from(err: AloError) -> EstimationError {
64 match err {
65 AloError::InvalidInput { reason }
66 | AloError::WeightInvalid { reason }
67 | AloError::DesignDegenerate { reason }
68 | AloError::LooComputationFailed { reason } => EstimationError::InvalidInput(reason),
69 AloError::InfluenceMatrixFailed { condition_number } => {
70 EstimationError::ModelIsIllConditioned { condition_number }
71 }
72 }
73 }
74}
75
76impl From<AloError> for String {
77 fn from(err: AloError) -> String {
78 err.to_string()
79 }
80}
81
/// Per-observation output of the single-predictor ALO solve.
///
/// All vectors have one entry per design row. Below, `x_i` is row `i` of the
/// design, `H` the penalized Hessian, and `s_i = H^{-1} x_i`.
#[derive(Debug, Clone)]
pub struct AloDiagnostics {
    /// Leave-one-out linear predictor: either the one-step update or, when a
    /// score/curvature callback is supplied, the exact scalar solve.
    pub eta_tilde: Array1<f64>,
    /// `sqrt(max(phi * x_i' s_i, 0))`.
    pub se_bayes: Array1<f64>,
    /// `sqrt(max(phi * (x_i' s_i - |E s_i|^2 - ridge * |s_i|^2), 0))`;
    /// equal to `se_bayes` when no penalty root is supplied.
    pub se_sandwich: Array1<f64>,
    /// Copy of the fitted linear predictor passed in as `eta`.
    pub pred_identity: Array1<f64>,
    /// `max(w_i, 0) * x_i' s_i`, with `w_i` the Hessian-side weight.
    pub leverage: Array1<f64>,
    /// Copy of the Hessian-side weights used for the leverages.
    pub fisherweights: Array1<f64>,
}
96
/// One-step ALO update of a linear predictor that carries a fixed offset.
///
/// The offset is removed from both the fitted predictor and the working
/// response, the correction `x_hinv_x * score_weight * (eta - z) / denom` is
/// applied to the offset-free part, and the offset is added back.
///
/// * `eta_hat` - fitted linear predictor (offset included).
/// * `z` - working response (offset included).
/// * `offset` - fixed offset for this observation.
/// * `x_hinv_x` - quadratic form `x' H^{-1} x` for this observation.
/// * `score_weight` - weight multiplying the residual `eta - z`.
/// * `denom` - divisor of the correction; not checked here, so the caller
///   must have rejected values too close to zero.
#[inline]
fn alo_eta_updatewith_offset(
    eta_hat: f64,
    z: f64,
    offset: f64,
    x_hinv_x: f64,
    score_weight: f64,
    denom: f64,
) -> f64 {
    let fitted = eta_hat - offset;
    let target = z - offset;
    let correction = x_hinv_x * (score_weight * (fitted - target)) / denom;
    offset + fitted + correction
}
113
/// Per-observation callback `(row index, eta) -> (ell_prime, ell_double)`.
///
/// The returned pair is consumed by the exact scalar solve as the score and
/// curvature at `eta`. The `Sync` bound is needed because the callback is
/// invoked from a rayon parallel iterator over observations.
pub type AloScalarScoreCurvature<'a> = dyn Fn(usize, f64) -> (f64, f64) + Sync + 'a;

/// Maximum number of Newton updates attempted by the exact scalar solve.
const ALO_EXACT_SCALAR_MAX_ITERS: usize = 64;

/// Absolute residual tolerance at which the exact scalar solve stops.
const ALO_EXACT_SCALAR_TOL: f64 = 1e-12;
136
/// Failure modes of the exact per-observation scalar Newton solve.
///
/// Each variant records the numeric state at the point of failure so the
/// `Display` text can be used directly in an error message.
#[derive(Debug, Clone, Copy, PartialEq)]
enum AloExactScalarError {
    /// The score/curvature callback returned a non-finite value at `eta`.
    NonFiniteScoreCurvature {
        eta: f64,
        ell_prime: f64,
        ell_double: f64,
    },
    /// The Newton Jacobian was non-finite or no larger in magnitude than
    /// `ALO_DENOMINATOR_MIN`.
    DegenerateJacobian {
        eta: f64,
        jacobian: f64,
    },
    /// `residual / jacobian` was not finite; `next` is the iterate it would
    /// have produced.
    NonFiniteStep {
        eta: f64,
        residual: f64,
        jacobian: f64,
        next: f64,
    },
    /// The solve ended without reaching the tolerance; carries the last
    /// iterate and its residual.
    MaxIterations {
        iterations: usize,
        residual: f64,
        eta: f64,
    },
}
180
181impl fmt::Display for AloExactScalarError {
182 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
183 match *self {
184 AloExactScalarError::NonFiniteScoreCurvature {
185 eta,
186 ell_prime,
187 ell_double,
188 } => write!(
189 f,
190 "non-finite score/curvature at eta={eta:.6e}: ell_prime={ell_prime:.6e}, ell_double={ell_double:.6e}"
191 ),
192 AloExactScalarError::DegenerateJacobian { eta, jacobian } => write!(
193 f,
194 "degenerate Newton Jacobian at eta={eta:.6e}: jacobian={jacobian:.6e}, min={ALO_DENOMINATOR_MIN:.1e}"
195 ),
196 AloExactScalarError::NonFiniteStep {
197 eta,
198 residual,
199 jacobian,
200 next,
201 } => write!(
202 f,
203 "non-finite Newton step from eta={eta:.6e}: residual={residual:.6e}, jacobian={jacobian:.6e}, next={next:.6e}"
204 ),
205 AloExactScalarError::MaxIterations {
206 iterations,
207 residual,
208 eta,
209 } => write!(
210 f,
211 "did not converge within {iterations} iterations: residual={residual:.6e}, eta={eta:.6e}, tol={ALO_EXACT_SCALAR_TOL:.1e}"
212 ),
213 }
214 }
215}
216
/// Maximum number of step halvings tried per Newton update in the exact scalar solve.
const ALO_EXACT_SCALAR_BACKTRACKS: usize = 40;
223
224#[inline]
225fn alo_eta_exact_frozen_curvature(
226 eta_hat: f64,
227 a_ii: f64,
228 score_curvature: &dyn Fn(f64) -> (f64, f64),
229) -> Result<f64, AloExactScalarError> {
230 let residual_and_jac = |eta: f64| -> Result<(f64, f64), AloExactScalarError> {
254 let (ell_prime, ell_double) = score_curvature(eta);
255 if !ell_prime.is_finite() || !ell_double.is_finite() {
256 return Err(AloExactScalarError::NonFiniteScoreCurvature {
257 eta,
258 ell_prime,
259 ell_double,
260 });
261 }
262 Ok((eta - eta_hat - a_ii * ell_prime, 1.0 - a_ii * ell_double))
263 };
264
265 let mut eta = eta_hat;
266 let (mut residual, mut jac) = residual_and_jac(eta)?;
267 for _ in 0..ALO_EXACT_SCALAR_MAX_ITERS {
268 if residual.abs() <= ALO_EXACT_SCALAR_TOL {
269 return Ok(eta);
270 }
271 if jac.abs() <= ALO_DENOMINATOR_MIN || !jac.is_finite() {
272 return Err(AloExactScalarError::DegenerateJacobian { eta, jacobian: jac });
273 }
274 let step = residual / jac;
275 if !step.is_finite() {
276 return Err(AloExactScalarError::NonFiniteStep {
277 eta,
278 residual,
279 jacobian: jac,
280 next: eta - step,
281 });
282 }
283 let mut t = 1.0;
288 let mut advanced = false;
289 for _ in 0..ALO_EXACT_SCALAR_BACKTRACKS {
290 let trial = eta - t * step;
291 if let Ok((r_trial, j_trial)) = residual_and_jac(trial) {
292 if r_trial.abs() < residual.abs() {
293 eta = trial;
294 residual = r_trial;
295 jac = j_trial;
296 advanced = true;
297 break;
298 }
299 }
300 t *= 0.5;
301 }
302 if !advanced {
303 break;
304 }
305 }
306 Err(AloExactScalarError::MaxIterations {
307 iterations: ALO_EXACT_SCALAR_MAX_ITERS,
308 residual,
309 eta,
310 })
311}
312
/// Variance of the linear predictor on the "Bayesian" scale: the dispersion
/// `phi` times the quadratic form `x_hinv_x`.
#[inline]
fn bayesvar_eta(phi: f64, x_hinv_x: f64) -> f64 {
    x_hinv_x * phi
}
317
/// Sandwich-style variance of the linear predictor:
/// `phi * (x_hinv_x - es_norm2 - ridge * s_norm2)`.
///
/// The result is not clamped; callers decide how to treat negative values.
#[inline]
fn sandwichvar_eta(phi: f64, x_hinv_x: f64, es_norm2: f64, ridge: f64, s_norm2: f64) -> f64 {
    let ridge_part = ridge * s_norm2;
    let unscaled = x_hinv_x - es_norm2 - ridge_part;
    phi * unscaled
}
325
/// Magnitude below zero that a variance may reach before it is treated as
/// materially negative: `1e-12 * max(|scale|, 1)`.
#[inline]
fn variance_negative_tolerance(scale: f64) -> f64 {
    let magnitude = f64::max(scale.abs(), 1.0);
    1e-12 * magnitude
}
331
/// Leverages above this value are counted as "high" in the leverage log.
const LEVERAGE_HIGH_THRESHOLD: f64 = 0.99;
/// Leverages above this value additionally get a per-observation warning.
const LEVERAGE_VERY_HIGH_THRESHOLD: f64 = 0.999;
/// Cut-offs whose exceedance rates are reported in the leverage log.
const LEVERAGE_RATE_THRESHOLDS: [f64; 3] = [0.90, 0.95, 0.99];
/// Quantiles (median, p95, p99) reported in the leverage log.
const LEVERAGE_PERCENTILES: [f64; 3] = [0.50, 0.95, 0.99];
/// Smallest accepted value for `1 - a_ii`, `|dmu/deta|`, and the magnitude of
/// the exact-solve Newton Jacobian.
const ALO_DENOMINATOR_MIN: f64 = 1e-12;
/// Memory budget (256 MiB) used when planning multi-block ALO chunk sizes.
const MULTIBLOCK_ALO_MEMORY_BUDGET_BYTES: usize = 256 * 1024 * 1024;

/// Number of observations (right-hand-side columns) solved per chunk in the
/// single-predictor ALO solve.
const ALO_RHS_BLOCK_COLS: usize = 8192;

/// Relative tolerance for the penalized-Hessian symmetry check; the scale is
/// `max(|h_ij|, |h_ji|, 1)`.
const HESSIAN_SYMMETRY_REL_TOL: f64 = 1e-8;

// NOTE(review): not referenced in the portion of the file visible here;
// presumably a ridge for per-observation block systems - confirm at use site.
const ALO_LOCAL_BLOCK_RIDGE: f64 = 1e-6;

// NOTE(review): not referenced in the portion of the file visible here;
// presumably the pivot magnitude below which an LU factor is treated as
// singular - confirm at use site.
const LU_PIVOT_SINGULAR_TOL: f64 = 1e-12;
364
/// Index into a sorted sample of `sample_size` elements for the requested
/// `quantile`, using nearest-rank rounding on `quantile * (sample_size - 1)`.
///
/// Returns 0 for samples of size 0 or 1, and never exceeds the last valid
/// index. Negative or NaN products collapse to 0 through the float-to-integer
/// cast.
#[inline]
fn percentile_index(sample_size: usize, quantile: f64) -> usize {
    match sample_size.checked_sub(1) {
        None | Some(0) => 0,
        Some(last) => {
            let nearest = (quantile * last as f64).round();
            usize::min(nearest as usize, last)
        }
    }
}
373
374#[inline]
375fn percentile_from_sorted(sorted: &[f64], quantile: f64) -> f64 {
376 if sorted.is_empty() {
377 0.0
378 } else {
379 sorted[percentile_index(sorted.len(), quantile)]
380 }
381}
382
383#[inline]
384fn multiblock_col_offsets(block_designs: &[Array2<f64>]) -> Vec<usize> {
385 let mut offsets = Vec::with_capacity(block_designs.len());
386 let mut off = 0usize;
387 for design in block_designs {
388 offsets.push(off);
389 off += design.ncols();
390 }
391 offsets
392}
393
394#[inline]
395fn multiblock_alo_parallel_leverage_chunk_size(
396 p_tot: usize,
397 n_blocks: usize,
398 n_obs: usize,
399 max_workers: usize,
400) -> usize {
401 if p_tot == 0 || n_blocks == 0 || n_obs == 0 {
402 return 1;
403 }
404
405 let workers = max_workers.max(1);
411 let per_worker_budget = (MULTIBLOCK_ALO_MEMORY_BUDGET_BYTES / workers).max(1);
412 let elem_count_per_obs = p_tot.saturating_mul(n_blocks.saturating_add(1)).max(1);
413 let bytes_per_obs = elem_count_per_obs
414 .saturating_mul(std::mem::size_of::<f64>())
415 .max(1);
416 let budget_obs = (per_worker_budget / bytes_per_obs).max(1);
417 budget_obs.min(n_obs)
418}
419
/// Runs the PIRLS-backed ALO computation and converts any [`AloError`] into
/// an [`EstimationError`] via the `From` impl.
fn compute_alo_diagnostics_from_pirls_impl(
    base: &pirls::PirlsResult,
    y: ArrayView1<f64>,
    link: LinkFunction,
) -> Result<AloDiagnostics, EstimationError> {
    compute_alo_diagnostics_from_pirls_inner(base, y, link).map_err(EstimationError::from)
}
427
428fn alo_link_needs_exact_curvature_refinement(likelihood: &gam_problem::GlmLikelihoodSpec) -> bool {
441 use gam_problem::ResponseFamily;
442 matches!(
443 (&likelihood.spec.response, likelihood.link_function()),
444 (ResponseFamily::Binomial, LinkFunction::Logit)
445 | (ResponseFamily::Poisson, LinkFunction::Log)
446 )
447}
448
/// Assembles an [`AloInput`] from a PIRLS result and runs the ALO solve.
///
/// Steps, in order:
/// 1. densify the transformed design;
/// 2. choose the dispersion `phi` (estimated for the identity link, 1 otherwise);
/// 3. fetch the dense stabilized penalized Hessian;
/// 4. for binomial/logit and Poisson/log, build the score/curvature callback
///    used by the exact scalar solve;
/// 5. run `compute_alo_from_input_inner`, log a leverage summary, and reject
///    any result containing NaN.
///
/// # Errors
///
/// * `DesignDegenerate` - no dense design is available.
/// * `InvalidInput` - the dense Hessian could not be obtained.
/// * `InfluenceMatrixFailed` - the finished diagnostics contain NaN.
/// * Anything returned by `compute_alo_from_input_inner`.
fn compute_alo_diagnostics_from_pirls_inner(
    base: &pirls::PirlsResult,
    y: ArrayView1<f64>,
    link: LinkFunction,
) -> Result<AloDiagnostics, AloError> {
    let x_dense_arc = base
        .x_transformed
        .try_to_dense_arc("ALO diagnostics require dense transformed design")
        .map_err(|reason| AloError::DesignDegenerate { reason })?;
    let x_dense = x_dense_arc.as_ref();
    let n = x_dense.nrows();

    // Only the identity link estimates a dispersion; every other link uses 1.
    let phi = match link {
        LinkFunction::Log => 1.0,
        LinkFunction::Logit
        | LinkFunction::Probit
        | LinkFunction::CLogLog
        | LinkFunction::Sas
        | LinkFunction::BetaLogistic => 1.0,
        LinkFunction::Identity => {
            use rayon::iter::{IntoParallelIterator, ParallelIterator};
            // Weighted residual sum of squares.
            let rss: f64 = (0..n)
                .into_par_iter()
                .map(|i| {
                    let r = y[i] - base.finalmu[i];
                    base.finalweights[i] * r * r
                })
                .sum();
            // Residual degrees of freedom count only rows with strictly
            // positive weight, and are floored at 1 before dividing.
            let n_pos = (0..n).filter(|&i| base.finalweights[i] > 0.0).count();
            let dof = (n_pos as f64) - base.edf;
            let denom = dof.max(1.0);
            rss / denom
        }
    };

    let e = &base.reparam_result.e_transformed;
    // Negative ridge values are clamped to zero.
    let ridge = base.ridge_passport.laplacehessianridge().max(0.0);

    let h_dense_for_alo = base
        .dense_stabilizedhessian_transformed(
            "ALO diagnostics require exact dense stabilized penalized Hessian",
        )
        .map_err(|e| match e {
            EstimationError::InvalidInput(reason) => AloError::InvalidInput { reason },
            // Any other error kind is flattened into its Debug rendering.
            other => AloError::InvalidInput {
                reason: format!("{other:?}"),
            },
        })?;

    // Per-observation scale c_i = w_i / (dmu/deta)_i, built only for the
    // family/link pairs that use the exact scalar solve. An unusable
    // derivative or weight yields NaN for that row.
    let canonical_scale: Option<Array1<f64>> =
        if alo_link_needs_exact_curvature_refinement(&base.likelihood) {
            let mut c = Array1::<f64>::zeros(n);
            for i in 0..n {
                let dmu = base.solve_dmu_deta[i];
                let w_h = base.finalweights[i];
                c[i] = if dmu.abs() <= ALO_DENOMINATOR_MIN || !dmu.is_finite() || !w_h.is_finite() {
                    f64::NAN
                } else {
                    w_h / dmu
                };
            }
            Some(c)
        } else {
            None
        };

    // Callback returning (c_i * (mu - y_i), c_i * dmu/deta) at a trial eta.
    // A failing inverse link becomes a NaN pair rather than a panic.
    let inv_link_for_closure = base.likelihood.spec.link.clone();
    let score_curvature_closure = canonical_scale.as_ref().map(|scale| {
        move |i: usize, eta: f64| -> (f64, f64) {
            let (mu, dmu) = crate::mixture_link::inverse_link_mu_d1_for_inverse_link(
                &inv_link_for_closure,
                eta,
            )
            .unwrap_or((f64::NAN, f64::NAN));
            let c_i = scale[i];
            (c_i * (mu - y[i]), c_i * dmu)
        }
    });
    // Erase the closure type so it fits the `AloInput` field.
    let score_curvature_ref: Option<&AloScalarScoreCurvature> = score_curvature_closure
        .as_ref()
        .map(|f| f as &AloScalarScoreCurvature);

    let input = AloInput {
        design: x_dense,
        penalized_hessian: &h_dense_for_alo,
        hessian_weights: base.final_weights_signed(),
        score_weights: base.solve_weights_psd(),
        working_response: &base.solveworking_response,
        eta: &base.final_eta,
        offset: &base.final_offset,
        link,
        phi,
        // A penalty root with zero rows is passed as "no penalty root".
        penalty_root: if e.nrows() > 0 { Some(e) } else { None },
        ridge,
        score_curvature: score_curvature_ref,
    };

    let result = compute_alo_from_input_inner(&input)?;

    log_leverage_diagnostics(&result.leverage, phi);

    // Final safety net: any NaN in the outputs fails the whole computation.
    let has_nan_pred = result.eta_tilde.iter().any(|&x| x.is_nan());
    let has_nan_se_bayes = result.se_bayes.iter().any(|&x| x.is_nan());
    let has_nan_se_sandwich = result.se_sandwich.iter().any(|&x| x.is_nan());
    let has_nan_leverage = result.leverage.iter().any(|&x| x.is_nan());

    if has_nan_pred || has_nan_se_bayes || has_nan_se_sandwich || has_nan_leverage {
        log::error!("[GAM ALO] NaN values found in ALO diagnostics:");
        log::error!(
            "[GAM ALO] eta_tilde: {} NaN values",
            result.eta_tilde.iter().filter(|&&x| x.is_nan()).count()
        );
        log::error!(
            "[GAM ALO] se_bayes: {} NaN values",
            result.se_bayes.iter().filter(|&&x| x.is_nan()).count()
        );
        log::error!(
            "[GAM ALO] se_sandwich: {} NaN values",
            result.se_sandwich.iter().filter(|&&x| x.is_nan()).count()
        );
        log::error!(
            "[GAM ALO] leverage: {} NaN values",
            result.leverage.iter().filter(|&&x| x.is_nan()).count()
        );
        return Err(AloError::InfluenceMatrixFailed {
            condition_number: f64::INFINITY,
        });
    }

    Ok(result)
}
611
612fn log_leverage_diagnostics(leverage: &Array1<f64>, phi: f64) {
614 let n = leverage.len();
615 if n == 0 {
616 return;
617 }
618
619 let mut invalid_count = 0usize;
620 let mut high_leverage_count = 0usize;
621 let mut threshold_counts = [0usize; LEVERAGE_RATE_THRESHOLDS.len()];
622 let mut finite_leverage = Vec::with_capacity(n);
623
624 for (obs, &ai) in leverage.iter().enumerate() {
625 if ai.is_finite() {
626 finite_leverage.push(ai);
627 }
628
629 if !(0.0..=1.0).contains(&ai) || !ai.is_finite() {
630 invalid_count += 1;
631 log::warn!("[GAM ALO] invalid leverage at i={}, a_ii={:.6e}", obs, ai);
632 } else if ai > LEVERAGE_HIGH_THRESHOLD {
633 high_leverage_count += 1;
634 if ai > LEVERAGE_VERY_HIGH_THRESHOLD {
635 log::warn!("[GAM ALO] very high leverage at i={}, a_ii={:.6e}", obs, ai);
636 }
637 }
638
639 for (idx, threshold) in LEVERAGE_RATE_THRESHOLDS.iter().enumerate() {
640 if ai > *threshold {
641 threshold_counts[idx] += 1;
642 }
643 }
644 }
645
646 if invalid_count > 0 || high_leverage_count > 0 {
647 log::warn!(
648 "[GAM ALO] leverage diagnostics: {} invalid values, {} high values (>0.99)",
649 invalid_count,
650 high_leverage_count
651 );
652 }
653
654 finite_leverage.sort_by(f64::total_cmp);
655
656 let finite_n = finite_leverage.len();
657 let a_mean = if finite_n > 0 {
658 finite_leverage.iter().copied().sum::<f64>() / finite_n as f64
659 } else {
660 0.0
661 };
662 let a_median = percentile_from_sorted(&finite_leverage, LEVERAGE_PERCENTILES[0]);
663 let a_p95 = percentile_from_sorted(&finite_leverage, LEVERAGE_PERCENTILES[1]);
664 let a_p99 = percentile_from_sorted(&finite_leverage, LEVERAGE_PERCENTILES[2]);
665 let a_max = finite_leverage.last().copied().unwrap_or(0.0);
666
667 log::info!(
675 "[GAM ALO] leverage: n={}, mean={:.3e}, median={:.3e}, p95={:.3e}, p99={:.3e}, max={:.3e}",
676 n,
677 a_mean,
678 a_median,
679 a_p95,
680 a_p99,
681 a_max
682 );
683 log::info!(
684 "[GAM ALO] high-leverage: a>0.90: {:.2}%, a>0.95: {:.2}%, a>0.99: {:.2}%, dispersion phi={:.3e}",
685 100.0 * (threshold_counts[0] as f64) / n as f64,
686 100.0 * (threshold_counts[1] as f64) / n as f64,
687 100.0 * (threshold_counts[2] as f64) / n as f64,
688 phi
689 );
690}
691
/// Borrowed inputs for the single-predictor ALO solve.
///
/// With `n` design rows and `p` design columns, validation requires every
/// per-observation vector to have length `n` and to be finite.
pub struct AloInput<'a> {
    /// Dense design matrix (`n x p`).
    pub design: &'a Array2<f64>,
    /// Penalized Hessian (`p x p`); must be finite and symmetric to within
    /// `HESSIAN_SYMMETRY_REL_TOL`.
    pub penalized_hessian: &'a Array2<f64>,
    /// Hessian-side weights; clamped at zero when forming the leverages.
    pub hessian_weights: SignedWeightsView<'a>,
    /// Score-side weights used by the one-step predictor update.
    pub score_weights: PsdWeightsView<'a>,
    /// Working response `z`.
    pub working_response: &'a Array1<f64>,
    /// Fitted linear predictor (offset included).
    pub eta: &'a Array1<f64>,
    /// Fixed offset per observation.
    pub offset: &'a Array1<f64>,
    /// Link function. NOTE(review): not read by the solve visible in this
    /// file - confirm whether other consumers rely on it.
    pub link: LinkFunction,
    /// Dispersion; must be finite and strictly positive.
    pub phi: f64,
    /// Optional penalty root `E` with `p` columns; enables the sandwich variance.
    pub penalty_root: Option<&'a Array2<f64>>,
    /// Hessian ridge; must be finite and non-negative.
    pub ridge: f64,
    /// Optional score/curvature callback; when present the exact scalar solve
    /// replaces the one-step update.
    pub score_curvature: Option<&'a AloScalarScoreCurvature<'a>>,
}
742
743impl<'a> AloInput<'a> {
744 pub fn from_geometry(
746 geom: &'a FitGeometry,
747 design: &'a Array2<f64>,
748 eta: &'a Array1<f64>,
749 offset: &'a Array1<f64>,
750 link: LinkFunction,
751 phi: f64,
752 ) -> Self {
753 let psd_w = PsdWeightsView::from_view_unchecked(geom.working_weights.view());
760 Self {
761 design,
762 penalized_hessian: &geom.penalized_hessian,
763 hessian_weights: psd_w.as_signed(),
764 score_weights: psd_w,
765 working_response: &geom.working_response,
766 eta,
767 offset,
768 link,
769 phi,
770 penalty_root: None,
771 ridge: 0.0,
772 score_curvature: None,
773 }
774 }
775}
776
/// Public entry point for the ALO solve on a caller-assembled [`AloInput`].
///
/// Delegates to `compute_alo_from_input_inner` and converts any [`AloError`]
/// into an [`EstimationError`] via the `From` impl.
pub fn compute_alo_from_input(input: &AloInput) -> Result<AloDiagnostics, EstimationError> {
    compute_alo_from_input_inner(input).map_err(EstimationError::from)
}
785
786fn compute_alo_from_input_inner(input: &AloInput) -> Result<AloDiagnostics, AloError> {
787 let x_dense = input.design;
788 let n = x_dense.nrows();
789 let p = x_dense.ncols();
790 let w_h = input.hessian_weights.view();
794 let w_s = input.score_weights.view();
795
796 validate_alo_solve_setup(input, n, p)?;
797
798 let factor = StableSolver::new("alo penalized hessian")
799 .factorize(input.penalized_hessian)
800 .map_err(|_| AloError::InfluenceMatrixFailed {
801 condition_number: f64::INFINITY,
802 })?;
803
804 let xt = x_dense.t();
805 let phi = input.phi;
806 let ridge = input.ridge;
807
808 let e_rank = input.penalty_root.map(|e| e.nrows()).unwrap_or(0);
809
810 let mut aii = Array1::<f64>::zeros(n);
811 let mut x_hinv_x_diag = Array1::<f64>::zeros(n);
812 let mut se_bayes = Array1::<f64>::zeros(n);
813 let mut se_sandwich = Array1::<f64>::zeros(n);
814
815 let block_cols = ALO_RHS_BLOCK_COLS;
816 let mut rhs_chunk_buf = Array2::<f64>::zeros((p, block_cols).f());
821 let mut es_chunk_storage = if e_rank > 0 {
825 FaerMat::<f64>::zeros(e_rank, block_cols)
826 } else {
827 FaerMat::<f64>::zeros(0, 0)
828 };
829
830 for chunk_start in (0..n).step_by(block_cols) {
831 let chunk_end = (chunk_start + block_cols).min(n);
832 let width = chunk_end - chunk_start;
833
834 rhs_chunk_buf
835 .slice_mut(s![.., ..width])
836 .assign(&xt.slice(s![.., chunk_start..chunk_end]));
837
838 let rhs_chunkview = rhs_chunk_buf.slice(s![.., ..width]);
839 let rhs_chunk = FaerArrayView::new(&rhs_chunkview);
840 let s_chunk = factor.solve(rhs_chunk.as_ref());
844
845 if e_rank > 0
846 && let Some(e) = input.penalty_root
847 {
848 let eview = FaerArrayView::new(e);
849 let mut es_target = es_chunk_storage.as_mut().subcols_mut(0, width);
852 matmul(
853 es_target.rb_mut(),
854 Accum::Replace,
855 eview.as_ref(),
856 s_chunk.as_ref(),
857 1.0,
858 Par::Seq,
859 );
860 }
861
862 let rhs_view = rhs_chunk_buf.slice(s![.., ..width]);
863
864 for local_col in 0..width {
865 let obs = chunk_start + local_col;
866 let rhs_col = rhs_view.column(local_col);
870 let rhs_slice = rhs_col.as_slice().expect("column-major col contiguous");
871 let s_slice = s_chunk.col_as_slice(local_col);
872
873 let mut x_hinv_x = 0.0f64;
874 let mut s_norm2 = 0.0f64;
875 for k in 0..p {
877 let sval = s_slice[k];
878 let xval = rhs_slice[k];
879 x_hinv_x = sval.mul_add(xval, x_hinv_x);
880 s_norm2 = sval.mul_add(sval, s_norm2);
881 }
882 let ai = w_h[obs].max(0.0) * x_hinv_x;
883 let mut es_norm2 = 0.0f64;
884 if e_rank > 0 {
885 let es_slice = es_chunk_storage.col_as_slice(local_col);
886 for r in 0..e_rank {
887 let v = es_slice[r];
888 es_norm2 = v.mul_add(v, es_norm2);
889 }
890 }
891 aii[obs] = ai;
892 x_hinv_x_diag[obs] = x_hinv_x;
893
894 let var_bayes = bayesvar_eta(phi, x_hinv_x);
895 let var_sandwich = if e_rank > 0 {
896 sandwichvar_eta(phi, x_hinv_x, es_norm2, ridge, s_norm2)
897 } else {
898 var_bayes
899 };
900
901 if !var_bayes.is_finite() || !var_sandwich.is_finite() {
902 return Err(AloError::LooComputationFailed {
903 reason: format!(
904 "ALO variance is not finite at row {obs}: bayes={var_bayes:.6e}, sandwich={var_sandwich:.6e}"
905 ),
906 });
907 }
908 let bayes_tol = variance_negative_tolerance(phi * x_hinv_x.abs());
909 if var_bayes < -bayes_tol {
910 return Err(AloError::LooComputationFailed {
911 reason: format!(
912 "ALO Bayesian variance is materially negative at row {obs}: var={var_bayes:.6e}, tol={bayes_tol:.6e}"
913 ),
914 });
915 }
916 if e_rank > 0 {
917 let sandwich_scale =
918 phi * (x_hinv_x.abs() + es_norm2.abs() + (ridge * s_norm2).abs());
919 let sandwich_tol = variance_negative_tolerance(sandwich_scale);
920 if var_sandwich < -sandwich_tol {
921 return Err(AloError::LooComputationFailed {
922 reason: format!(
923 "ALO sandwich variance is materially negative at row {obs}: var={var_sandwich:.6e}, tol={sandwich_tol:.6e}"
924 ),
925 });
926 }
927 }
928
929 se_bayes[obs] = var_bayes.max(0.0).sqrt();
930 se_sandwich[obs] = var_sandwich.max(0.0).sqrt();
931 }
932 }
933
934 let eta_hat = input.eta;
935 let z = input.working_response;
936 let offset = input.offset;
937
938 use rayon::prelude::*;
939 let eta_tilde_vec: Vec<f64> = (0..n)
940 .into_par_iter()
941 .map(|i| {
942 let denom_raw = 1.0 - aii[i];
943 if denom_raw <= ALO_DENOMINATOR_MIN || !denom_raw.is_finite() {
944 return Err(AloError::LooComputationFailed {
945 reason: format!(
946 "ALO denominator is too small at row {i}: a_ii={:.6e}, 1-a_ii={:.6e}, min={:.1e}",
947 aii[i], denom_raw, ALO_DENOMINATOR_MIN
948 ),
949 });
950 }
951 let one_step = alo_eta_updatewith_offset(
952 eta_hat[i],
953 z[i],
954 offset[i],
955 x_hinv_x_diag[i],
956 w_s[i],
957 denom_raw,
958 );
959 let v = if let Some(score_curvature) = input.score_curvature {
967 alo_eta_exact_frozen_curvature(
968 eta_hat[i],
969 x_hinv_x_diag[i],
970 &|eta| score_curvature(i, eta),
971 )
972 .map_err(|err| AloError::LooComputationFailed {
973 reason: format!(
974 "ALO exact frozen-curvature solve failed at row {i}: {err}"
975 ),
976 })?
977 } else {
978 one_step
979 };
980 if !v.is_finite() {
981 return Err(AloError::LooComputationFailed {
982 reason: format!("ALO eta_tilde is not finite at row {i}: eta_tilde={v}"),
983 });
984 }
985 Ok(v)
986 })
987 .collect::<Result<_, _>>()?;
988 let eta_tilde = Array1::from(eta_tilde_vec);
989
990 Ok(AloDiagnostics {
991 eta_tilde,
992 se_bayes,
993 se_sandwich,
994 pred_identity: eta_hat.clone(),
995 leverage: aii,
996 fisherweights: w_h.to_owned(),
997 })
998}
999
1000fn validate_alo_solve_setup(input: &AloInput, n: usize, p: usize) -> Result<(), AloError> {
1001 let h = input.penalized_hessian;
1002 if h.nrows() != p || h.ncols() != p {
1003 return Err(AloError::InvalidInput {
1004 reason: format!(
1005 "ALO diagnostics require a dense exact penalized Hessian with shape {p}x{p}; got {}x{}",
1006 h.nrows(),
1007 h.ncols()
1008 ),
1009 });
1010 }
1011 if h.iter().any(|v| !v.is_finite()) {
1012 return Err(AloError::InvalidInput {
1013 reason: "ALO diagnostics require a finite dense exact penalized Hessian".to_string(),
1014 });
1015 }
1016 for i in 0..p {
1017 for j in 0..i {
1018 let a = h[[i, j]];
1019 let b = h[[j, i]];
1020 let scale = a.abs().max(b.abs()).max(1.0);
1021 if (a - b).abs() > HESSIAN_SYMMETRY_REL_TOL * scale {
1022 return Err(AloError::InvalidInput {
1023 reason: format!(
1024 "ALO diagnostics require a symmetric dense exact penalized Hessian; entries ({i},{j}) and ({j},{i}) differ by {:.3e}",
1025 (a - b).abs()
1026 ),
1027 });
1028 }
1029 }
1030 }
1031
1032 let vector_lengths = [
1033 ("hessian_weights", input.hessian_weights.len()),
1034 ("score_weights", input.score_weights.len()),
1035 ("working_response", input.working_response.len()),
1036 ("eta", input.eta.len()),
1037 ("offset", input.offset.len()),
1038 ];
1039 for (name, len) in vector_lengths {
1040 if len != n {
1041 return Err(AloError::InvalidInput {
1042 reason: format!("ALO diagnostics require {name} length {n}; got {len}"),
1043 });
1044 }
1045 }
1046 if input.hessian_weights.view().iter().any(|v| !v.is_finite()) {
1047 return Err(AloError::WeightInvalid {
1048 reason: "ALO diagnostics require finite Hessian-side weights".to_string(),
1049 });
1050 }
1051 if input.score_weights.view().iter().any(|v| !v.is_finite()) {
1052 return Err(AloError::WeightInvalid {
1053 reason: "ALO diagnostics require finite score-side weights".to_string(),
1054 });
1055 }
1056 if input.working_response.iter().any(|v| !v.is_finite()) {
1057 return Err(AloError::WeightInvalid {
1058 reason: "ALO diagnostics require finite working responses".to_string(),
1059 });
1060 }
1061 if input.eta.iter().any(|v| !v.is_finite()) || input.offset.iter().any(|v| !v.is_finite()) {
1062 return Err(AloError::InvalidInput {
1063 reason: "ALO diagnostics require finite linear predictors and offsets".to_string(),
1064 });
1065 }
1066 if !input.phi.is_finite() || input.phi <= 0.0 {
1067 return Err(AloError::InvalidInput {
1068 reason: format!(
1069 "ALO diagnostics require positive finite dispersion phi; got {}",
1070 input.phi
1071 ),
1072 });
1073 }
1074 if !input.ridge.is_finite() || input.ridge < 0.0 {
1075 return Err(AloError::InvalidInput {
1076 reason: format!(
1077 "ALO diagnostics require a finite non-negative Hessian ridge; got {}",
1078 input.ridge
1079 ),
1080 });
1081 }
1082 if let Some(e) = input.penalty_root {
1083 if e.ncols() != p {
1084 return Err(AloError::InvalidInput {
1085 reason: format!(
1086 "ALO diagnostics require penalty root to have {p} columns; got {}",
1087 e.ncols()
1088 ),
1089 });
1090 }
1091 if e.iter().any(|v| !v.is_finite()) {
1092 return Err(AloError::InvalidInput {
1093 reason: "ALO diagnostics require finite penalty-root entries".to_string(),
1094 });
1095 }
1096 }
1097 Ok(())
1098}
1099
1100pub fn compute_alo_diagnostics_from_fit(
1102 fit: &UnifiedFitResult,
1103 y: ArrayView1<f64>,
1104 link: LinkFunction,
1105) -> Result<AloDiagnostics, EstimationError> {
1106 let pirls = fit
1107 .artifacts
1108 .pirls
1109 .as_ref()
1110 .ok_or_else(|| AloError::InvalidInput {
1111 reason:
1112 "ALO diagnostics require a PIRLS-backed fit; this fit does not expose PIRLS geometry"
1113 .to_string(),
1114 })
1115 .map_err(EstimationError::from)?;
1116 compute_alo_diagnostics_from_pirls_impl(pirls, y, link)
1117}
1118
1119pub fn compute_alo_diagnostics_from_unified(
1125 unified: &UnifiedFitResult,
1126 design: &Array2<f64>,
1127 eta: &Array1<f64>,
1128 offset: &Array1<f64>,
1129 link: LinkFunction,
1130 phi: f64,
1131) -> Result<AloDiagnostics, EstimationError> {
1132 let geom = unified
1133 .geometry
1134 .as_ref()
1135 .ok_or_else(|| AloError::InvalidInput {
1136 reason: "UnifiedFitResult does not contain working-set geometry; \
1137 ALO diagnostics require geometry at convergence"
1138 .to_string(),
1139 })
1140 .map_err(EstimationError::from)?;
1141 let input = AloInput::from_geometry(geom, design, eta, offset, link, phi);
1142 compute_alo_from_input(&input)
1143}
1144
/// Public entry point for ALO diagnostics computed directly from a PIRLS
/// result; forwards to `compute_alo_diagnostics_from_pirls_impl` unchanged.
pub fn compute_alo_diagnostics_from_pirls(
    base: &pirls::PirlsResult,
    y: ArrayView1<f64>,
    link: LinkFunction,
) -> Result<AloDiagnostics, EstimationError> {
    compute_alo_diagnostics_from_pirls_impl(base, y, link)
}
1153
1154pub fn compute_case_deletion_from_pirls(
1173 base: &pirls::PirlsResult,
1174 y: ArrayView1<f64>,
1175 link: LinkFunction,
1176) -> Result<Option<crate::sensitivity::CaseDeletionInfluence>, EstimationError> {
1177 let x_dense_arc = base
1178 .x_transformed
1179 .try_to_dense_arc("case-deletion diagnostics require dense transformed design")
1180 .map_err(|reason| EstimationError::InvalidInput(reason))?;
1181 let x_dense = x_dense_arc.as_ref();
1182 let n = x_dense.nrows();
1183 let p = x_dense.ncols();
1184 if n == 0 || p == 0 {
1185 return Ok(None);
1186 }
1187
1188 let phi = match link {
1191 LinkFunction::Identity => {
1192 use rayon::iter::{IntoParallelIterator, ParallelIterator};
1193 let rss: f64 = (0..n)
1194 .into_par_iter()
1195 .map(|i| {
1196 let r = y[i] - base.finalmu[i];
1197 base.finalweights[i] * r * r
1198 })
1199 .sum();
1200 let dof = (n as f64) - base.edf;
1201 rss / dof.max(1.0)
1202 }
1203 _ => 1.0,
1204 };
1205 if !(phi.is_finite() && phi > 0.0) {
1206 return Ok(None);
1207 }
1208
1209 let h_dense = base
1212 .dense_stabilizedhessian_transformed(
1213 "case-deletion diagnostics require exact dense stabilized penalized Hessian",
1214 )
1215 .map_err(|e| match e {
1216 EstimationError::InvalidInput(reason) => EstimationError::InvalidInput(reason),
1217 other => EstimationError::InvalidInput(format!("{other:?}")),
1218 })?;
1219
1220 let factor = match h_dense.cholesky(faer::Side::Lower) {
1221 Ok(f) => f,
1222 Err(_) => return Ok(None),
1226 };
1227
1228 let working_weights = base.finalweights.clone();
1232 let working_residual = &base.solveworking_response - &base.final_eta;
1233
1234 let sensitivity = crate::sensitivity::FitSensitivity::from_faer_cholesky(&factor, p);
1235 Ok(sensitivity.case_deletion(
1236 x_dense,
1237 working_weights.view(),
1238 working_residual.view(),
1239 phi,
1240 ))
1241}
1242
/// Output of the multi-block ALO computation, assembled chunk by chunk in
/// observation order.
#[derive(Debug, Clone)]
pub struct MultiBlockAloDiagnostics {
    /// Leave-one-out predictors. NOTE(review): presumably one vector per
    /// observation with one entry per block - confirm against the chunk worker.
    pub eta_tilde: Vec<Array1<f64>>,
    /// One leverage value per observation.
    pub leverage: Array1<f64>,
    /// ALO variances. NOTE(review): presumably laid out like `eta_tilde` -
    /// confirm against the chunk worker.
    pub alo_variance: Vec<Array1<f64>>,
    /// One Cook's-distance-style value per observation.
    pub cook_distance: Array1<f64>,
}
1262
/// Inputs for the multi-block ALO computation.
pub struct MultiBlockAloInput<'a> {
    /// Number of observations; drives the chunking of the computation.
    pub n_obs: usize,
    /// Number of predictor blocks; must equal `block_designs.len()`.
    pub n_blocks: usize,
    /// One design matrix per block; their column counts must sum to the
    /// number of rows of `penalized_hessian_inv`.
    pub block_designs: &'a [Array2<f64>],
    /// Inverse of the joint penalized Hessian over all blocks' coefficients.
    pub penalized_hessian_inv: &'a Array2<f64>,
    /// NOTE(review): presumably one `n_blocks x n_blocks` weight matrix per
    /// observation - confirm against the chunk worker.
    pub block_weights: Vec<Array2<f64>>,
    /// NOTE(review): presumably one score vector per observation - confirm
    /// against the chunk worker.
    pub scores: Vec<Array1<f64>>,
    /// NOTE(review): presumably the fitted predictors per observation -
    /// confirm against the chunk worker.
    pub eta_hat: Vec<Array1<f64>>,
}
1311
/// Public entry point for multi-block ALO diagnostics.
///
/// Delegates to `compute_multiblock_alo_inner` and converts any [`AloError`]
/// into an [`EstimationError`] via the `From` impl.
pub fn compute_multiblock_alo(
    input: &MultiBlockAloInput,
) -> Result<MultiBlockAloDiagnostics, EstimationError> {
    compute_multiblock_alo_inner(input).map_err(EstimationError::from)
}
1335
1336fn compute_multiblock_alo_inner(
1337 input: &MultiBlockAloInput,
1338) -> Result<MultiBlockAloDiagnostics, AloError> {
1339 use rayon::prelude::*;
1340
1341 let n = input.n_obs;
1342 let b = input.n_blocks;
1343 let p_tot = input.penalized_hessian_inv.nrows();
1344
1345 if input.block_designs.len() != b {
1347 return Err(AloError::InvalidInput {
1348 reason: format!(
1349 "MultiBlockAloInput: expected {} block designs, got {}",
1350 b,
1351 input.block_designs.len()
1352 ),
1353 });
1354 }
1355
1356 let col_sum: usize = input.block_designs.iter().map(|d| d.ncols()).sum();
1358 if col_sum != p_tot {
1359 return Err(AloError::InvalidInput {
1360 reason: format!(
1361 "MultiBlockAloInput: total design columns ({}) != penalized_hessian_inv size ({})",
1362 col_sum, p_tot
1363 ),
1364 });
1365 }
1366
1367 let col_offsets = multiblock_col_offsets(input.block_designs);
1368 let (chunk_size, max_concurrent_chunks) = multiblock_alo_parallel_plan(p_tot, b, n);
1369 let chunk_starts: Vec<usize> = (0..n).step_by(chunk_size).collect();
1370
1371 let mut chunk_results: Vec<Result<MultiBlockAloChunkDiagnostics, AloError>> =
1377 Vec::with_capacity(chunk_starts.len());
1378 for chunk_wave in chunk_starts.chunks(max_concurrent_chunks) {
1379 let mut wave_results: Vec<Result<MultiBlockAloChunkDiagnostics, AloError>> = chunk_wave
1380 .par_iter()
1381 .map_init(
1382 || MultiBlockAloScratch::new(b),
1383 |scratch, &chunk_start| {
1384 let chunk_end = (chunk_start + chunk_size).min(n);
1385 compute_multiblock_alo_chunk(
1386 input,
1387 &col_offsets,
1388 chunk_start,
1389 chunk_end,
1390 scratch,
1391 )
1392 },
1393 )
1394 .collect();
1395 chunk_results.append(&mut wave_results);
1396 }
1397
1398 let mut eta_tilde = Vec::with_capacity(n);
1399 let mut leverage = Array1::<f64>::zeros(n);
1400 let mut alo_variance = Vec::with_capacity(n);
1401 let mut cook_distance = Array1::<f64>::zeros(n);
1402
1403 let mut chunks = Vec::with_capacity(chunk_results.len());
1404 for result in chunk_results {
1405 chunks.push(result?);
1406 }
1407 chunks.sort_unstable_by_key(|chunk| chunk.chunk_start);
1408
1409 for chunk in chunks {
1410 let chunk_start = chunk.chunk_start;
1411 eta_tilde.extend(chunk.eta_tilde);
1412 alo_variance.extend(chunk.alo_variance);
1413 for (local_i, lev) in chunk.leverage.into_iter().enumerate() {
1414 leverage[chunk_start + local_i] = lev;
1415 }
1416 for (local_i, cook) in chunk.cook_distance.into_iter().enumerate() {
1417 cook_distance[chunk_start + local_i] = cook;
1418 }
1419 }
1420
1421 Ok(MultiBlockAloDiagnostics {
1422 eta_tilde,
1423 leverage,
1424 alo_variance,
1425 cook_distance,
1426 })
1427}
1428
1429#[inline]
1430fn multiblock_alo_parallel_plan(p_tot: usize, n_blocks: usize, n_obs: usize) -> (usize, usize) {
1431 if p_tot == 0 || n_blocks == 0 || n_obs == 0 {
1432 return (1, 1);
1433 }
1434 let bytes_per_obs = (p_tot * n_blocks * std::mem::size_of::<f64>()).max(1);
1435 let workers = rayon::current_num_threads().max(1);
1436 let max_concurrent_chunks = (MULTIBLOCK_ALO_MEMORY_BUDGET_BYTES / bytes_per_obs)
1437 .max(1)
1438 .min(workers);
1439 let per_worker_budget =
1440 (MULTIBLOCK_ALO_MEMORY_BUDGET_BYTES / max_concurrent_chunks).max(bytes_per_obs);
1441 let budget_obs = (per_worker_budget / bytes_per_obs).max(1);
1442 (budget_obs.min(n_obs), max_concurrent_chunks)
1443}
1444
/// Reusable work buffers for the per-observation multi-block ALO algebra.
///
/// With `b` the number of blocks, every "matrix" buffer is a flat row-major `b × b` array and
/// every "vector" buffer has length `b`. One instance is created per worker and overwritten for
/// each observation, so the inner loop performs no allocation for these quantities.
struct MultiBlockAloScratch {
    /// Row-major copy of the observation's `b × b` weight matrix `W_i`.
    w_flat: Vec<f64>,
    /// Per-observation `b × b` matrix `A_i` coupling the blocks through the inverse Hessian.
    a_i: Vec<f64>,
    /// Product `W_i · A_i`.
    wa: Vec<f64>,
    /// Product `A_i · W_i`.
    aw: Vec<f64>,
    /// `I − W_i A_i`, overwritten in place by its LU factors.
    imwa: Vec<f64>,
    /// `I − A_i W_i`, overwritten in place by its LU factors.
    imaw: Vec<f64>,
    /// Row permutation recorded while factoring `imwa`.
    perm_imwa: Vec<usize>,
    /// Row permutation recorded while factoring `imaw`.
    perm_imaw: Vec<usize>,
    /// Right-hand side going into an LU solve; holds the solution afterwards.
    rhs_buf: Vec<f64>,
    /// Intermediate `W_i · u` vector of the variance computation (also solved in place).
    w_u: Vec<f64>,
    /// ALO correction added to the fitted linear predictors.
    delta_eta: Vec<f64>,
    /// Per-block variance diagonal of the current observation.
    var_diag_buf: Vec<f64>,
    /// Workspace handed to the LU solver for its forward-substitution pass.
    lu_scratch: Vec<f64>,
}

impl MultiBlockAloScratch {
    /// Allocates zero-filled buffers sized for `b` blocks.
    fn new(b: usize) -> Self {
        let matrix = || vec![0.0f64; b * b];
        let vector = || vec![0.0f64; b];
        let permutation = || vec![0usize; b];

        Self {
            w_flat: matrix(),
            a_i: matrix(),
            wa: matrix(),
            aw: matrix(),
            imwa: matrix(),
            imaw: matrix(),
            perm_imwa: permutation(),
            perm_imaw: permutation(),
            rhs_buf: vector(),
            w_u: vector(),
            delta_eta: vector(),
            var_diag_buf: vector(),
            lu_scratch: vector(),
        }
    }
}
1481
/// Diagnostics produced by `compute_multiblock_alo_chunk` for one contiguous run of observations.
///
/// Every vector holds one entry per observation of the chunk, in observation order.
struct MultiBlockAloChunkDiagnostics {
    /// Global index of the chunk's first observation; the caller sorts chunks by it and uses it
    /// to place the leverage and Cook's-distance entries in the full-length outputs.
    chunk_start: usize,
    /// Corrected per-block linear predictors: `eta_hat[i]` plus the ALO correction.
    eta_tilde: Vec<Array1<f64>>,
    /// Trace of `A_i · W_i` for each observation.
    leverage: Vec<f64>,
    /// Per-block ALO variance diagonal for each observation, clamped below at zero.
    alo_variance: Vec<Array1<f64>>,
    /// Quadratic form `delta_eta' · W_i · delta_eta` for each observation.
    cook_distance: Vec<f64>,
}
1489
1490fn compute_multiblock_alo_chunk(
1491 input: &MultiBlockAloInput,
1492 col_offsets: &[usize],
1493 chunk_start: usize,
1494 chunk_end: usize,
1495 scratch: &mut MultiBlockAloScratch,
1496) -> Result<MultiBlockAloChunkDiagnostics, AloError> {
1497 let b = input.n_blocks;
1498 let chunk_len = chunk_end - chunk_start;
1499
1500 let mut q_blocks = Vec::with_capacity(b);
1501 for blk in 0..b {
1502 let x_chunk_t = input.block_designs[blk]
1503 .slice(s![chunk_start..chunk_end, ..])
1504 .t()
1505 .to_owned();
1506 let off_b = col_offsets[blk];
1507 let h_slice = input
1508 .penalized_hessian_inv
1509 .slice(s![.., off_b..off_b + x_chunk_t.nrows()])
1510 .to_owned();
1511 q_blocks.push(h_slice.dot(&x_chunk_t));
1512 }
1513
1514 let mut eta_tilde = Vec::with_capacity(chunk_len);
1515 let mut leverage = vec![0.0f64; chunk_len];
1516 let mut alo_variance = Vec::with_capacity(chunk_len);
1517 let mut cook_distance = vec![0.0f64; chunk_len];
1518
1519 for local_i in 0..chunk_len {
1520 let i = chunk_start + local_i;
1521 let w_i = &input.block_weights[i];
1522
1523 for r in 0..b {
1525 for c in 0..b {
1526 scratch.w_flat[r * b + c] = w_i[(r, c)];
1527 }
1528 }
1529
1530 for a in 0..b {
1532 let x_a = &input.block_designs[a];
1533 let p_a = x_a.ncols();
1534 let off_a = col_offsets[a];
1535 let xa_row = x_a.row(i);
1536 for bb in 0..b {
1537 let q_bb = &q_blocks[bb];
1538 let mut dot = 0.0f64;
1539 for k in 0..p_a {
1540 dot += xa_row[k] * q_bb[(off_a + k, local_i)];
1541 }
1542 scratch.a_i[a * b + bb] = dot;
1543 }
1544 }
1545
1546 mat_mul_flat(&scratch.w_flat, &scratch.a_i, &mut scratch.wa, b);
1548 mat_mul_flat(&scratch.a_i, &scratch.w_flat, &mut scratch.aw, b);
1550
1551 let mut tr = 0.0f64;
1554 for d in 0..b {
1555 tr += scratch.aw[d * b + d];
1556 }
1557 leverage[local_i] = tr;
1558
1559 for r in 0..b {
1561 for c in 0..b {
1562 let idx = r * b + c;
1563 let id = if r == c { 1.0 } else { 0.0 };
1564 scratch.imwa[idx] = id - scratch.wa[idx];
1565 scratch.imaw[idx] = id - scratch.aw[idx];
1566 }
1567 }
1568
1569 if !lu_factor_in_place(&mut scratch.imwa, &mut scratch.perm_imwa, b) {
1575 for r in 0..b {
1576 for c in 0..b {
1577 let idx = r * b + c;
1578 let id = if r == c { 1.0 } else { 0.0 };
1579 scratch.imwa[idx] = id - scratch.wa[idx];
1580 }
1581 }
1582 for d in 0..b {
1583 scratch.imwa[d * b + d] += ALO_LOCAL_BLOCK_RIDGE;
1584 }
1585 let refactored = lu_factor_in_place(&mut scratch.imwa, &mut scratch.perm_imwa, b);
1586 assert!(
1587 refactored,
1588 "ALO local block remained singular after ridge regularization"
1589 );
1590 }
1591 if !lu_factor_in_place(&mut scratch.imaw, &mut scratch.perm_imaw, b) {
1592 for r in 0..b {
1593 for c in 0..b {
1594 let idx = r * b + c;
1595 let id = if r == c { 1.0 } else { 0.0 };
1596 scratch.imaw[idx] = id - scratch.aw[idx];
1597 }
1598 }
1599 for d in 0..b {
1600 scratch.imaw[d * b + d] += ALO_LOCAL_BLOCK_RIDGE;
1601 }
1602 let refactored = lu_factor_in_place(&mut scratch.imaw, &mut scratch.perm_imaw, b);
1603 assert!(
1604 refactored,
1605 "ALO local variance block remained singular after ridge regularization"
1606 );
1607 }
1608
1609 let s_i = &input.scores[i];
1611 for k in 0..b {
1612 scratch.rhs_buf[k] = s_i[k];
1613 }
1614 lu_solve_in_place(
1615 &scratch.imwa,
1616 &scratch.perm_imwa,
1617 &mut scratch.rhs_buf,
1618 &mut scratch.lu_scratch,
1619 b,
1620 );
1621 for r in 0..b {
1623 let mut acc = 0.0f64;
1624 let row_off = r * b;
1625 for k in 0..b {
1626 acc += scratch.a_i[row_off + k] * scratch.rhs_buf[k];
1627 }
1628 scratch.delta_eta[r] = acc;
1629 }
1630
1631 let eta_i = &input.eta_hat[i];
1632 let mut corrected = Array1::<f64>::zeros(b);
1633 for d in 0..b {
1634 corrected[d] = eta_i[d] + scratch.delta_eta[d];
1635 }
1636 eta_tilde.push(corrected);
1637
1638 let mut cook = 0.0f64;
1640 for r in 0..b {
1641 let mut w_delta_r = 0.0f64;
1642 let row_off = r * b;
1643 for k in 0..b {
1644 w_delta_r += scratch.w_flat[row_off + k] * scratch.delta_eta[k];
1645 }
1646 cook += scratch.delta_eta[r] * w_delta_r;
1647 }
1648 cook_distance[local_i] = cook;
1649
1650 for d in 0..b {
1656 let row_off = d * b;
1657 for k in 0..b {
1659 scratch.rhs_buf[k] = scratch.a_i[row_off + k];
1660 }
1661 lu_solve_in_place(
1662 &scratch.imaw,
1663 &scratch.perm_imaw,
1664 &mut scratch.rhs_buf,
1665 &mut scratch.lu_scratch,
1666 b,
1667 );
1668 for r in 0..b {
1670 let mut acc = 0.0f64;
1671 let wr = r * b;
1672 for k in 0..b {
1673 acc += scratch.w_flat[wr + k] * scratch.rhs_buf[k];
1674 }
1675 scratch.w_u[r] = acc;
1676 }
1677 lu_solve_in_place(
1679 &scratch.imwa,
1680 &scratch.perm_imwa,
1681 &mut scratch.w_u,
1682 &mut scratch.lu_scratch,
1683 b,
1684 );
1685 let mut v_dd = 0.0f64;
1687 for k in 0..b {
1688 v_dd += scratch.a_i[row_off + k] * scratch.w_u[k];
1689 }
1690 scratch.var_diag_buf[d] = v_dd.max(0.0);
1691 }
1692 let mut var_diag = Array1::<f64>::zeros(b);
1693 for d in 0..b {
1694 var_diag[d] = scratch.var_diag_buf[d];
1695 }
1696 alo_variance.push(var_diag);
1697 }
1698
1699 Ok(MultiBlockAloChunkDiagnostics {
1700 chunk_start,
1701 eta_tilde,
1702 leverage,
1703 alo_variance,
1704 cook_distance,
1705 })
1706}
1707
/// Dense product `out = a · b_mat` of two flat row-major `b × b` matrices.
///
/// Only the first `b * b` entries of `out` are written. Each entry is accumulated from `0.0`
/// in increasing inner-index order.
#[inline]
fn mat_mul_flat(a: &[f64], b_mat: &[f64], out: &mut [f64], b: usize) {
    for row in 0..b {
        let start = row * b;
        let lhs_row = &a[start..start + b];
        let out_row = &mut out[start..start + b];
        for (col, slot) in out_row.iter_mut().enumerate() {
            *slot = lhs_row
                .iter()
                .enumerate()
                .fold(0.0f64, |acc, (k, &lhs)| acc + lhs * b_mat[k * b + col]);
        }
    }
}
1723
1724fn lu_factor_in_place(m: &mut [f64], perm: &mut [usize], b: usize) -> bool {
1731 for i in 0..b {
1732 perm[i] = i;
1733 }
1734 for col in 0..b {
1735 let mut max_val = m[col * b + col].abs();
1737 let mut max_idx = col;
1738 for row in (col + 1)..b {
1739 let v = m[row * b + col].abs();
1740 if v > max_val {
1741 max_val = v;
1742 max_idx = row;
1743 }
1744 }
1745 if max_val < LU_PIVOT_SINGULAR_TOL {
1746 return false;
1747 }
1748 if max_idx != col {
1749 for k in 0..b {
1751 m.swap(col * b + k, max_idx * b + k);
1752 }
1753 perm.swap(col, max_idx);
1754 }
1755 let pivot = m[col * b + col];
1756 for row in (col + 1)..b {
1757 let factor = m[row * b + col] / pivot;
1758 m[row * b + col] = factor; for k in (col + 1)..b {
1760 let upd = factor * m[col * b + k];
1761 m[row * b + k] -= upd;
1762 }
1763 }
1764 }
1765 true
1766}
1767
/// Solves the linear system described by a packed LU factorization, in place.
///
/// `m` is a flat row-major `b × b` array holding the multipliers of the unit lower factor below
/// the diagonal and the upper factor on and above it; `perm[i]` names the entry of `rhs` that
/// belongs to row `i` of the factorization. On return `rhs[..b]` holds the solution and
/// `scratch[..b]` holds the intermediate forward-substitution vector.
fn lu_solve_in_place(m: &[f64], perm: &[usize], rhs: &mut [f64], scratch: &mut [f64], b: usize) {
    let y = &mut scratch[..b];

    // Forward substitution on the permuted right-hand side (unit lower factor).
    for row in 0..b {
        let multipliers = &m[row * b..row * b + row];
        let value = multipliers
            .iter()
            .zip(&y[..row])
            .fold(rhs[perm[row]], |acc, (&l, &prev)| acc - l * prev);
        y[row] = value;
    }

    // Back substitution with the upper factor, writing the solution over `rhs`.
    for row in (0..b).rev() {
        let base = row * b;
        let upper = &m[base + row + 1..base + b];
        let value = upper
            .iter()
            .zip(&rhs[row + 1..b])
            .fold(y[row], |acc, (&u, &x)| acc - u * x);
        rhs[row] = value / m[base + row];
    }
}
1789
1790pub fn compute_multiblock_alo_leverages(
1798 n_obs: usize,
1799 n_blocks: usize,
1800 block_designs: &[Array2<f64>],
1801 penalized_hessian_inv: &Array2<f64>,
1802 block_weights: &[Array2<f64>],
1803) -> Result<Array1<f64>, EstimationError> {
1804 use rayon::prelude::*;
1805
1806 let n = n_obs;
1807 let b = n_blocks;
1808 let p_tot = penalized_hessian_inv.nrows();
1809
1810 let col_offsets = multiblock_col_offsets(block_designs);
1811 let max_workers = rayon::current_num_threads();
1812 let chunk_size = multiblock_alo_parallel_leverage_chunk_size(p_tot, b, n, max_workers);
1813
1814 let mut leverage = Array1::<f64>::zeros(n);
1815
1816 let block_widths: Vec<usize> = block_designs.iter().map(|d| d.ncols()).collect();
1820 let mut h_stripes: Vec<FaerMat<f64>> = block_widths
1821 .iter()
1822 .map(|&p_blk| FaerMat::<f64>::zeros(p_tot, p_blk))
1823 .collect();
1824 for blk in 0..b {
1827 let off_b = col_offsets[blk];
1828 let p_blk = block_widths[blk];
1829 let stripe = &mut h_stripes[blk];
1830 for c in 0..p_blk {
1831 for r in 0..p_tot {
1832 stripe[(r, c)] = penalized_hessian_inv[(r, off_b + c)];
1833 }
1834 }
1835 }
1836
1837 leverage
1838 .as_slice_mut()
1839 .expect("newly allocated Array1 is contiguous")
1840 .par_chunks_mut(chunk_size)
1841 .enumerate()
1842 .for_each(|(chunk_idx, leverage_chunk)| {
1843 let chunk_start = chunk_idx * chunk_size;
1844 let chunk_len = leverage_chunk.len();
1845 let chunk_end = chunk_start + chunk_len;
1846
1847 let bb_sz = b * b;
1851 let mut a_i = vec![0.0f64; bb_sz];
1852 let mut aw = vec![0.0f64; bb_sz];
1853 let mut w_flat = vec![0.0f64; bb_sz];
1854
1855 let mut q_storage: Vec<FaerMat<f64>> = block_widths
1859 .iter()
1860 .map(|_| FaerMat::<f64>::zeros(p_tot, chunk_len))
1861 .collect();
1862
1863 let mut xt_storage: Vec<FaerMat<f64>> = block_widths
1867 .iter()
1868 .map(|&p_blk| FaerMat::<f64>::zeros(p_blk, chunk_len))
1869 .collect();
1870
1871 for blk in 0..b {
1876 let p_blk = block_widths[blk];
1877
1878 let x_chunk = block_designs[blk].slice(s![chunk_start..chunk_end, ..]);
1879 let xt = &mut xt_storage[blk];
1880 for local_i in 0..chunk_len {
1881 let row = x_chunk.row(local_i);
1882 for j in 0..p_blk {
1883 xt[(j, local_i)] = row[j];
1884 }
1885 }
1886
1887 matmul(
1888 q_storage[blk].as_mut(),
1889 Accum::Replace,
1890 h_stripes[blk].as_ref(),
1891 xt_storage[blk].as_ref(),
1892 1.0,
1893 Par::Seq,
1894 );
1895 }
1896
1897 for local_i in 0..chunk_len {
1898 let i = chunk_start + local_i;
1899 let w_i = &block_weights[i];
1900
1901 for r in 0..b {
1903 for c in 0..b {
1904 w_flat[r * b + c] = w_i[(r, c)];
1905 }
1906 }
1907
1908 for r in 0..bb_sz {
1912 a_i[r] = 0.0;
1913 }
1914 for k in 0..b {
1915 let q_k = &q_storage[k];
1916 let q_col = q_k.col_as_slice(local_i);
1917 for a in 0..b {
1918 let p_a = block_widths[a];
1919 let off_a = col_offsets[a];
1920 let xa_row = block_designs[a].row(i);
1921 let mut dot = 0.0f64;
1922 for j in 0..p_a {
1923 dot = xa_row[j].mul_add(q_col[off_a + j], dot);
1924 }
1925 a_i[a * b + k] = dot;
1926 }
1927 }
1928
1929 mat_mul_flat(&a_i, &w_flat, &mut aw, b);
1931 let mut tr = 0.0f64;
1932 for d in 0..b {
1933 tr += aw[d * b + d];
1934 }
1935 leverage_chunk[local_i] = tr;
1936 }
1937 });
1938
1939 Ok(leverage)
1940}
1941
// Unit tests for the scalar ALO helpers (offset update, exact frozen-curvature solve, variance
// formulas, percentile helpers) and for the multi-block ALO routines.
#[cfg(test)]
mod tests {
    use super::{
        ALO_EXACT_SCALAR_MAX_ITERS, AloExactScalarError, AloInput, alo_eta_exact_frozen_curvature,
        alo_eta_updatewith_offset, bayesvar_eta, compute_alo_from_input_inner,
        percentile_from_sorted, percentile_index, sandwichvar_eta,
    };
    use gam_linalg::matrix::{PsdWeightsView, SignedWeightsView};
    use gam_problem::LinkFunction;

    // With unit score and Hessian weights the offset-aware update must equal the closed form
    // obtained by centering eta and z on the offset.
    #[test]
    fn alo_offset_update_matches_centered_algebra() {
        let eta_hat = 11.0;
        let z = 13.0;
        let offset = 10.0;
        let x_hinv_x = 0.2;
        let hessian_weight = 1.0;
        let score_weight = 1.0;
        let leverage = hessian_weight * x_hinv_x;
        let expected = offset + ((eta_hat - offset) - leverage * (z - offset)) / (1.0 - leverage);
        let got =
            alo_eta_updatewith_offset(eta_hat, z, offset, x_hinv_x, score_weight, 1.0 - leverage);
        assert!((got - expected).abs() < 1e-12);
    }

    // A zero offset must reproduce the offset-free formula (eta - h z) / (1 - h).
    #[test]
    fn alo_offset_update_reduces_to_classicwhen_offsetzero() {
        let eta_hat = 1.25;
        let z = -0.5;
        let x_hinv_x = 0.35;
        let hessian_weight = 1.0;
        let score_weight = 1.0;
        let leverage = hessian_weight * x_hinv_x;
        let expected = (eta_hat - leverage * z) / (1.0 - leverage);
        let got =
            alo_eta_updatewith_offset(eta_hat, z, 0.0, x_hinv_x, score_weight, 1.0 - leverage);
        assert!((got - expected).abs() < 1e-12);
    }

    // The score weight scales the numerator while the Hessian weight enters only through the
    // denominator 1 - hessian_weight * x_hinv_x; the two are deliberately different here.
    #[test]
    fn alo_offset_update_uses_distinct_score_and_hessian_weights() {
        let eta_hat = 1.7;
        let z = 0.4;
        let offset = -0.2;
        let x_hinv_x = 0.15;
        let hessian_weight = 3.0;
        let score_weight = 5.0;
        let expected = offset
            + (eta_hat - offset)
            + x_hinv_x * score_weight * ((eta_hat - offset) - (z - offset))
                / (1.0 - hessian_weight * x_hinv_x);
        let got = alo_eta_updatewith_offset(
            eta_hat,
            z,
            offset,
            x_hinv_x,
            score_weight,
            1.0 - hessian_weight * x_hinv_x,
        );
        assert!((got - expected).abs() < 1e-12);
    }

    // A zero Hessian weight makes the denominator exactly 1, so the correction is the plain
    // numerator term.
    #[test]
    fn alo_offset_update_handles_zero_hessian_weight() {
        let eta_hat = 0.8;
        let z = -0.3;
        let offset = 0.1;
        let x_hinv_x = 0.4;
        let hessian_weight = 0.0;
        let score_weight = 2.5;
        let expected = offset
            + (eta_hat - offset)
            + x_hinv_x * score_weight * ((eta_hat - offset) - (z - offset));
        let got = alo_eta_updatewith_offset(
            eta_hat,
            z,
            offset,
            x_hinv_x,
            score_weight,
            1.0 - hessian_weight * x_hinv_x,
        );
        assert!((got - expected).abs() < 1e-12);
    }

    // The closure returns a pair whose first component is linear in eta and whose second
    // component is the constant 0.5; the solve is expected to land on 0.75.
    #[test]
    fn alo_exact_frozen_curvature_converges_to_fixed_point() {
        let eta_hat = 1.0;
        let a_ii = 0.4;
        let got = alo_eta_exact_frozen_curvature(eta_hat, a_ii, &|eta| (0.5 * (eta - 2.0), 0.5))
            .expect("linear scalar fixed point should converge in one Newton step");
        assert!((got - 0.75).abs() < 1e-12);
    }

    // A closure whose second component is always 0.0 must exhaust the iteration budget and be
    // reported as `MaxIterations` carrying the full budget.
    #[test]
    fn alo_exact_frozen_curvature_reports_nonconvergence() {
        let err = alo_eta_exact_frozen_curvature(0.0, 1.0, &|eta| (eta + 1.0, 0.0))
            .expect_err("constant residual should exhaust the scalar iteration budget");
        let AloExactScalarError::MaxIterations { iterations, .. } = err else {
            panic!("constant residual must report MaxIterations, got {err:?}");
        };
        assert_eq!(
            iterations, ALO_EXACT_SCALAR_MAX_ITERS,
            "non-convergence must report the full scalar iteration budget"
        );
    }

    // End-to-end check on a 1x1 problem: the same non-converging closure, supplied through
    // `AloInput::score_curvature`, must surface as an error whose message names the failing row
    // and the non-convergence cause.
    #[test]
    fn alo_input_reports_exact_scalar_nonconvergence_with_row_context() {
        let design = Array2::from_elem((1, 1), 1.0);
        let penalized_hessian = Array2::from_elem((1, 1), 1.0);
        let hessian_weights = Array1::from_vec(vec![0.0]);
        let score_weights = Array1::from_vec(vec![0.0]);
        let working_response = Array1::from_vec(vec![0.0]);
        let eta = Array1::from_vec(vec![0.0]);
        let offset = Array1::from_vec(vec![0.0]);
        let score_curvature = |_: usize, eta: f64| (eta + 1.0, 0.0);
        let input = AloInput {
            design: &design,
            penalized_hessian: &penalized_hessian,
            hessian_weights: SignedWeightsView::from_array(&hessian_weights),
            score_weights: PsdWeightsView::try_from_array(&score_weights).expect("psd weights"),
            working_response: &working_response,
            eta: &eta,
            offset: &offset,
            link: LinkFunction::Logit,
            phi: 1.0,
            penalty_root: None,
            ridge: 0.0,
            score_curvature: Some(&score_curvature),
        };

        let err =
            compute_alo_from_input_inner(&input).expect_err("non-converged exact ALO must error");
        let msg = err.to_string();
        assert!(
            msg.contains("ALO exact frozen-curvature solve failed at row 0"),
            "missing row context in exact ALO error: {msg}"
        );
        assert!(
            msg.contains("did not converge within"),
            "missing non-convergence cause in exact ALO error: {msg}"
        );
    }

    // With es_norm2, ridge and s_norm2 all zero the sandwich variance must coincide with the
    // Bayesian variance.
    #[test]
    fn gaussian_unpenalized_sandwich_equals_bayes() {
        let phi = 2.5;
        let x_hinv_x = 0.3;
        let es_norm2 = 0.0;
        let ridge = 0.0;
        let s_norm2 = 0.0;
        let vb = bayesvar_eta(phi, x_hinv_x);
        let vs = sandwichvar_eta(phi, x_hinv_x, es_norm2, ridge, s_norm2);
        assert!((vb - vs).abs() < 1e-12);
    }

    // Pins the sandwich variance to phi * (x_hinv_x - es_norm2 - ridge * s_norm2).
    #[test]
    fn sandwich_matches_direct_linear_gaussian_formula() {
        let phi = 1.7;
        let x_hinv_x = 0.41;
        let es_norm2 = 0.05;
        let ridge = 1e-3;
        let s_norm2 = 2.0;
        let got = sandwichvar_eta(phi, x_hinv_x, es_norm2, ridge, s_norm2);
        let expected = phi * (x_hinv_x - es_norm2 - ridge * s_norm2);
        assert!((got - expected).abs() < 1e-12);
    }

    // Index selection for a percentile, including the empty and single-element cases.
    #[test]
    fn percentile_index_matches_expected_rounding() {
        assert_eq!(percentile_index(0, 0.95), 0);
        assert_eq!(percentile_index(1, 0.95), 0);
        assert_eq!(percentile_index(10, 0.50), 5);
        assert_eq!(percentile_index(10, 0.95), 9);
    }

    // The percentile of a sorted slice is one of its elements; an empty slice yields 0.0.
    #[test]
    fn percentile_from_sorted_returns_order_statistic() {
        let values = [1.0, 2.0, 3.0, 4.0, 5.0];
        assert_eq!(percentile_from_sorted(&values, 0.50), 3.0);
        assert_eq!(percentile_from_sorted(&values, 0.95), 5.0);
        assert_eq!(percentile_from_sorted(&[], 0.95), 0.0);
    }

    // Imports for the multi-block tests below. `use` declarations apply to the whole module, so
    // `Array1` / `Array2` are also what the scalar test above relies on.
    use super::{MultiBlockAloInput, compute_multiblock_alo, compute_multiblock_alo_leverages};
    use ndarray::{Array1, Array2};

    // With a single block the multi-block leverage must equal the scalar leverage
    // w_i * x_i' H^-1 x_i, where H = I + sum_i w_i x_i x_i' is inverted by hand (2x2 cofactors).
    #[test]
    fn multiblock_b1_matches_scalar_leverage() {
        let n = 3;
        let p = 2;
        let x = Array2::from_shape_vec((n, p), vec![1.0, 0.5, 0.8, -0.3, 0.2, 1.1]).unwrap();
        let w = [1.0, 2.0, 0.5];
        // H = I + X' diag(w) X
        let mut h = Array2::<f64>::eye(p);
        for i in 0..n {
            for r in 0..p {
                for c in 0..p {
                    h[(r, c)] += w[i] * x[(i, r)] * x[(i, c)];
                }
            }
        }
        // Closed-form inverse of the 2x2 matrix.
        let det = h[(0, 0)] * h[(1, 1)] - h[(0, 1)] * h[(1, 0)];
        let mut h_inv = Array2::<f64>::zeros((p, p));
        h_inv[(0, 0)] = h[(1, 1)] / det;
        h_inv[(1, 1)] = h[(0, 0)] / det;
        h_inv[(0, 1)] = -h[(0, 1)] / det;
        h_inv[(1, 0)] = -h[(1, 0)] / det;

        // Reference scalar leverages.
        let mut scalar_lev = vec![0.0f64; n];
        for i in 0..n {
            let mut xhx = 0.0;
            for r in 0..p {
                for c in 0..p {
                    xhx += x[(i, r)] * h_inv[(r, c)] * x[(i, c)];
                }
            }
            scalar_lev[i] = w[i] * xhx;
        }

        // Same problem expressed as a one-block multi-block input (1x1 weight matrices).
        let block_designs = vec![x.clone()];
        let block_weights: Vec<Array2<f64>> =
            w.iter().map(|&wi| Array2::from_elem((1, 1), wi)).collect();
        let scores: Vec<Array1<f64>> = (0..n).map(|_| Array1::from_vec(vec![0.1])).collect();
        let eta_hat: Vec<Array1<f64>> = (0..n).map(|i| Array1::from_vec(vec![i as f64])).collect();

        let input = MultiBlockAloInput {
            n_obs: n,
            n_blocks: 1,
            block_designs: &block_designs,
            penalized_hessian_inv: &h_inv,
            block_weights,
            scores,
            eta_hat,
        };

        let result = compute_multiblock_alo(&input).unwrap();
        for i in 0..n {
            assert!(
                (result.leverage[i] - scalar_lev[i]).abs() < 1e-10,
                "leverage mismatch at i={}: got {}, expected {}",
                i,
                result.leverage[i],
                scalar_lev[i]
            );
        }
    }

    // The leverage-only entry point must agree with the leverages of the full computation on a
    // two-block problem with non-diagonal 2x2 weights and an identity inverse Hessian.
    #[test]
    fn multiblock_leverage_only_matches_full() {
        let n = 4;
        let p1 = 2;
        let p2 = 3;
        let x1 = Array2::from_shape_fn((n, p1), |(i, j)| (i + j + 1) as f64 * 0.3);
        let x2 = Array2::from_shape_fn((n, p2), |(i, j)| (i * 2 + j) as f64 * 0.2 - 0.1);
        let p_tot = p1 + p2;
        let h_inv = Array2::<f64>::eye(p_tot); let block_weights: Vec<Array2<f64>> = (0..n)
            .map(|i| {
                let v = (i + 1) as f64;
                Array2::from_shape_vec((2, 2), vec![v, 0.1, 0.1, v * 0.5]).unwrap()
            })
            .collect();
        let scores: Vec<Array1<f64>> = (0..n).map(|_| Array1::from_vec(vec![0.0, 0.0])).collect();
        let eta_hat: Vec<Array1<f64>> = (0..n).map(|_| Array1::from_vec(vec![0.0, 0.0])).collect();
        let block_designs = vec![x1.clone(), x2.clone()];

        let input = MultiBlockAloInput {
            n_obs: n,
            n_blocks: 2,
            block_designs: &block_designs,
            penalized_hessian_inv: &h_inv,
            block_weights: block_weights.clone(),
            scores,
            eta_hat,
        };
        let full = compute_multiblock_alo(&input).unwrap();
        let lev_only =
            compute_multiblock_alo_leverages(n, 2, &block_designs, &h_inv, &block_weights).unwrap();

        for i in 0..n {
            assert!(
                (full.leverage[i] - lev_only[i]).abs() < 1e-12,
                "leverage mismatch at i={}: full={}, lev_only={}",
                i,
                full.leverage[i],
                lev_only[i]
            );
        }
    }

    // Zero weight: with x = (1, 0.5) and H^-1 = I, A = x'x = 1.25, so the correction is
    // A * score = 1.25 on top of eta_hat = PI, while Cook's distance and the variance (both
    // proportional to the weight) must vanish.
    #[test]
    fn multiblock_singular_weight_still_corrects() {
        let n = 1;
        let p = 2;
        let x = Array2::from_shape_vec((1, p), vec![1.0, 0.5]).unwrap();
        let h_inv = Array2::eye(p);
        let block_designs = vec![x.clone()];
        let block_weights = vec![Array2::from_elem((1, 1), 0.0)]; let scores = vec![Array1::from_vec(vec![1.0])];
        let eta_hat = vec![Array1::from_vec(vec![std::f64::consts::PI])];

        let input = MultiBlockAloInput {
            n_obs: n,
            n_blocks: 1,
            block_designs: &block_designs,
            penalized_hessian_inv: &h_inv,
            block_weights,
            scores,
            eta_hat,
        };
        let result = compute_multiblock_alo(&input).unwrap();
        let expected = std::f64::consts::PI + 1.25;
        assert!(
            (result.eta_tilde[0][0] - expected).abs() < 1e-12,
            "expected {}, got {}",
            expected,
            result.eta_tilde[0][0]
        );
        assert!(result.cook_distance[0].abs() < 1e-14);
        assert!(result.alo_variance[0][0].abs() < 1e-14);
    }

    // Here A = 1 * 0.5 * 1 = 0.5 and W = 2, so W * A = 1 and I - W A is exactly zero.
    // NOTE(review): presumably this drives the ridge-regularized retry of the local
    // factorization; the test only requires that every output stays finite.
    #[test]
    fn multiblock_cook_and_variance_basic() {
        let n = 1;
        let x = Array2::from_elem((1, 1), 1.0);
        let h_inv = Array2::from_elem((1, 1), 0.5);
        let block_designs = vec![x.clone()];
        let w_val = 2.0;
        let s_val = 0.4;
        let block_weights = vec![Array2::from_elem((1, 1), w_val)];
        let scores = vec![Array1::from_vec(vec![s_val])];
        let eta_hat = vec![Array1::from_vec(vec![1.0])];

        let input = MultiBlockAloInput {
            n_obs: n,
            n_blocks: 1,
            block_designs: &block_designs,
            penalized_hessian_inv: &h_inv,
            block_weights,
            scores,
            eta_hat,
        };
        let result = compute_multiblock_alo(&input).unwrap();

        assert!(result.eta_tilde[0][0].is_finite());
        assert!(result.cook_distance[0].is_finite());
        assert!(result.alo_variance[0][0].is_finite());
    }
}