use super::*;
#[derive(Debug, Clone)]
pub struct OuterHessianIndefinite {
pub min_eigenvalue: f64,
pub active_constraints: Vec<usize>,
pub theta: Vec<f64>,
pub gradient_norm: f64,
pub hessian_norm: f64,
pub suggested_action: &'static str,
}
#[derive(Debug, Clone)]
pub enum CorrectedCovarianceError {
ShapeMismatch {
context: &'static str,
reason: String,
},
EigendecompositionFailed {
context: &'static str,
reason: String,
},
Indefinite(OuterHessianIndefinite),
}
impl core::fmt::Display for CorrectedCovarianceError {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
match self {
Self::ShapeMismatch { context, reason } => {
write!(f, "shape mismatch in {context}: {reason}")
}
Self::EigendecompositionFailed { context, reason } => {
write!(f, "eigendecomposition failed in {context}: {reason}")
}
Self::Indefinite(d) => write!(
f,
"outer Hessian indefinite on free subspace (min eigenvalue = {:.3e}, \
||H||_F = {:.3e}, ||g||_2 = {:.3e}, active = {:?}, theta = {:?}); {}",
d.min_eigenvalue,
d.hessian_norm,
d.gradient_norm,
d.active_constraints,
d.theta,
d.suggested_action,
),
}
}
}
impl std::error::Error for CorrectedCovarianceError {}
#[derive(Debug, Clone)]
pub struct CorrectedCovariance {
pub matrix: Array2<f64>,
pub active_constraints: Vec<usize>,
pub rank_deficient_directions: Vec<usize>,
}
impl CorrectedCovariance {
pub(crate) fn has_structural_diagnostics(&self) -> bool {
!self.active_constraints.is_empty() || !self.rank_deficient_directions.is_empty()
}
}
pub(crate) const INDEFINITE_SUGGESTED_ACTION: &str = "refit with a tighter outer tolerance, verify the inspected objective is the true \
REML/LAML cost rather than a surrogate, and audit recent active-set transitions";
pub(crate) fn detect_active_theta_bounds(theta: Option<&[f64]>, q: usize) -> Vec<usize> {
let Some(theta) = theta else {
return Vec::new();
};
if theta.len() != q {
return Vec::new();
}
let bound = crate::estimate::RHO_BOUND;
const ACTIVE_THETA_BOUND_TOL: f64 = 1e-8;
theta
.iter()
.enumerate()
.filter_map(|(i, &v)| (v.abs() >= bound - ACTIVE_THETA_BOUND_TOL).then_some(i))
.collect()
}
pub(crate) fn active_bound_indices_for_theta(
theta: Option<&[f64]>,
rho_len: usize,
ext_len: usize,
) -> Vec<usize> {
let q = rho_len + ext_len;
let mut active = detect_active_theta_bounds(theta, q);
active.retain(|&i| i < rho_len);
active
}
pub(crate) fn projected_inverse_with_inertia_gate(
outer_hessian: &Array2<f64>,
active: &[usize],
theta_for_diag: Option<&[f64]>,
gradient_norm: f64,
) -> Result<(Array2<f64>, Vec<usize>), CorrectedCovarianceError> {
let q = outer_hessian.nrows();
let mut is_active = vec![false; q];
for &i in active {
if i < q {
is_active[i] = true;
}
}
let free: Vec<usize> = (0..q).filter(|i| !is_active[*i]).collect();
let qf = free.len();
let h_norm = outer_hessian.iter().map(|v| v * v).sum::<f64>().sqrt();
let mut v_theta_full = Array2::<f64>::zeros((q, q));
if qf == 0 {
return Ok((v_theta_full, Vec::new()));
}
let mut h_ff = Array2::<f64>::zeros((qf, qf));
for (a, &ia) in free.iter().enumerate() {
for (b, &ib) in free.iter().enumerate() {
h_ff[[a, b]] = outer_hessian[[ia, ib]];
}
}
let (evals, evecs) = h_ff.eigh(faer::Side::Lower).map_err(|e| {
CorrectedCovarianceError::EigendecompositionFailed {
context: "projected outer Hessian",
reason: e.to_string(),
}
})?;
let eps = f64::EPSILON;
let neg_tol = 8.0 * eps * (q.max(1) as f64) * h_norm.max(1.0);
let min_eig = evals.iter().copied().fold(f64::INFINITY, f64::min);
if min_eig < -neg_tol {
let diagnostic = OuterHessianIndefinite {
min_eigenvalue: min_eig,
active_constraints: active.to_vec(),
theta: theta_for_diag.map(|t| t.to_vec()).unwrap_or_default(),
gradient_norm,
hessian_norm: h_norm,
suggested_action: INDEFINITE_SUGGESTED_ACTION,
};
return Err(CorrectedCovarianceError::Indefinite(diagnostic));
}
let pos_tol = 8.0 * eps * (q.max(1) as f64) * h_norm.max(1.0);
let mut v_theta_ff = Array2::<f64>::zeros((qf, qf));
let mut rank_deficient_free: Vec<usize> = Vec::new();
for j in 0..qf {
let sigma = evals[j];
if sigma.abs() <= pos_tol {
rank_deficient_free.push(j);
continue;
}
let inv_sigma = 1.0 / sigma;
let u = evecs.column(j);
for a in 0..qf {
let ua = inv_sigma * u[a];
for b in a..qf {
let val = ua * u[b];
v_theta_ff[[a, b]] += val;
if a != b {
v_theta_ff[[b, a]] += val;
}
}
}
}
for (a, &ia) in free.iter().enumerate() {
for (b, &ib) in free.iter().enumerate() {
v_theta_full[[ia, ib]] = v_theta_ff[[a, b]];
}
}
let rank_deficient_directions: Vec<usize> =
rank_deficient_free.into_iter().map(|j| free[j]).collect();
Ok((v_theta_full, rank_deficient_directions))
}
pub fn compute_corrected_covariance(
v_ks: &[Array1<f64>],
ext_v: &[Array1<f64>],
outer_hessian: &Array2<f64>,
hop: &dyn HessianOperator,
) -> Result<Array2<f64>, CorrectedCovarianceError> {
compute_corrected_covariance_with_constraints(v_ks, ext_v, outer_hessian, hop, None, f64::NAN)
.map(|cov| {
if cov.has_structural_diagnostics() {
log::debug!(
"corrected covariance diagnostics: active_constraints={:?} rank_deficient_directions={:?}",
cov.active_constraints,
cov.rank_deficient_directions
);
}
cov.matrix
})
}
pub fn compute_corrected_covariance_with_constraints(
v_ks: &[Array1<f64>],
ext_v: &[Array1<f64>],
outer_hessian: &Array2<f64>,
hop: &dyn HessianOperator,
theta_at_optimum: Option<&[f64]>,
gradient_norm: f64,
) -> Result<CorrectedCovariance, CorrectedCovarianceError> {
let p = hop.dim();
let q = v_ks.len() + ext_v.len();
if q == 0 {
let eye = Array2::eye(p);
return Ok(CorrectedCovariance {
matrix: hop.solve_multi(&eye),
active_constraints: Vec::new(),
rank_deficient_directions: Vec::new(),
});
}
if outer_hessian.nrows() != q || outer_hessian.ncols() != q {
return Err(CorrectedCovarianceError::ShapeMismatch {
context: "compute_corrected_covariance outer Hessian",
reason: format!(
"dimension ({}, {}) does not match total hyperparameter count q = {} (rho: {}, ext: {})",
outer_hessian.nrows(),
outer_hessian.ncols(),
q,
v_ks.len(),
ext_v.len(),
),
});
}
let mut j_alpha = Array2::zeros((p, q));
for (col, v) in v_ks.iter().enumerate() {
for row in 0..p {
j_alpha[[row, col]] = -v[row];
}
}
for (i, v) in ext_v.iter().enumerate() {
let col = v_ks.len() + i;
for row in 0..p {
j_alpha[[row, col]] = -v[row];
}
}
let active = active_bound_indices_for_theta(theta_at_optimum, v_ks.len(), ext_v.len());
let (v_theta, rank_deficient_directions) = projected_inverse_with_inertia_gate(
outer_hessian,
&active,
theta_at_optimum,
gradient_norm,
)?;
let j_v_theta = j_alpha.dot(&v_theta);
let correction = j_v_theta.dot(&j_alpha.t());
let eye = Array2::eye(p);
let mut h_inv = hop.solve_multi(&eye);
h_inv += &correction;
gam_linalg::matrix::symmetrize_in_place(&mut h_inv);
Ok(CorrectedCovariance {
matrix: h_inv,
active_constraints: active,
rank_deficient_directions,
})
}
pub fn compute_corrected_covariance_diagonal(
v_ks: &[Array1<f64>],
ext_v: &[Array1<f64>],
outer_hessian: &Array2<f64>,
hop: &dyn HessianOperator,
) -> Result<Array1<f64>, CorrectedCovarianceError> {
compute_corrected_covariance_diagonal_with_constraints(
v_ks,
ext_v,
outer_hessian,
hop,
None,
f64::NAN,
)
.map(|d| {
if d.has_structural_diagnostics() {
log::debug!(
"corrected covariance diagonal diagnostics: active_constraints={:?} rank_deficient_directions={:?}",
d.active_constraints,
d.rank_deficient_directions
);
}
d.diagonal
})
}
#[derive(Debug, Clone)]
pub struct CorrectedCovarianceDiagonal {
pub diagonal: Array1<f64>,
pub active_constraints: Vec<usize>,
pub rank_deficient_directions: Vec<usize>,
}
impl CorrectedCovarianceDiagonal {
pub(crate) fn has_structural_diagnostics(&self) -> bool {
!self.active_constraints.is_empty() || !self.rank_deficient_directions.is_empty()
}
}
pub fn compute_corrected_covariance_diagonal_with_constraints(
v_ks: &[Array1<f64>],
ext_v: &[Array1<f64>],
outer_hessian: &Array2<f64>,
hop: &dyn HessianOperator,
theta_at_optimum: Option<&[f64]>,
gradient_norm: f64,
) -> Result<CorrectedCovarianceDiagonal, CorrectedCovarianceError> {
let p = hop.dim();
let q = v_ks.len() + ext_v.len();
let mut diag = Array1::zeros(p);
for i in 0..p {
let mut e_i = Array1::zeros(p);
e_i[i] = 1.0;
let h_inv_ei = hop.solve(&e_i);
diag[i] = h_inv_ei[i];
}
if q == 0 {
return Ok(CorrectedCovarianceDiagonal {
diagonal: diag,
active_constraints: Vec::new(),
rank_deficient_directions: Vec::new(),
});
}
if outer_hessian.nrows() != q || outer_hessian.ncols() != q {
return Err(CorrectedCovarianceError::ShapeMismatch {
context: "compute_corrected_covariance_diagonal outer Hessian",
reason: format!(
"dimension ({}, {}) does not match q = {}",
outer_hessian.nrows(),
outer_hessian.ncols(),
q,
),
});
}
let active = active_bound_indices_for_theta(theta_at_optimum, v_ks.len(), ext_v.len());
let (v_theta_full, rank_deficient_directions) = projected_inverse_with_inertia_gate(
outer_hessian,
&active,
theta_at_optimum,
gradient_norm,
)?;
let (sym_evals, sym_evecs) = v_theta_full.eigh(faer::Side::Lower).map_err(|e| {
CorrectedCovarianceError::EigendecompositionFailed {
context: "corrected covariance hyperparameter covariance",
reason: e.to_string(),
}
})?;
let mut v_theta_sqrt = Array2::<f64>::zeros((q, q));
for j in 0..q {
let s = sym_evals[j];
if s <= 0.0 {
continue;
}
let scale = s.sqrt();
for row in 0..q {
v_theta_sqrt[[row, j]] = sym_evecs[[row, j]] * scale;
}
}
let mut j_alpha = Array2::zeros((p, q));
for (col, v) in v_ks.iter().enumerate() {
for row in 0..p {
j_alpha[[row, col]] = -v[row];
}
}
for (i, v) in ext_v.iter().enumerate() {
let col = v_ks.len() + i;
for row in 0..p {
j_alpha[[row, col]] = -v[row];
}
}
let m = j_alpha.dot(&v_theta_sqrt);
for i in 0..p {
let mut row_norm_sq = 0.0;
for j in 0..m.ncols() {
row_norm_sq += m[[i, j]] * m[[i, j]];
}
diag[i] += row_norm_sq;
}
Ok(CorrectedCovarianceDiagonal {
diagonal: diag,
active_constraints: active,
rank_deficient_directions,
})
}
#[inline]
pub fn spectral_regularize(sigma: f64, epsilon: f64) -> f64 {
let disc = sigma.hypot(2.0 * epsilon);
if sigma >= 0.0 {
0.5 * sigma + 0.5 * disc
} else {
(2.0 * epsilon * epsilon) / (disc - sigma)
}
}
#[inline]
pub fn spectral_epsilon(eigenvalues: &[f64]) -> f64 {
f64::EPSILON.sqrt() * (eigenvalues.len() as f64).max(1.0)
}