use super::*;
#[derive(Clone)]
pub(crate) struct BatchedOuterHessianTestFamily {
pub(crate) matrix: Array2<f64>,
}
pub(crate) struct TestOuterHessianOperator {
pub(crate) matrix: Array2<f64>,
}
impl crate::solver::rho_optimizer::OuterHessianOperator for TestOuterHessianOperator {
fn dim(&self) -> usize {
self.matrix.nrows()
}
fn matvec(&self, v: &Array1<f64>) -> Result<Array1<f64>, String> {
Ok(self.matrix.dot(v))
}
fn is_cheap_to_materialize(&self) -> bool {
true
}
}
impl CustomFamily for BatchedOuterHessianTestFamily {
fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
let _unused_block_states = block_states;
let _ = block_states;
Ok(FamilyEvaluation {
log_likelihood: 0.0,
blockworking_sets: vec![],
})
}
fn outer_hyper_hessian_hvp_available(&self, block_specs: &[ParameterBlockSpec]) -> bool {
let _unused_block_specs = block_specs;
let _ = block_specs;
true
}
fn outer_hyper_hessian_operator(
&self,
block_specs: &[ParameterBlockSpec],
) -> Option<Arc<dyn crate::solver::rho_optimizer::OuterHessianOperator>> {
let _unused_block_specs = block_specs;
let _ = block_specs;
Some(Arc::new(TestOuterHessianOperator {
matrix: self.matrix.clone(),
}))
}
}
#[test]
pub(crate) fn blockwise_fit_from_parts_accepts_stacked_solver_eta_with_canonical_geometry_rows() {
let canonical_design = DesignMatrix::from(Array2::ones((2, 1)));
let stacked_design = DesignMatrix::from(Array2::ones((6, 1)));
let spec = ParameterBlockSpec {
name: "stacked".to_string(),
design: canonical_design,
offset: Array1::zeros(2),
penalties: Vec::new(),
nullspace_dims: Vec::new(),
initial_log_lambdas: Array1::zeros(0),
initial_beta: None,
gauge_priority: 100,
jacobian_callback: None,
stacked_design: Some(stacked_design),
stacked_offset: Some(Array1::zeros(6)),
};
let state = ParameterBlockState {
beta: array![0.25],
eta: Array1::zeros(6),
};
let fit = blockwise_fit_from_parts(
BlockwiseFitResultParts {
block_states: vec![state],
log_likelihood: -1.0,
log_lambdas: Array1::zeros(0),
lambdas: Array1::zeros(0),
covariance_conditional: Some(Array2::eye(1)),
stable_penalty_term: 0.0,
penalized_objective: 1.0,
outer_iterations: 0,
outer_gradient_norm: Some(0.0),
criterion_certificate: None,
inner_cycles: 0,
outer_converged: true,
geometry: Some(FitGeometry {
penalized_hessian: Array2::eye(1).into(),
working_weights: Array1::ones(2),
working_response: Array1::zeros(2),
}),
precomputed_edf: Some((1.0, Vec::new(), vec![1.0], Vec::new())),
},
&[spec],
)
.expect("stacked solver eta should assemble against canonical geometry rows");
assert_eq!(fit.block_states[0].eta.len(), 6);
assert_eq!(fit.geometry.as_ref().unwrap().working_weights.len(), 2);
}
#[test]
pub(crate) fn batched_outer_hessian_terms_materialize_to_exact_small_matrix() {
let exact = array![[4.0, -1.0], [-1.0, 3.0]];
let family = BatchedOuterHessianTestFamily {
matrix: exact.clone(),
};
let terms = family
.batched_outer_hessian_terms(&[], &[], &[], &Array1::<f64>::zeros(0), None)
.expect("batched Hessian hook succeeds")
.expect("test family exposes batched HVP terms");
let operator = match terms.outer_hessian {
crate::solver::rho_optimizer::HessianResult::Operator(operator) => operator,
_ => panic!("batched hook should expose an operator"),
};
let dense = operator
.mul_mat(Array2::<f64>::eye(2).view())
.expect("operator materializes on small exact case");
assert_eq!(dense, exact);
}
#[test]
pub(crate) fn batched_outer_hessian_operator_selected_only_for_hessian_eval() {
let family = BatchedOuterHessianTestFamily {
matrix: array![[2.0, 0.5], [0.5, 5.0]],
};
let selected = custom_family_batched_outer_hessian_operator(
&family,
&[],
&[],
&[],
&Array1::<f64>::zeros(0),
None,
EvalMode::ValueGradientHessian,
)
.expect("selection check succeeds");
assert!(
selected.is_some(),
"supported Hessian/HVP families should select the batched operator path"
);
let not_selected = custom_family_batched_outer_hessian_operator(
&family,
&[],
&[],
&[],
&Array1::<f64>::zeros(0),
None,
EvalMode::ValueAndGradient,
)
.expect("non-Hessian selection check succeeds");
assert!(
not_selected.is_none(),
"batched Hessian terms must not run for gradient-only evaluations"
);
}
#[test]
pub(crate) fn batched_outer_gradient_override_rejected_when_jeffreys_curvature_is_active() {
assert!(
batched_outer_gradient_contract_allows_override(None),
"released objective without robust Jeffreys curvature may use a family-owned batched gradient"
);
let zero_hphi = Array2::<f64>::zeros((2, 2));
assert!(
batched_outer_gradient_contract_allows_override(Some(&zero_hphi)),
"a gated zero Jeffreys curvature leaves the batched gradient contract unchanged"
);
let active_hphi = array![[0.0, 0.0], [0.0, 1.0e-6]];
assert!(
!batched_outer_gradient_contract_allows_override(Some(&active_hphi)),
"nonzero H_phi changes the logdet operator and needs the unified H_phi-aware gradient"
);
}
use crate::families::gamlss::{BinomialLocationScaleFamily, BinomialLocationScaleWiggleFamily};
use crate::matrix::DesignMatrix;
use crate::test_support::binomial_location_scale_base_fixture;
use approx::assert_relative_eq;
use faer::sparse::{SparseColMat, Triplet};
use ndarray::{Array1, Array2, array};
pub(crate) fn assert_kronecker_factored_matches_dense(
left: Array2<f64>,
right: Array2<f64>,
vectors: Vec<Array1<f64>>,
) {
let penalty = PenaltyMatrix::KroneckerFactored { left, right };
let dense = penalty.to_dense();
for v in vectors {
let factored_dot = penalty.dot(&v);
let dense_dot = dense.dot(&v);
for i in 0..v.len() {
assert!(
(factored_dot[i] - dense_dot[i]).abs() <= 1.0e-14,
"Kronecker dot mismatch at component {i}: factored={}, dense={}",
factored_dot[i],
dense_dot[i],
);
}
let factored_quad = penalty.quadratic_form(&v);
let dense_quad = v.dot(&dense_dot);
assert!(
(factored_quad - dense_quad).abs() <= 1.0e-14,
"Kronecker quadratic form mismatch: factored={factored_quad}, dense={dense_quad}",
);
}
}
#[test]
pub(crate) fn kronecker_factored_dot_and_quadratic_form_match_dense_row_major_operator() {
let left_diag = array![[10.0, 0.0], [0.0, 100.0]];
let right_diag = array![[1.0, 0.0, 0.0], [0.0, 2.0, 0.0], [0.0, 0.0, 3.0]];
let mut diag_vectors = Vec::new();
for i in 0..6 {
let mut v = Array1::<f64>::zeros(6);
v[i] = 1.0;
diag_vectors.push(v);
}
diag_vectors.push(array![0.25, -1.5, 2.0, 0.75, -0.5, 3.25]);
assert_kronecker_factored_matches_dense(left_diag, right_diag, diag_vectors);
let left_nondiag = array![[1.0, 2.0], [3.0, 4.0]];
let right_nondiag = array![[0.0, 1.0], [1.0, 0.0]];
let mut nondiag_vectors = Vec::new();
for i in 0..4 {
let mut v = Array1::<f64>::zeros(4);
v[i] = 1.0;
nondiag_vectors.push(v);
}
nondiag_vectors.push(array![1.25, -0.75, 2.5, -3.0]);
assert_kronecker_factored_matches_dense(left_nondiag, right_nondiag, nondiag_vectors);
}
#[test]
pub(crate) fn joint_hessian_coupling_probe_detects_off_diagonal_blocks() {
let states = vec![
ParameterBlockState {
beta: Array1::zeros(2),
eta: Array1::zeros(3),
},
ParameterBlockState {
beta: Array1::zeros(2),
eta: Array1::zeros(3),
},
];
let block_diagonal = array![
[1.0_f64, 0.5, 0.0, 0.0],
[0.5, 1.0, 0.0, 0.0],
[0.0, 0.0, 2.0, 0.3],
[0.0, 0.0, 0.3, 2.0],
];
assert!(
!joint_hessian_has_cross_block_coupling(&block_diagonal, &states),
"block-diagonal joint Hessian must not be treated as coupled"
);
let mut coupled = block_diagonal.clone();
coupled[[0, 2]] = 1.0e-9;
coupled[[2, 0]] = 1.0e-9;
assert!(
joint_hessian_has_cross_block_coupling(&coupled, &states),
"a nonzero off-diagonal block must be detected as coupling"
);
let wrong_shape = Array2::<f64>::zeros((3, 3));
assert!(
!joint_hessian_has_cross_block_coupling(&wrong_shape, &states),
"shape disagreement must not be claimed as coupling"
);
}
pub(crate) fn solve_blockweighted_system(
x: &DesignMatrix,
y_star: &Array1<f64>,
w: &Array1<f64>,
s_lambda: &Array2<f64>,
ridge_floor: f64,
ridge_policy: RidgePolicy,
) -> Result<Array1<f64>, String> {
let n = x.nrows();
if y_star.len() != n || w.len() != n {
return Err(CustomFamilyError::DimensionMismatch {
reason: "weighted-system dimension mismatch".to_string(),
}
.into());
}
let xtwy = x.compute_xtwy(w, y_star)?;
x.solve_systemwith_policy(w, &xtwy, Some(s_lambda), ridge_floor, ridge_policy)
.map_err(|_| "block solve failed after ridge retries".to_string())
}
#[test]
pub(crate) fn default_inner_cycle_budget_covers_large_scale_joint_newton_tail() {
let options = BlockwiseFitOptions::default();
assert_eq!(
options.inner_max_cycles,
DEFAULT_CUSTOM_FAMILY_INNER_MAX_CYCLES
);
assert!(
options.inner_max_cycles > 300,
"startup validation must not reject still-descending exact joint solves at the old cap"
);
}
#[test]
pub(crate) fn startup_validation_failure_routes_to_never_fail_escalation() {
use crate::model_types::EstimationError;
let all_seeds_rejected = EstimationError::RemlOptimizationFailed(
"no candidate seeds passed outer startup validation (custom family):\n generated=4"
.to_string(),
);
assert!(
outer_startup_failure_is_escalatable(&all_seeds_rejected),
"post-audit all-seeds startup rejection must reach the never-fail escalation net"
);
let non_finite_eval = EstimationError::RemlOptimizationFailed(
"outer eval failed: objective returned a non-finite cost".to_string(),
);
assert!(
outer_startup_failure_is_escalatable(&non_finite_eval),
"non-finite startup evals are the same post-audit numerical pathology"
);
let structural_input = EstimationError::InvalidInput(
"zero-event survival marginal-slope input remains structurally invalid".to_string(),
);
assert!(
!outer_startup_failure_is_escalatable(&structural_input),
"structural input errors must not be converted into sampled fits"
);
}
#[test]
pub(crate) fn joint_penalty_subspace_trace_matches_projected_logdet_derivative() {
let ranges = vec![(0, 3)];
let s_lambda = array![[1.0, 0.0, 0.0], [0.0, 2.0, 0.0], [0.0, 0.0, 0.0]];
let penalties = vec![s_lambda];
let h = array![[4.0, 0.2, 7.0], [0.2, 9.0, -3.0], [7.0, -3.0, 30.0]];
let drift = array![[0.7, -0.4, 0.0], [-0.4, 1.3, 0.0], [0.0, 0.0, 0.0]];
let (logdet, kernel) = joint_penalty_subspace_trace_parts(
&JointHessianSource::Dense(h.clone()),
&ranges,
&penalties,
3,
0.0,
None,
)
.expect("projection parts build");
let kernel = kernel.expect("rank-deficient penalty still has an identified subspace");
assert_eq!(kernel.u_s.ncols(), 3);
let m = array![[5.0, 0.2, 7.0], [0.2, 11.0, -3.0], [7.0, -3.0, 30.0]];
let (m_evals, _) = m.eigh(faer::Side::Lower).expect("M eigendecomposition");
let expected_logdet: f64 = m_evals.iter().map(|&v| v.ln()).sum();
assert_relative_eq!(logdet, expected_logdet, epsilon = 1e-10);
let analytic = kernel.trace_projected_logdet(&drift);
let eps = 1.0e-6;
let h_plus = &h + &(drift.mapv(|v| eps * v));
let h_minus = &h - &(drift.mapv(|v| eps * v));
let (logdet_plus, _) = joint_penalty_subspace_trace_parts(
&JointHessianSource::Dense(h_plus),
&ranges,
&penalties,
3,
0.0,
None,
)
.expect("plus projection parts build");
let (logdet_minus, _) = joint_penalty_subspace_trace_parts(
&JointHessianSource::Dense(h_minus),
&ranges,
&penalties,
3,
0.0,
None,
)
.expect("minus projection parts build");
let finite_difference = (logdet_plus - logdet_minus) / (2.0 * eps);
assert_relative_eq!(
analytic,
finite_difference,
epsilon = 1e-8,
max_relative = 1e-8
);
}
#[test]
pub(crate) fn joint_outer_gradient_uses_projected_trace_for_rank_deficient_penalty() {
let ranges = vec![(0, 3)];
let rho = array![0.0];
let beta = array![1.0, -1.0, 3.0];
let s_lambda = array![[1.0, 0.0, 0.0], [0.0, 2.0, 0.0], [0.0, 0.0, 0.0]];
let h = array![[4.0, 0.2, 7.0], [0.2, 9.0, -3.0], [7.0, -3.0, 30.0]];
let spec = ParameterBlockSpec {
name: "surface".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(Array2::zeros((
1, 3,
)))),
offset: Array1::zeros(1),
penalties: vec![PenaltyMatrix::Dense(s_lambda.clone())],
nullspace_dims: vec![1],
initial_log_lambdas: rho.clone(),
initial_beta: Some(beta.clone()),
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let specs = vec![spec];
let inner = BlockwiseInnerResult {
block_states: vec![ParameterBlockState {
beta: beta.clone(),
eta: Array1::zeros(1),
}],
active_sets: vec![None],
log_likelihood: 0.0,
penalty_value: 0.5 * beta.dot(&fast_av(&s_lambda, &beta)),
cycles: 1,
converged: true,
block_logdet_h: 0.0,
block_logdet_s: 0.0,
s_lambdas: vec![s_lambda.clone()],
joint_workspace: None,
kkt_residual: None,
active_constraints: None,
};
let per_block = vec![rho.clone()];
let options = BlockwiseFitOptions {
use_remlobjective: true,
use_outer_hessian: false,
..BlockwiseFitOptions::default()
};
let no_dh = |_direction: &Array1<f64>| -> Result<Option<DriftDerivResult>, String> { Ok(None) };
let no_d2h = |_u: &Array1<f64>, _v: &Array1<f64>| -> Result<Option<DriftDerivResult>, String> {
Ok(None)
};
let projected = joint_outer_evaluate(
&inner,
&specs,
&per_block,
&rho,
&beta,
JointHessianSource::Dense(h.clone()),
&ranges,
3,
0.0,
0.0,
0.0,
1.0,
0.0,
true,
true,
false,
true,
EvalMode::ValueAndGradient,
&options,
crate::types::RhoPrior::Flat,
PseudoLogdetMode::Smooth,
&no_dh,
None,
&no_d2h,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
)
.expect("projected outer evaluation succeeds");
let unprojected = joint_outer_evaluate(
&inner,
&specs,
&per_block,
&rho,
&beta,
JointHessianSource::Dense(h.clone()),
&ranges,
3,
0.0,
0.0,
0.0,
1.0,
0.0,
true,
true,
false,
false,
EvalMode::ValueAndGradient,
&options,
crate::types::RhoPrior::Flat,
PseudoLogdetMode::Smooth,
&no_dh,
None,
&no_d2h,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
)
.expect("unprojected outer evaluation succeeds");
let (_, kernel) = joint_penalty_subspace_trace_parts(
&JointHessianSource::Dense(h.clone()),
&ranges,
std::slice::from_ref(&s_lambda),
3,
0.0,
None,
)
.expect("projection kernel builds");
let projected_trace = kernel
.expect("rank-deficient penalty has positive subspace")
.trace_projected_logdet(&s_lambda);
let expected_gradient =
0.5 * beta.dot(&fast_av(&s_lambda, &beta)) + 0.5 * projected_trace - 0.5 * 2.0;
assert_relative_eq!(
projected.gradient[0],
expected_gradient,
epsilon = 1e-12,
max_relative = 1e-12
);
assert_relative_eq!(
projected.gradient[0],
unprojected.gradient[0],
epsilon = 1e-8,
max_relative = 1e-8
);
}
#[test]
pub(crate) fn joint_outer_gradient_projected_trace_drops_joint_null() {
let ranges = vec![(0, 3)];
let rho = array![0.0];
let beta = array![1.0, -1.0, 3.0];
let s_lambda = array![[1.0, 0.0, 0.0], [0.0, 2.0, 0.0], [0.0, 0.0, 0.0]];
let h = array![[4.0, 0.2, 0.0], [0.2, 9.0, 0.0], [0.0, 0.0, 0.0]];
let spec = ParameterBlockSpec {
name: "surface".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(Array2::zeros((
1, 3,
)))),
offset: Array1::zeros(1),
penalties: vec![PenaltyMatrix::Dense(s_lambda.clone())],
nullspace_dims: vec![1],
initial_log_lambdas: rho.clone(),
initial_beta: Some(beta.clone()),
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let specs = vec![spec];
let inner = BlockwiseInnerResult {
block_states: vec![ParameterBlockState {
beta: beta.clone(),
eta: Array1::zeros(1),
}],
active_sets: vec![None],
log_likelihood: 0.0,
penalty_value: 0.5 * beta.dot(&fast_av(&s_lambda, &beta)),
cycles: 1,
converged: true,
block_logdet_h: 0.0,
block_logdet_s: 0.0,
s_lambdas: vec![s_lambda.clone()],
joint_workspace: None,
kkt_residual: None,
active_constraints: None,
};
let per_block = vec![rho.clone()];
let options = BlockwiseFitOptions {
use_remlobjective: true,
use_outer_hessian: false,
..BlockwiseFitOptions::default()
};
let no_dh = |_direction: &Array1<f64>| -> Result<Option<DriftDerivResult>, String> { Ok(None) };
let no_d2h = |_u: &Array1<f64>, _v: &Array1<f64>| -> Result<Option<DriftDerivResult>, String> {
Ok(None)
};
let projected = joint_outer_evaluate(
&inner,
&specs,
&per_block,
&rho,
&beta,
JointHessianSource::Dense(h.clone()),
&ranges,
3,
0.0,
0.0,
0.0,
1.0,
0.0,
true,
true,
false,
true,
EvalMode::ValueAndGradient,
&options,
crate::types::RhoPrior::Flat,
PseudoLogdetMode::Smooth,
&no_dh,
None,
&no_d2h,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
)
.expect("projected outer evaluation succeeds on a singular joint Hessian");
let (_, kernel) = joint_penalty_subspace_trace_parts(
&JointHessianSource::Dense(h.clone()),
&ranges,
std::slice::from_ref(&s_lambda),
3,
0.0,
None,
)
.expect("projection kernel builds");
let projected_trace = kernel
.expect("rank-deficient joint Hessian has a positive subspace")
.trace_projected_logdet(&s_lambda);
let expected_gradient =
0.5 * beta.dot(&fast_av(&s_lambda, &beta)) + 0.5 * projected_trace - 0.5 * 2.0;
assert!(
projected.objective.is_finite(),
"pseudo-logdet objective must stay finite when ker(H+Sλ) is dropped"
);
assert_relative_eq!(
projected.gradient[0],
expected_gradient,
epsilon = 1e-10,
max_relative = 1e-10
);
}
#[test]
pub(crate) fn large_scale_rho_scan_joint_outer_evaluate_is_projection_invariant() {
let ranges = vec![(0, 3)];
let beta = array![1.0, -1.0, 3.0];
let s_unit: Array2<f64> = array![[1.0, 0.0, 0.0], [0.0, 2.0, 0.0], [0.0, 0.0, 0.0]];
let n_scale = 2.0e5_f64;
let h: Array2<f64> =
array![[4.0, 0.2, 7.0], [0.2, 9.0, -3.0], [7.0, -3.0, 30.0]].mapv(|v| v * n_scale);
let no_dh = |_d: &Array1<f64>| -> Result<Option<DriftDerivResult>, String> { Ok(None) };
let no_d2h = |_u: &Array1<f64>, _v: &Array1<f64>| -> Result<Option<DriftDerivResult>, String> {
Ok(None)
};
let mut g_un_at_10 = 0.0_f64;
let mut g_pr_at_10 = 0.0_f64;
for &rho_val in &[0.0_f64, 2.0, 4.0, 6.0, 8.0, 10.0] {
let lam = rho_val.exp();
let rho = array![rho_val];
let s_lambda = s_unit.mapv(|v| v * lam);
let spec = ParameterBlockSpec {
name: "surface".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(Array2::zeros((
1, 3,
)))),
offset: Array1::zeros(1),
penalties: vec![PenaltyMatrix::Dense(s_unit.clone())],
nullspace_dims: vec![1],
initial_log_lambdas: rho.clone(),
initial_beta: Some(beta.clone()),
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let specs = vec![spec];
let inner = BlockwiseInnerResult {
block_states: vec![ParameterBlockState {
beta: beta.clone(),
eta: Array1::zeros(1),
}],
active_sets: vec![None],
log_likelihood: 0.0,
penalty_value: 0.5 * lam * beta.dot(&fast_av(&s_unit, &beta)),
cycles: 1,
converged: true,
block_logdet_h: 0.0,
block_logdet_s: 0.0,
s_lambdas: vec![s_lambda.clone()],
joint_workspace: None,
kkt_residual: None,
active_constraints: None,
};
let per_block = vec![rho.clone()];
let options = BlockwiseFitOptions {
use_remlobjective: true,
use_outer_hessian: false,
..BlockwiseFitOptions::default()
};
let projected = joint_outer_evaluate(
&inner,
&specs,
&per_block,
&rho,
&beta,
JointHessianSource::Dense(h.clone()),
&ranges,
3,
0.0,
0.0,
0.0,
1.0,
0.0,
true,
true,
false,
true,
EvalMode::ValueAndGradient,
&options,
crate::types::RhoPrior::Flat,
PseudoLogdetMode::Smooth,
&no_dh,
None,
&no_d2h,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
)
.expect("projected eval ok");
let unprojected = joint_outer_evaluate(
&inner,
&specs,
&per_block,
&rho,
&beta,
JointHessianSource::Dense(h.clone()),
&ranges,
3,
0.0,
0.0,
0.0,
1.0,
0.0,
true,
true,
false,
false,
EvalMode::ValueAndGradient,
&options,
crate::types::RhoPrior::Flat,
PseudoLogdetMode::Smooth,
&no_dh,
None,
&no_d2h,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
)
.expect("unprojected eval ok");
let g_un = unprojected.gradient[0];
let g_pr = projected.gradient[0];
if rho_val == 10.0 {
g_un_at_10 = g_un.abs();
g_pr_at_10 = g_pr.abs();
}
}
let rel_diff = (g_un_at_10 - g_pr_at_10).abs() / g_pr_at_10.max(1e-30);
assert!(
rel_diff < 1e-4,
"projection should be near-invariant on this fixture at rho=10; \
got g_un={:.6e}, g_pr={:.6e}, rel_diff={:.3e}",
g_un_at_10,
g_pr_at_10,
rel_diff
);
}
#[test]
pub(crate) fn large_scale_multiblock_outer_gradient_with_realistic_drift_is_bounded() {
let p_time = 10usize;
let p_marg = 14usize;
let p_logs = 14usize;
let p_total = p_time + p_marg + p_logs;
let ranges = vec![
(0, p_time),
(p_time, p_time + p_marg),
(p_time + p_marg, p_total),
];
fn build_duchon_shape(p: usize, nullspace: usize, signal_scale: f64) -> Array2<f64> {
let rank = p - nullspace;
let mut eigvals = vec![0.0_f64; p];
for i in 0..rank {
eigvals[i] = signal_scale * 0.5_f64.powi(i as i32);
}
let mut u = Array2::<f64>::zeros((p, p));
for i in 0..p {
u[[i, 0]] = 1.0 / (p as f64).sqrt();
for j in 1..p {
u[[i, j]] = (2.0 / p as f64).sqrt()
* (std::f64::consts::PI * (i as f64 + 0.5) * j as f64 / p as f64).cos();
}
}
let mut s = Array2::<f64>::zeros((p, p));
for k in 0..p {
if eigvals[k] == 0.0 {
continue;
}
for i in 0..p {
for j in 0..p {
s[[i, j]] += eigvals[k] * u[[i, k]] * u[[j, k]];
}
}
}
s
}
let s_time = build_duchon_shape(p_time, 2, 1.0);
let s_marg_0 = build_duchon_shape(p_marg, 4, 1.0);
let s_marg_1 = build_duchon_shape(p_marg, 4, 0.7);
let s_logs = build_duchon_shape(p_logs, 4, 1.0);
let rho = array![10.0_f64, 10.0, 10.0, 4.5];
let lams: Array1<f64> = rho.mapv(f64::exp);
let s_lambdas_local: Vec<Array2<f64>> = vec![
s_time.mapv(|v| v * lams[0]),
(&s_marg_0 * lams[1]) + &(&s_marg_1 * lams[2]),
s_logs.mapv(|v| v * lams[3]),
];
let beta_flat = Array1::<f64>::from_iter((0..p_total).map(|i| ((i as f64) * 0.13).sin()));
let n_scale = 2.0e5_f64;
let mut h = Array2::<f64>::eye(p_total) * n_scale;
for i in 0..p_total {
for j in 0..p_total {
if i != j {
let v = 0.05_f64
* n_scale
* ((i as f64 - j as f64).abs() / p_total as f64).exp().recip();
h[[i, j]] = v;
}
}
}
let no_dh = |_v_k: &Array1<f64>| -> Result<Option<DriftDerivResult>, String> { Ok(None) };
let compute_dh = no_dh;
let no_d2h = |_u: &Array1<f64>, _v: &Array1<f64>| -> Result<Option<DriftDerivResult>, String> {
Ok(None)
};
let mk_spec = |name: &str,
p: usize,
penalties: Vec<Array2<f64>>,
null: usize,
rho_block: Array1<f64>|
-> ParameterBlockSpec {
ParameterBlockSpec {
name: name.to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(
Array2::<f64>::zeros((1, p)),
)),
offset: Array1::zeros(1),
penalties: penalties.into_iter().map(PenaltyMatrix::Dense).collect(),
nullspace_dims: vec![null],
initial_log_lambdas: rho_block,
initial_beta: Some(beta_flat.slice(s![..p]).to_owned()),
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
}
};
let specs = vec![
mk_spec(
"time_surface",
p_time,
vec![s_time.clone()],
2,
array![rho[0]],
),
mk_spec(
"marginal_surface",
p_marg,
vec![s_marg_0.clone(), s_marg_1.clone()],
4,
array![rho[1], rho[2]],
),
mk_spec(
"logslope_surface",
p_logs,
vec![s_logs.clone()],
4,
array![rho[3]],
),
];
let per_block = vec![array![rho[0]], array![rho[1], rho[2]], array![rho[3]]];
let inner = BlockwiseInnerResult {
block_states: vec![
ParameterBlockState {
beta: beta_flat.slice(s![0..p_time]).to_owned(),
eta: Array1::zeros(1),
},
ParameterBlockState {
beta: beta_flat.slice(s![p_time..p_time + p_marg]).to_owned(),
eta: Array1::zeros(1),
},
ParameterBlockState {
beta: beta_flat.slice(s![p_time + p_marg..p_total]).to_owned(),
eta: Array1::zeros(1),
},
],
active_sets: vec![None, None, None],
log_likelihood: 0.0,
penalty_value: 0.5
* (lams[0]
* beta_flat.slice(s![0..p_time]).dot(&fast_av(
&s_time,
&beta_flat.slice(s![0..p_time]).to_owned(),
))
+ lams[1]
* beta_flat.slice(s![p_time..p_time + p_marg]).dot(&fast_av(
&s_marg_0,
&beta_flat.slice(s![p_time..p_time + p_marg]).to_owned(),
))
+ lams[2]
* beta_flat.slice(s![p_time..p_time + p_marg]).dot(&fast_av(
&s_marg_1,
&beta_flat.slice(s![p_time..p_time + p_marg]).to_owned(),
))
+ lams[3]
* beta_flat.slice(s![p_time + p_marg..p_total]).dot(&fast_av(
&s_logs,
&beta_flat.slice(s![p_time + p_marg..p_total]).to_owned(),
))),
cycles: 1,
converged: true,
block_logdet_h: 0.0,
block_logdet_s: 0.0,
s_lambdas: s_lambdas_local,
joint_workspace: None,
kkt_residual: None,
active_constraints: None,
};
let options = BlockwiseFitOptions {
use_remlobjective: true,
use_outer_hessian: false,
..BlockwiseFitOptions::default()
};
let projected = joint_outer_evaluate(
&inner,
&specs,
&per_block,
&rho,
&beta_flat,
JointHessianSource::Dense(h.clone()),
&ranges,
p_total,
0.0,
0.0,
0.0,
1.0,
0.0,
true,
true,
false,
true,
EvalMode::ValueAndGradient,
&options,
crate::types::RhoPrior::Flat,
PseudoLogdetMode::Smooth,
&compute_dh,
None,
&no_d2h,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
)
.expect("large-scale projected eval");
let dominant_terms = [
0.5 * lams[0]
* beta_flat.slice(s![0..p_time]).dot(&fast_av(
&s_time,
&beta_flat.slice(s![0..p_time]).to_owned(),
)),
0.5 * lams[1]
* beta_flat.slice(s![p_time..p_time + p_marg]).dot(&fast_av(
&s_marg_0,
&beta_flat.slice(s![p_time..p_time + p_marg]).to_owned(),
)),
0.5 * lams[2]
* beta_flat.slice(s![p_time..p_time + p_marg]).dot(&fast_av(
&s_marg_1,
&beta_flat.slice(s![p_time..p_time + p_marg]).to_owned(),
)),
0.5 * lams[3]
* beta_flat.slice(s![p_time + p_marg..p_total]).dot(&fast_av(
&s_logs,
&beta_flat.slice(s![p_time + p_marg..p_total]).to_owned(),
)),
];
assert_eq!(
projected.gradient.len(),
dominant_terms.len(),
"projected gradient dimension changed"
);
for (k, (&g, &dominant_term)) in projected
.gradient
.iter()
.zip(dominant_terms.iter())
.enumerate()
{
let bound = dominant_term.abs().max(1.0) * 100.0;
assert!(g.is_finite(), "gradient[{k}] is non-finite: {g}");
assert!(
g.abs() <= bound,
"gradient[{k}] = {:.6e} exceeds physical bound 100·|½λβ'Sβ| = {:.6e} \
(dominant_term={:.6e}); this reproduces the large-scale blowup \
inside joint_outer_evaluate.",
g,
bound,
dominant_term
);
}
}
#[test]
pub(crate) fn direct_joint_hyper_inner_tolerance_follows_outer_target() {
let options = BlockwiseFitOptions {
inner_tol: 1e-6,
outer_tol: 1e-5,
inner_max_cycles: 100,
..BlockwiseFitOptions::default()
};
let (eval_options, strict_warm_start) =
derivative_quality_options_and_warm_start(&options, None, true);
assert_eq!(
eval_options.inner_tol, options.outer_tol,
"default exact joint-hyper eval should use the outer optimizer scale"
);
assert_eq!(eval_options.inner_max_cycles, options.inner_max_cycles);
assert!(
strict_warm_start.is_none(),
"loosening to the outer scale should not discard cached inner state"
);
let large_scale_objective = 3.689e5;
let posted_residual = 6.788e-1;
let posted_objective_change = 4.209e-2;
let eval_tol = eval_options.inner_tol * (1.0 + large_scale_objective);
assert!(
posted_residual <= 2.0 * eval_tol && posted_objective_change <= eval_tol,
"the exact outer startup validation should accept numerically flat inner solves at outer scale"
);
let (rho_default, _) = derivative_quality_options_and_warm_start(&options, None, false);
assert_eq!(
rho_default.inner_tol, options.inner_tol,
"rho-only exact joint-hyper eval must preserve the rho-only outer surface"
);
let tighter_options = BlockwiseFitOptions {
inner_tol: 1e-3,
outer_tol: 1e-5,
inner_max_cycles: 100,
..BlockwiseFitOptions::default()
};
let (tightened, _) = derivative_quality_options_and_warm_start(&tighter_options, None, true);
assert_eq!(tightened.inner_tol, tighter_options.outer_tol);
assert_eq!(tightened.inner_max_cycles, 200);
let (rho_only, _) = derivative_quality_options_and_warm_start(&tighter_options, None, false);
assert_eq!(rho_only.inner_tol, tighter_options.inner_tol);
assert_eq!(rho_only.inner_max_cycles, tighter_options.inner_max_cycles);
let explicitly_tight_options = BlockwiseFitOptions {
inner_tol: 1e-12,
outer_tol: 1e-10,
inner_max_cycles: 100,
..BlockwiseFitOptions::default()
};
let (explicitly_tight, _) =
derivative_quality_options_and_warm_start(&explicitly_tight_options, None, true);
assert_eq!(
explicitly_tight.inner_tol, 1e-12,
"an explicitly sub-default inner tolerance should be honored down to the explicit direct joint-hyper floor instead of being loosened to outer_tol"
);
assert_eq!(explicitly_tight.inner_max_cycles, 100);
}
#[test]
pub(crate) fn exact_spatial_joint_hyper_inner_tolerance_follows_spatial_outer_target() {
let options = BlockwiseFitOptions {
inner_tol: 1e-6,
outer_tol: 1e-10,
inner_max_cycles: 200,
..BlockwiseFitOptions::default()
};
let spatial_outer_tol = 1e-4;
let eval_input = joint_hyper_options_for_outer_tolerance(&options, spatial_outer_tol);
let (eval_options, strict_warm_start) =
derivative_quality_options_and_warm_start(&eval_input, None, true);
assert_eq!(eval_options.outer_tol, spatial_outer_tol);
assert_eq!(
eval_options.inner_tol, spatial_outer_tol,
"exact spatial [rho, psi] evaluations should certify beta only to the tolerance of the outer optimizer consuming the derivative"
);
assert!(
strict_warm_start.is_none(),
"loosening an over-tight caller tolerance should preserve the cached inner state"
);
let large_scale_objective = 3.689e5;
let posted_residual_plateau = 6.788e-1;
let posted_objective_change = 4.209e-2;
let eval_tol = eval_options.inner_tol * (1.0 + large_scale_objective);
assert!(
posted_residual_plateau <= eval_tol && posted_objective_change <= eval_tol,
"the posted saturated Newton plateau is below the spatial outer derivative accuracy target"
);
}
pub(crate) fn outerobjective_andgradient<F: CustomFamily + Clone + Send + Sync + 'static>(
family: &F,
specs: &[ParameterBlockSpec],
options: &BlockwiseFitOptions,
penalty_counts: &[usize],
rho: &Array1<f64>,
warm_start: Option<&ConstrainedWarmStart>,
) -> Result<(f64, Array1<f64>, ConstrainedWarmStart), String> {
let (obj, grad, _, warm) = super::test_support::outerobjectivegradienthessian(
family,
specs,
options,
penalty_counts,
rho,
warm_start,
EvalMode::ValueAndGradient,
)?;
Ok((obj, grad, warm))
}
pub(crate) struct BinomialLocationScaleWiggleOuterFixture {
pub(crate) family: BinomialLocationScaleWiggleFamily,
pub(crate) specs: Vec<ParameterBlockSpec>,
pub(crate) penalty_counts: Vec<usize>,
pub(crate) rho: Array1<f64>,
pub(crate) options: BlockwiseFitOptions,
}
pub(crate) fn binomial_location_scale_wiggle_outer_fixture()
-> BinomialLocationScaleWiggleOuterFixture {
let base = binomial_location_scale_base_fixture();
let q_seed = Array1::linspace(-1.4, 1.4, base.n);
let knots = crate::families::wiggle::initializewiggle_knots_from_seed(q_seed.view(), 3, 4)
.expect("knots");
let wiggle_block = crate::families::wiggle::buildwiggle_block_input_from_knots(
q_seed.view(),
&knots,
3,
2,
false,
)
.expect("wiggle block");
let wigglespec = ParameterBlockSpec {
name: "wiggle".to_string(),
design: wiggle_block.design.clone(),
offset: wiggle_block.offset.clone(),
penalties: wiggle_block
.penalties
.iter()
.map(|ps| match ps {
crate::model_types::PenaltySpec::Block {
local, col_range, ..
} => PenaltyMatrix::Blockwise {
local: local.clone(),
col_range: col_range.clone(),
total_dim: wiggle_block.design.ncols(),
},
crate::model_types::PenaltySpec::Dense(m)
| crate::model_types::PenaltySpec::DenseWithMean { matrix: m, .. } => {
PenaltyMatrix::Dense(m.clone())
}
})
.collect(),
nullspace_dims: wiggle_block.nullspace_dims.clone(),
initial_log_lambdas: array![0.1],
initial_beta: Some(Array1::from_elem(wiggle_block.design.ncols(), 0.03)),
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let family = BinomialLocationScaleWiggleFamily {
y: base.y,
weights: base.weights,
link_kind: crate::types::InverseLink::Standard(crate::types::StandardLink::Probit),
threshold_design: Some(base.threshold_design),
log_sigma_design: Some(base.log_sigma_design),
wiggle_knots: knots,
wiggle_degree: 3,
policy: crate::resource::ResourcePolicy::default_library(),
};
BinomialLocationScaleWiggleOuterFixture {
family,
specs: vec![base.threshold_spec, base.log_sigma_spec, wigglespec],
penalty_counts: vec![1usize, 1usize, 1usize],
rho: array![0.05, -0.15, 0.1],
options: BlockwiseFitOptions {
use_remlobjective: true,
ridge_floor: 1e-10,
outer_max_iter: 1,
..BlockwiseFitOptions::default()
},
}
}
#[derive(Clone)]
pub(crate) struct OneBlockIdentityFamily;
#[test]
pub(crate) fn joint_coupled_coefficient_hessian_cost_matches_n_times_p_total_squared() {
let mk_spec = |p: usize| ParameterBlockSpec {
name: "test".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(Array2::zeros((
200, p,
)))),
offset: Array1::zeros(200),
penalties: Vec::new(),
nullspace_dims: Vec::new(),
initial_log_lambdas: Array1::zeros(0),
initial_beta: None,
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let specs = vec![mk_spec(12), mk_spec(20), mk_spec(8)];
assert_eq!(
joint_coupled_coefficient_hessian_cost(200, &specs),
200 * 40 * 40
);
assert_eq!(
default_coefficient_hessian_cost(&specs),
200 * (144 + 400 + 64)
);
assert!(
joint_coupled_coefficient_hessian_cost(200, &specs)
> default_coefficient_hessian_cost(&specs)
);
}
#[test]
pub(crate) fn large_scale_exact_adaptive_hessian_order_stays_second_order() {
let n_train = 320_000u64;
let p = 101usize;
let retained_rho_dim = 3usize;
let spec = ParameterBlockSpec {
name: "matern60".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(Array2::zeros((
1, p,
)))),
offset: Array1::zeros(1),
penalties: (0..retained_rho_dim)
.map(|_| PenaltyMatrix::Dense(Array2::eye(p)))
.collect(),
nullspace_dims: vec![0; retained_rho_dim],
initial_log_lambdas: Array1::zeros(retained_rho_dim),
initial_beta: None,
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let coefficient_hessian_cost = n_train * (p as u64) * (p as u64);
assert_eq!(coefficient_hessian_cost, 3_264_320_000);
assert_eq!(
retained_rho_dim as u64 * coefficient_hessian_cost,
9_792_960_000
);
assert_eq!(
exact_outer_order_from_capability(&[spec], coefficient_hessian_cost),
ExactOuterDerivativeOrder::Second
);
}
#[test]
pub(crate) fn use_joint_matrix_free_path_triggers_at_each_documented_threshold() {
assert!(use_joint_matrix_free_path(512, 1));
assert!(use_joint_matrix_free_path(2048, 4));
assert!(!use_joint_matrix_free_path(511, 1));
assert!(use_joint_matrix_free_path(128, 50_000));
assert!(!use_joint_matrix_free_path(127, 50_000));
assert!(!use_joint_matrix_free_path(128, 31_249));
assert!(!use_joint_matrix_free_path(51, 320_000));
assert!(use_joint_matrix_free_path(128, 31_250));
assert!(!use_joint_matrix_free_path(127, 31_497));
assert!(!use_joint_matrix_free_path(8, 100));
assert!(!use_joint_matrix_free_path(64, 1000));
}
#[test]
pub(crate) fn large_scale_shape_margslope_flex_cycle0_uses_bounded_dense_route() {
let total_p = 51;
let total_n = 320_000;
let max_pcg_hvps_before_fix = JOINT_PCG_MAX_ITER_MULTIPLIER * total_p;
assert_eq!(max_pcg_hvps_before_fix, 204);
assert!(
!use_joint_matrix_free_path(total_p, total_n),
"p=51/n=320k should materialize exactly 51 columns instead of risking up to {max_pcg_hvps_before_fix} expensive PCG matvecs in cycle 0"
);
}
pub(crate) struct CountingHessianWorkspace {
pub(crate) dense_calls: Arc<AtomicUsize>,
pub(crate) matvec_calls: Arc<AtomicUsize>,
pub(crate) source_preference: JointHessianSourcePreference,
}
impl ExactNewtonJointHessianWorkspace for CountingHessianWorkspace {
fn hessian_dense(&self) -> Result<Option<Array2<f64>>, String> {
self.dense_calls.fetch_add(1, Ordering::Relaxed);
Ok(Some(Array2::eye(2)))
}
fn hessian_source_preference(&self) -> JointHessianSourcePreference {
self.source_preference
}
fn hessian_matvec_available(&self) -> bool {
true
}
fn hessian_matvec(&self, v: &Array1<f64>) -> Result<Option<Array1<f64>>, String> {
self.matvec_calls.fetch_add(1, Ordering::Relaxed);
Ok(Some(v.clone()))
}
fn hessian_diagonal(&self) -> Result<Option<Array1<f64>>, String> {
Ok(Some(Array1::ones(2)))
}
fn directional_derivative(&self, arr: &Array1<f64>) -> Result<Option<Array2<f64>>, String> {
assert!(arr.iter().all(|v| !v.is_nan()));
Ok(None)
}
}
#[test]
pub(crate) fn workspace_hessian_source_prefers_dense_without_zero_matvec_probe() {
let dense_calls = Arc::new(AtomicUsize::new(0));
let matvec_calls = Arc::new(AtomicUsize::new(0));
let workspace: Arc<dyn ExactNewtonJointHessianWorkspace> = Arc::new(CountingHessianWorkspace {
dense_calls: Arc::clone(&dense_calls),
matvec_calls: Arc::clone(&matvec_calls),
source_preference: JointHessianSourcePreference::Dense,
});
let source = exact_newton_joint_hessian_source_from_workspace(
&workspace,
2,
MaterializationIntent::InnerSolve,
"counting workspace",
)
.expect("hessian source should build")
.expect("hessian source should be present");
assert_eq!(dense_calls.load(Ordering::Relaxed), 1);
assert_eq!(matvec_calls.load(Ordering::Relaxed), 0);
match source {
JointHessianSource::Dense(hessian) => assert_eq!(hessian, Array2::<f64>::eye(2)),
JointHessianSource::Operator { .. } => panic!("dense source was not preferred"),
}
assert_eq!(matvec_calls.load(Ordering::Relaxed), 0);
}
#[test]
pub(crate) fn workspace_hessian_source_honors_operator_preference_before_dense_probe() {
let dense_calls = Arc::new(AtomicUsize::new(0));
let matvec_calls = Arc::new(AtomicUsize::new(0));
let workspace: Arc<dyn ExactNewtonJointHessianWorkspace> = Arc::new(CountingHessianWorkspace {
dense_calls: Arc::clone(&dense_calls),
matvec_calls: Arc::clone(&matvec_calls),
source_preference: JointHessianSourcePreference::Operator,
});
let source = exact_newton_joint_hessian_source_from_workspace(
&workspace,
2,
MaterializationIntent::InnerSolve,
"operator-preferred counting workspace",
)
.expect("hessian source should build")
.expect("hessian source should be present");
assert_eq!(
dense_calls.load(Ordering::Relaxed),
0,
"operator-preferred source construction must not probe hessian_dense"
);
match source {
JointHessianSource::Operator { apply, .. } => {
let v = array![3.0, -2.0];
assert_eq!(apply(&v).expect("operator apply should succeed"), v);
assert_eq!(matvec_calls.load(Ordering::Relaxed), 1);
}
JointHessianSource::Dense(_) => panic!("operator source was not preferred"),
}
}
pub(crate) struct IntentRefiningHessianWorkspace {
pub(crate) dense_calls: Arc<AtomicUsize>,
pub(crate) matvec_calls: Arc<AtomicUsize>,
}
impl ExactNewtonJointHessianWorkspace for IntentRefiningHessianWorkspace {
fn hessian_dense(&self) -> Result<Option<Array2<f64>>, String> {
self.dense_calls.fetch_add(1, Ordering::Relaxed);
Ok(Some(Array2::eye(2)))
}
fn hessian_source_preference(&self) -> JointHessianSourcePreference {
JointHessianSourcePreference::Operator
}
fn hessian_source_preference_for_intent(
&self,
intent: MaterializationIntent,
) -> JointHessianSourcePreference {
match intent {
MaterializationIntent::LogdetFactorization => JointHessianSourcePreference::Dense,
MaterializationIntent::InnerSolve
| MaterializationIntent::OuterEvaluation
| MaterializationIntent::OuterGradient => JointHessianSourcePreference::Operator,
}
}
fn hessian_matvec_available(&self) -> bool {
true
}
fn hessian_matvec(&self, v: &Array1<f64>) -> Result<Option<Array1<f64>>, String> {
self.matvec_calls.fetch_add(1, Ordering::Relaxed);
Ok(Some(v.clone()))
}
fn hessian_diagonal(&self) -> Result<Option<Array1<f64>>, String> {
Ok(Some(Array1::ones(2)))
}
fn directional_derivative(&self, arr: &Array1<f64>) -> Result<Option<Array2<f64>>, String> {
assert!(arr.iter().all(|v| !v.is_nan()));
Ok(None)
}
}
#[test]
pub(crate) fn logdet_intent_takes_dense_while_inner_solve_takes_operator() {
let dense_calls = Arc::new(AtomicUsize::new(0));
let matvec_calls = Arc::new(AtomicUsize::new(0));
let workspace: Arc<dyn ExactNewtonJointHessianWorkspace> =
Arc::new(IntentRefiningHessianWorkspace {
dense_calls: Arc::clone(&dense_calls),
matvec_calls: Arc::clone(&matvec_calls),
});
let logdet_source = exact_newton_joint_hessian_source_from_workspace(
&workspace,
2,
MaterializationIntent::LogdetFactorization,
"intent-refining logdet",
)
.expect("logdet source should build")
.expect("logdet source should be present");
assert_eq!(dense_calls.load(Ordering::Relaxed), 1);
assert_eq!(matvec_calls.load(Ordering::Relaxed), 0);
match logdet_source {
JointHessianSource::Dense(hessian) => assert_eq!(hessian, Array2::<f64>::eye(2)),
JointHessianSource::Operator { .. } => {
panic!("logdet intent must take the dense representation")
}
}
let inner_source = exact_newton_joint_hessian_source_from_workspace(
&workspace,
2,
MaterializationIntent::InnerSolve,
"intent-refining inner solve",
)
.expect("inner source should build")
.expect("inner source should be present");
assert_eq!(
dense_calls.load(Ordering::Relaxed),
1,
"inner-solve intent must not probe hessian_dense"
);
match inner_source {
JointHessianSource::Operator { apply, .. } => {
let v = array![1.5, -4.0];
assert_eq!(apply(&v).expect("operator apply should succeed"), v);
assert_eq!(matvec_calls.load(Ordering::Relaxed), 1);
}
JointHessianSource::Dense(_) => {
panic!("inner-solve intent must take the operator representation")
}
}
}
#[test]
pub(crate) fn default_coefficient_gradient_cost_is_half_of_hessian_cost() {
let mk_spec = |n: usize, p: usize| ParameterBlockSpec {
name: "test".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(Array2::zeros((
n, p,
)))),
offset: Array1::zeros(n),
penalties: Vec::new(),
nullspace_dims: Vec::new(),
initial_log_lambdas: Array1::zeros(0),
initial_beta: None,
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let specs = vec![mk_spec(500, 10), mk_spec(500, 14)];
let h_cost = default_coefficient_hessian_cost(&specs);
let g_cost = default_coefficient_gradient_cost(&specs);
assert_eq!(h_cost, 500 * 100 + 500 * 196);
assert_eq!(g_cost, h_cost / 2);
}
#[test]
pub(crate) fn first_order_outer_iter_gate_caps_expensive_gradient_paths() {
assert_eq!(
cost_gated_first_order_max_iter(60, 10_000_000_000, false),
8
);
assert_eq!(
cost_gated_first_order_max_iter(60, 100_000_000_000, false),
4
);
assert_eq!(
cost_gated_first_order_max_iter(60, 100_000_000_000, true),
60
);
}
#[test]
pub(crate) fn custom_family_default_outer_seed_config_is_tightened_for_expensive_paths() {
let family = OneBlockIdentityFamily;
let small = family.outer_seed_config(4);
assert_eq!(small.max_seeds, 6);
assert_eq!(small.seed_budget, 1);
assert_eq!(small.screen_max_inner_iterations, 2);
let large = family.outer_seed_config(16);
assert_eq!(large.max_seeds, 4);
assert_eq!(large.seed_budget, 1);
assert_eq!(large.screen_max_inner_iterations, 2);
}
#[test]
pub(crate) fn floor_positiveworking_weights_preserves_exactzeros() {
let weights = array![0.0, 1.0e-16, 0.25];
let floored = floor_positiveworking_weights(&weights, 1.0e-6);
assert_eq!(floored[0], 0.0);
assert_eq!(floored[1], 1.0e-6);
assert_eq!(floored[2], 0.25);
}
#[test]
pub(crate) fn screened_outer_warm_start_reuses_any_matching_rho_dimension() {
let rho_far = array![2.25, -0.5];
let cache = Some(ConstrainedWarmStart {
rho: array![0.0, -0.5],
block_beta: vec![array![1.0, -1.0]],
active_sets: vec![None],
cached_inner: None,
});
let retained = screened_outer_warm_start(cache.as_ref(), &rho_far)
.expect("matching-dimension warm starts should remain reusable");
assert_eq!(retained.rho, array![0.0, -0.5]);
assert_eq!(retained.block_beta[0], array![1.0, -1.0]);
assert_eq!(retained.active_sets[0], None);
}
#[test]
pub(crate) fn cached_beta_warm_start_splits_blocks_and_validates_shape() {
let mk_spec = |name: &str, p: usize| ParameterBlockSpec {
name: name.to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(Array2::zeros((
3, p,
)))),
offset: Array1::zeros(3),
penalties: Vec::new(),
nullspace_dims: Vec::new(),
initial_log_lambdas: Array1::zeros(0),
initial_beta: None,
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let specs = vec![mk_spec("a", 2), mk_spec("b", 3)];
let warm = constrained_warm_start_from_cached_beta(4, &specs, &array![1., 2., 3., 4., 5.])
.expect("matching beta");
assert_eq!(warm.rho.len(), 4);
assert_eq!(warm.block_beta, vec![array![1., 2.], array![3., 4., 5.]]);
assert_eq!(warm.active_sets, vec![None, None]);
assert!(warm.cached_inner.is_none());
let err = match constrained_warm_start_from_cached_beta(4, &specs, &array![1., 2., 3.]) {
Ok(_) => panic!("wrong beta length should be rejected"),
Err(err) => err,
};
assert!(
err.to_string()
.contains("cached inner beta has length 3, but custom-family blocks require length 5"),
"{err}"
);
}
#[test]
pub(crate) fn cached_beta_warm_start_rejects_nonfinite_entries() {
let spec = ParameterBlockSpec {
name: "a".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(Array2::zeros((
3, 2,
)))),
offset: Array1::zeros(3),
penalties: Vec::new(),
nullspace_dims: Vec::new(),
initial_log_lambdas: Array1::zeros(0),
initial_beta: None,
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let err = match constrained_warm_start_from_cached_beta(1, &[spec], &array![1.0, f64::NAN]) {
Ok(_) => panic!("non-finite beta should be rejected"),
Err(err) => err,
};
assert!(
err.to_string()
.contains("cached inner beta contains non-finite entries"),
"{err}"
);
}
#[test]
pub(crate) fn custom_outer_state_reset_preserves_seeded_cached_beta() {
let spec = ParameterBlockSpec {
name: "a".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(Array2::zeros((
3, 2,
)))),
offset: Array1::zeros(3),
penalties: Vec::new(),
nullspace_dims: Vec::new(),
initial_log_lambdas: Array1::zeros(0),
initial_beta: None,
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let mut state = CustomOuterState::new(None);
state
.seed_cached_beta(1, &[spec], &array![4.0, -2.0])
.expect("cached beta seed");
state.warm_cache = None;
state.reset();
let warm = state
.warm_cache
.as_ref()
.expect("reset should restore cached beta seed");
assert_eq!(warm.rho.len(), 1);
assert_eq!(warm.block_beta, vec![array![4.0, -2.0]]);
assert!(warm.cached_inner.is_none());
}
#[test]
pub(crate) fn custom_outer_state_reset_preserves_existing_persistent_warm_start() {
let persistent = ConstrainedWarmStart {
rho: array![0.25],
block_beta: vec![array![1.0, 2.0]],
active_sets: vec![None],
cached_inner: None,
};
let mut state = CustomOuterState::new(Some(persistent.clone()));
state.warm_cache = None;
state.reset();
let warm = state
.warm_cache
.as_ref()
.expect("reset should restore persistent warm start");
assert_eq!(warm.rho, persistent.rho);
assert_eq!(warm.block_beta, persistent.block_beta);
}
#[test]
pub(crate) fn public_warm_start_compatibility_checks_rho_dimension() {
let warm = CustomFamilyWarmStart {
inner: ConstrainedWarmStart {
rho: array![0.0, -0.5],
block_beta: vec![array![1.0, -1.0]],
active_sets: vec![None],
cached_inner: None,
},
};
assert!(warm.compatible_with_rho(&array![0.75, -0.5]));
assert!(warm.compatible_with_rho(&array![1.75, -0.5]));
assert!(!warm.compatible_with_rho(&array![0.0]));
}
#[test]
pub(crate) fn psi_drift_deriv_workspace_preserves_block_local_operator() {
#[derive(Clone)]
struct ZeroFamily;
impl CustomFamily for ZeroFamily {
fn evaluate(
&self,
block_states: &[ParameterBlockState],
) -> Result<FamilyEvaluation, String> {
let _unused_block_states = block_states;
let _ = block_states;
Ok(FamilyEvaluation {
log_likelihood: 0.0,
blockworking_sets: vec![],
})
}
}
struct BlockLocalPsiWorkspace;
impl ExactNewtonJointPsiWorkspace for BlockLocalPsiWorkspace {
fn second_order_terms(
&self,
_: usize,
_: usize,
) -> Result<Option<ExactNewtonJointPsiSecondOrderTerms>, String> {
Ok(None)
}
fn hessian_directional_derivative(
&self,
psi_index: usize,
arr: &Array1<f64>,
) -> Result<Option<DriftDerivResult>, String> {
assert!(arr.iter().all(|v| !v.is_nan()));
assert_eq!(psi_index, 0);
Ok(Some(DriftDerivResult::Operator(Arc::new(
BlockLocalDrift {
local: array![[3.0, 1.0], [1.0, 2.0]],
start: 1,
end: 3,
total_dim: 3,
},
))))
}
}
let callback = build_psi_drift_deriv_callback(
&ZeroFamily,
&[],
&[],
Arc::new(Vec::new()),
false,
Some(Arc::new(BlockLocalPsiWorkspace)),
)
.expect("non-Gaussian psi drift callback should be available");
let result = callback(0, &array![1.0, 2.0, 3.0])
.expect("workspace-backed psi drift derivative should be returned");
match result {
DriftDerivResult::Dense(_) => {
panic!("workspace-backed block-local psi drift derivative was densified")
}
DriftDerivResult::Operator(op) => {
let (local, start, end) = op
.block_local_data()
.expect("block-local operator metadata should be preserved");
assert_eq!((start, end), (1, 3));
assert_eq!(local, &array![[3.0, 1.0], [1.0, 2.0]]);
}
}
}
#[test]
pub(crate) fn contracted_psi_hook_declines_partial_axis_coverage_before_pair_tables_are_skipped() {
struct PartialContractedPsiWorkspace;
impl ExactNewtonJointPsiWorkspace for PartialContractedPsiWorkspace {
fn second_order_terms(
&self,
_: usize,
_: usize,
) -> Result<Option<ExactNewtonJointPsiSecondOrderTerms>, String> {
Ok(None)
}
fn second_order_terms_contracted(
&self,
alpha_psi: &[f64],
) -> Result<Option<ExactNewtonJointPsiSecondOrderContracted>, String> {
if alpha_psi.get(1).copied().unwrap_or(0.0) != 0.0 {
return Ok(None);
}
let psi_dim = alpha_psi.len();
Ok(Some(ExactNewtonJointPsiSecondOrderContracted {
objective: Array1::zeros(psi_dim),
score: Array2::zeros((psi_dim, 1)),
hessian: (0..psi_dim)
.map(|_| DriftDerivResult::Dense(Array2::zeros((1, 1))))
.collect(),
}))
}
fn hessian_directional_derivative(
&self,
_: usize,
d_beta_flat: &Array1<f64>,
) -> Result<Option<DriftDerivResult>, String> {
assert_eq!(d_beta_flat.len(), 1);
Ok(None)
}
}
let specs = vec![ParameterBlockSpec {
name: "partial".to_string(),
design: DesignMatrix::from(Array2::ones((1, 1))),
offset: Array1::zeros(1),
penalties: Vec::new(),
nullspace_dims: Vec::new(),
initial_log_lambdas: Array1::zeros(0),
initial_beta: None,
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
}];
let derivative_blocks = Arc::new(vec![vec![
CustomFamilyBlockPsiDerivative::new(
None,
Array2::zeros((1, 1)),
Array2::zeros((1, 1)),
None,
None,
None,
None,
),
CustomFamilyBlockPsiDerivative::new(
None,
Array2::zeros((1, 1)),
Array2::zeros((1, 1)),
None,
None,
None,
None,
),
]]);
let hook = build_contracted_psi_hook(
&specs,
derivative_blocks,
&array![0.0],
&[],
&[0],
None,
Some(Arc::new(PartialContractedPsiWorkspace)),
)
.expect("partial contracted psi hook probe should not error");
assert!(
hook.is_none(),
"partial contracted psi coverage must keep the exact per-pair assembly path"
);
}
#[test]
pub(crate) fn contracted_psi_hook_rejects_wrong_score_width_before_installing_operator_hook() {
struct WrongScoreWidthPsiWorkspace;
impl ExactNewtonJointPsiWorkspace for WrongScoreWidthPsiWorkspace {
fn second_order_terms(
&self,
_: usize,
_: usize,
) -> Result<Option<ExactNewtonJointPsiSecondOrderTerms>, String> {
Ok(None)
}
fn second_order_terms_contracted(
&self,
alpha_psi: &[f64],
) -> Result<Option<ExactNewtonJointPsiSecondOrderContracted>, String> {
let psi_dim = alpha_psi.len();
Ok(Some(ExactNewtonJointPsiSecondOrderContracted {
objective: Array1::zeros(psi_dim),
score: Array2::zeros((psi_dim, 0)),
hessian: (0..psi_dim)
.map(|_| DriftDerivResult::Dense(Array2::zeros((1, 1))))
.collect(),
}))
}
fn hessian_directional_derivative(
&self,
_: usize,
d_beta_flat: &Array1<f64>,
) -> Result<Option<DriftDerivResult>, String> {
assert_eq!(d_beta_flat.len(), 1);
Ok(None)
}
}
let specs = vec![ParameterBlockSpec {
name: "wrong-score-width".to_string(),
design: DesignMatrix::from(Array2::ones((1, 1))),
offset: Array1::zeros(1),
penalties: Vec::new(),
nullspace_dims: Vec::new(),
initial_log_lambdas: Array1::zeros(0),
initial_beta: None,
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
}];
let derivative_blocks = Arc::new(vec![vec![CustomFamilyBlockPsiDerivative::new(
None,
Array2::zeros((1, 1)),
Array2::zeros((1, 1)),
None,
None,
None,
None,
)]]);
let err = match build_contracted_psi_hook(
&specs,
derivative_blocks,
&array![0.0],
&[],
&[0],
None,
Some(Arc::new(WrongScoreWidthPsiWorkspace)),
) {
Ok(_) => panic!("wrong contracted score width must be rejected before hook install"),
Err(err) => err,
};
assert!(
err.contains("score=1x0") && err.contains("beta_dim=1"),
"unexpected wrong-score-width error: {err}"
);
}
#[test]
pub(crate) fn custom_family_outer_derivatives_respects_missing_second_order_capability() {
#[derive(Clone)]
struct OneBlockFirstOrderOnlyFamily;
impl CustomFamily for OneBlockFirstOrderOnlyFamily {
fn evaluate(
&self,
block_states: &[ParameterBlockState],
) -> Result<FamilyEvaluation, String> {
let _unused_block_states = block_states;
let n = block_states[0].eta.len();
Ok(FamilyEvaluation {
log_likelihood: 0.0,
blockworking_sets: vec![BlockWorkingSet::Diagonal {
working_response: Array1::zeros(n),
working_weights: Array1::ones(n),
}],
})
}
fn exact_outer_derivative_order(
&self,
block_specs: &[ParameterBlockSpec],
options: &BlockwiseFitOptions,
) -> ExactOuterDerivativeOrder {
let _unused_block_specs = block_specs;
let _ = block_specs;
let _ = options;
ExactOuterDerivativeOrder::First
}
}
let specs = vec![ParameterBlockSpec {
name: "x".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(array![[1.0]])),
offset: array![0.0],
penalties: vec![PenaltyMatrix::Dense(array![[1.0]])],
nullspace_dims: vec![],
initial_log_lambdas: array![0.0],
initial_beta: None,
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
}];
let (gradient, hessian) = custom_family_outer_derivatives(
&OneBlockFirstOrderOnlyFamily,
&specs,
&BlockwiseFitOptions::default(),
);
assert_eq!(gradient, crate::solver::rho_optimizer::Derivative::Analytic);
assert_eq!(
hessian,
crate::solver::rho_optimizer::DeclaredHessianForm::Unavailable
);
}
#[derive(Clone)]
pub(crate) struct DefaultDiagonalExactHookFamily;
impl CustomFamily for DefaultDiagonalExactHookFamily {
fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
let _unused_block_states = block_states;
let eta = block_states[0].eta.clone();
let weights = eta.mapv(|value| 2.0 + value * value);
Ok(FamilyEvaluation {
log_likelihood: -0.5 * eta.dot(&eta),
blockworking_sets: vec![BlockWorkingSet::Diagonal {
working_response: Array1::zeros(eta.len()),
working_weights: weights,
}],
})
}
fn exact_newton_joint_hessian_beta_dependent(&self) -> bool {
true
}
fn diagonalworking_weights_directional_derivative(
&self,
block_states: &[ParameterBlockState],
_: usize,
d_eta: &Array1<f64>,
) -> Result<Option<Array1<f64>>, String> {
let _unused_block_states = block_states;
Ok(Some((&block_states[0].eta * d_eta) * 2.0))
}
fn exact_newton_joint_hessiansecond_directional_derivative(
&self,
block_states: &[ParameterBlockState],
u: &Array1<f64>,
v: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
let _unused_block_states = block_states;
let spec = default_diagonal_exact_hook_spec();
let u_eta = spec.design.apply(u);
let v_eta = spec.design.apply(v);
assert_eq!(block_states[0].eta.len(), u_eta.len());
spec.design
.xt_diag_x_signed_op(SignedWeightsView::from_array(&((&u_eta * &v_eta) * 2.0)))
.map(Some)
}
}
pub(crate) fn default_diagonal_exact_hook_spec() -> ParameterBlockSpec {
ParameterBlockSpec {
name: "default_exact".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(array![
[1.0, 0.5],
[0.0, 1.0],
[2.0, -1.0]
])),
offset: Array1::zeros(3),
penalties: vec![PenaltyMatrix::Dense(Array2::eye(2))],
nullspace_dims: vec![],
initial_log_lambdas: array![0.0],
initial_beta: Some(array![0.2, -0.1]),
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
}
}
#[test]
pub(crate) fn default_custom_family_exact_hessian_hooks_assemble_diagonal_working_sets() {
let family = DefaultDiagonalExactHookFamily;
let spec = default_diagonal_exact_hook_spec();
let beta = array![0.2, -0.1];
let eta = spec.design.apply(&beta);
let states = vec![ParameterBlockState {
beta: beta.clone(),
eta: eta.clone(),
}];
let h = family
.exact_newton_joint_hessian_with_specs(&states, &[spec.clone()])
.expect("default joint Hessian hook should succeed")
.expect("diagonal working sets should assemble an exact joint Hessian");
let expected_h = spec
.design
.xt_diag_x_signed_op(SignedWeightsView::from_array(
&eta.mapv(|value| 2.0 + value * value),
))
.unwrap();
assert_eq!(h, expected_h);
let direction = array![0.3, -0.4];
let dh = family
.exact_newton_joint_hessian_directional_derivative_with_specs(
&states,
&[spec.clone()],
&direction,
)
.expect("default joint dH hook should succeed")
.expect("diagonal weight derivative should assemble an exact joint dH");
let d_eta = spec.design.apply(&direction);
let expected_dh = spec
.design
.xt_diag_x_signed_op(SignedWeightsView::from_array(&((&eta * &d_eta) * 2.0)))
.unwrap();
assert_eq!(dh, expected_dh);
let d2h = family
.exact_newton_joint_hessiansecond_directional_derivative(&states, &direction, &beta)
.expect("family second directional hook should succeed")
.expect("second directional hook should be exact");
let beta_eta = spec.design.apply(&beta);
let expected_d2h = spec
.design
.xt_diag_x_signed_op(SignedWeightsView::from_array(&((&d_eta * &beta_eta) * 2.0)))
.unwrap();
assert_eq!(d2h, expected_d2h);
}
#[test]
pub(crate) fn default_custom_family_exact_hessian_hooks_drive_profiled_outer_hessian() {
let mut spec = default_diagonal_exact_hook_spec();
spec.initial_beta = Some(Array1::zeros(2));
let result = evaluate_custom_family_joint_hyper(
&DefaultDiagonalExactHookFamily,
&[spec],
&BlockwiseFitOptions {
use_remlobjective: true,
use_outer_hessian: true,
compute_covariance: false,
inner_max_cycles: 1,
..BlockwiseFitOptions::default()
},
&array![0.0],
&[vec![]],
None,
EvalMode::ValueGradientHessian,
)
.expect("profiled outer Hessian should use default exact Hessian hooks");
assert_eq!(result.gradient.len(), 1);
match result.outer_hessian {
crate::solver::rho_optimizer::HessianResult::Analytic(hessian) => {
assert_eq!(hessian.dim(), (1, 1));
assert!(hessian[[0, 0]].is_finite());
}
_ => panic!("outer Hessian should be analytic"),
}
}
#[test]
pub(crate) fn nonconverged_inner_refuses_profile_derivatives() {
let spec = default_diagonal_exact_hook_spec();
let result = evaluate_custom_family_joint_hyper(
&DefaultDiagonalExactHookFamily,
&[spec],
&BlockwiseFitOptions {
use_remlobjective: true,
use_outer_hessian: true,
compute_covariance: false,
inner_max_cycles: 1,
..BlockwiseFitOptions::default()
},
&array![0.0],
&[vec![]],
None,
EvalMode::ValueGradientHessian,
);
let err = match result {
Ok(_) => panic!("non-converged inner solve must not expose derivatives"),
Err(e) => e,
};
let msg = err.to_string();
assert!(
msg.contains("inner solve did not converge") && msg.contains("refusing to expose"),
"unexpected error: {msg}"
);
}
#[test]
pub(crate) fn custom_family_seed_screening_proxy_accepts_finite_partial_inner_fit() {
let specs = vec![default_diagonal_exact_hook_spec()];
let penalty_counts = validate_blockspecs(&specs).expect("valid test spec");
let layout = penalty_label_layout(&specs, penalty_counts).expect("valid label layout");
let options = BlockwiseFitOptions {
use_remlobjective: true,
use_outer_hessian: true,
compute_covariance: false,
inner_max_cycles: 1,
..BlockwiseFitOptions::default()
};
let (score, warm_start, inner_converged) = custom_family_seed_screening_proxy_labeled(
&DefaultDiagonalExactHookFamily,
&specs,
&options,
&layout,
&array![0.0],
None,
&crate::types::RhoPrior::Flat,
)
.expect("screening proxy should score a finite partial inner solve");
assert!(score.is_finite());
assert!(
!inner_converged,
"one-cycle screening is expected to be a partial inner fit"
);
assert_eq!(warm_start.rho, array![0.0]);
assert_eq!(warm_start.block_beta.len(), 1);
}
#[test]
pub(crate) fn custom_family_outer_derivatives_exposes_surrogate_second_order_geometry() {
#[derive(Clone)]
struct SurrogateFamily;
impl CustomFamily for SurrogateFamily {
fn evaluate(
&self,
block_states: &[ParameterBlockState],
) -> Result<FamilyEvaluation, String> {
let _unused_block_states = block_states;
let n = block_states[0].eta.len();
Ok(FamilyEvaluation {
log_likelihood: 0.0,
blockworking_sets: vec![BlockWorkingSet::Diagonal {
working_response: Array1::zeros(n),
working_weights: Array1::ones(n),
}],
})
}
}
let specs = vec![ParameterBlockSpec {
name: "x".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(array![[1.0]])),
offset: array![0.0],
penalties: vec![PenaltyMatrix::Dense(array![[1.0]])],
nullspace_dims: vec![],
initial_log_lambdas: array![0.0],
initial_beta: None,
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
}];
let options = BlockwiseFitOptions {
use_remlobjective: true,
use_outer_hessian: true,
..BlockwiseFitOptions::default()
};
let (gradient, hessian) = custom_family_outer_derivatives(&SurrogateFamily, &specs, &options);
assert_eq!(gradient, crate::solver::rho_optimizer::Derivative::Analytic);
assert_eq!(
hessian,
crate::solver::rho_optimizer::DeclaredHessianForm::Either
);
}
#[test]
pub(crate) fn custom_family_outer_derivatives_keeps_strict_second_order_geometry() {
#[derive(Clone)]
struct StrictFamily;
impl CustomFamily for StrictFamily {
fn evaluate(
&self,
block_states: &[ParameterBlockState],
) -> Result<FamilyEvaluation, String> {
let _unused_block_states = block_states;
let n = block_states[0].eta.len();
Ok(FamilyEvaluation {
log_likelihood: 0.0,
blockworking_sets: vec![BlockWorkingSet::Diagonal {
working_response: Array1::zeros(n),
working_weights: Array1::ones(n),
}],
})
}
fn exact_newton_outerobjective(&self) -> ExactNewtonOuterObjective {
ExactNewtonOuterObjective::StrictPseudoLaplace
}
}
let specs = vec![ParameterBlockSpec {
name: "x".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(array![[1.0]])),
offset: array![0.0],
penalties: vec![PenaltyMatrix::Dense(array![[1.0]])],
nullspace_dims: vec![],
initial_log_lambdas: array![0.0],
initial_beta: None,
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
}];
let options = BlockwiseFitOptions {
use_remlobjective: true,
use_outer_hessian: true,
..BlockwiseFitOptions::default()
};
let (gradient, hessian) = custom_family_outer_derivatives(&StrictFamily, &specs, &options);
assert_eq!(gradient, crate::solver::rho_optimizer::Derivative::Analytic);
assert_eq!(
hessian,
crate::solver::rho_optimizer::DeclaredHessianForm::Either
);
}
#[derive(Clone)]
pub(crate) struct OneBlockQuarticExactFamily {
pub(crate) linear: f64,
pub(crate) curvature: f64,
pub(crate) second_scale: f64,
}
impl CustomFamily for OneBlockQuarticExactFamily {
fn exact_newton_joint_hessian_beta_dependent(&self) -> bool {
true
}
fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
let _unused_block_states = block_states;
let beta = block_states[0].beta[0];
let log_likelihood =
self.linear * beta - 0.5 * beta * beta - self.curvature * beta.powi(4) / 12.0;
let gradient = self.linear - beta - self.curvature * beta.powi(3) / 3.0;
let hessian = 1.0 + self.curvature * beta * beta;
Ok(FamilyEvaluation {
log_likelihood,
blockworking_sets: vec![BlockWorkingSet::ExactNewton {
gradient: array![gradient],
hessian: SymmetricMatrix::Dense(array![[hessian]]),
}],
})
}
fn exact_newton_hessian_directional_derivative(
&self,
block_states: &[ParameterBlockState],
block_idx: usize,
direction: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
let _unused_block_states = block_states;
assert_eq!(block_idx, 0);
let beta = block_states[0].beta[0];
Ok(Some(array![[2.0 * self.curvature * beta * direction[0]]]))
}
fn exact_newton_hessian_second_directional_derivative(
&self,
block_states: &[ParameterBlockState],
block_idx: usize,
u: &Array1<f64>,
v: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
let _unused_block_states = block_states;
assert_eq!(block_idx, 0);
let value = 2.0 * self.curvature * self.second_scale * u[0] * v[0];
Ok(Some(array![[value]]))
}
}
#[test]
pub(crate) fn generic_single_block_fallback_includes_nonzero_d2h_drift() {
let spec = ParameterBlockSpec {
name: "quartic".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(array![[1.0]])),
offset: array![0.0],
penalties: vec![PenaltyMatrix::Dense(array![[1.0]])],
nullspace_dims: vec![],
initial_log_lambdas: array![0.0],
initial_beta: Some(array![0.75]),
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let options = BlockwiseFitOptions {
inner_tol: 1e-11,
use_remlobjective: true,
use_outer_hessian: true,
compute_covariance: false,
..BlockwiseFitOptions::default()
};
let penalty_counts = vec![1];
let rho = array![0.0];
let with_d2 = evaluate_custom_family_hyper_internal(
&OneBlockQuarticExactFamily {
linear: 3.0,
curvature: 0.5,
second_scale: 1.0,
},
std::slice::from_ref(&spec),
&options,
&penalty_counts,
&rho,
&[vec![]],
None,
crate::types::RhoPrior::Flat,
EvalMode::ValueGradientHessian,
)
.expect("single-block fallback with exact d2H should evaluate");
let without_d2_contribution = evaluate_custom_family_hyper_internal(
&OneBlockQuarticExactFamily {
linear: 3.0,
curvature: 0.5,
second_scale: 0.0,
},
&[spec],
&options,
&penalty_counts,
&rho,
&[vec![]],
None,
crate::types::RhoPrior::Flat,
EvalMode::ValueGradientHessian,
)
.expect("single-block fallback with zero d2H should evaluate");
let h_with = match with_d2.outer_hessian {
crate::solver::rho_optimizer::HessianResult::Analytic(hessian) => hessian,
crate::solver::rho_optimizer::HessianResult::Operator(_)
| crate::solver::rho_optimizer::HessianResult::Unavailable => {
panic!("expected dense analytic Hessian")
}
};
let h_without = match without_d2_contribution.outer_hessian {
crate::solver::rho_optimizer::HessianResult::Analytic(hessian) => hessian,
crate::solver::rho_optimizer::HessianResult::Operator(_)
| crate::solver::rho_optimizer::HessianResult::Unavailable => {
panic!("expected dense analytic Hessian")
}
};
let d2h_delta = h_with[[0, 0]] - h_without[[0, 0]];
assert!(
d2h_delta.abs() > 1e-8,
"expected nonzero outer Hessian contribution from d2H; with={:?}, without={:?}",
h_with,
h_without
);
}
pub(crate) fn jeffreys_seam_spec(p: usize) -> ParameterBlockSpec {
ParameterBlockSpec {
name: "jeffreys-seam".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(Array2::eye(p))),
offset: Array1::zeros(p),
penalties: vec![],
nullspace_dims: vec![],
initial_log_lambdas: Array1::zeros(0),
initial_beta: None,
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
}
}
pub(crate) fn jeffreys_seam_state(beta: Array1<f64>) -> ParameterBlockState {
let eta = beta.clone();
ParameterBlockState { beta, eta }
}
#[derive(Clone)]
pub(crate) struct ObservedJeffreysSeamFamily;
impl CustomFamily for ObservedJeffreysSeamFamily {
fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
let _unused_block_states = block_states;
let n = block_states
.first()
.ok_or_else(|| "missing block 0".to_string())?
.eta
.len();
Ok(FamilyEvaluation {
log_likelihood: 0.0,
blockworking_sets: vec![BlockWorkingSet::Diagonal {
working_response: Array1::zeros(n),
working_weights: Array1::ones(n),
}],
})
}
fn exact_newton_joint_hessian_with_specs(
&self,
block_states: &[ParameterBlockState],
specs: &[ParameterBlockSpec],
) -> Result<Option<Array2<f64>>, String> {
let _unused_block_states = block_states;
assert_eq!(block_states.len(), specs.len());
let beta = &block_states[0].beta;
Ok(Some(array![
[2.0 + beta[0] * beta[0], 0.3],
[0.3, 1.5 + beta[1] * beta[1]]
]))
}
fn exact_newton_joint_hessian_directional_derivative_with_specs(
&self,
block_states: &[ParameterBlockState],
specs: &[ParameterBlockSpec],
d_beta_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
let _unused_block_states = block_states;
assert_eq!(block_states.len(), specs.len());
let beta = &block_states[0].beta;
Ok(Some(array![
[2.0 * beta[0] * d_beta_flat[0], 0.0],
[0.0, 2.0 * beta[1] * d_beta_flat[1]]
]))
}
fn exact_newton_joint_hessian_second_directional_derivative_with_specs(
&self,
block_states: &[ParameterBlockState],
specs: &[ParameterBlockSpec],
d_beta_u_flat: &Array1<f64>,
d_betav_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
let _unused_block_states = block_states;
assert_eq!(block_states.len(), specs.len());
Ok(Some(array![
[2.0 * d_beta_u_flat[0] * d_betav_flat[0], 0.0],
[0.0, 2.0 * d_beta_u_flat[1] * d_betav_flat[1]]
]))
}
}
#[test]
pub(crate) fn joint_jeffreys_information_defaults_delegate_to_observed_hessian() {
let family = ObservedJeffreysSeamFamily;
let specs = vec![jeffreys_seam_spec(2)];
let states = vec![jeffreys_seam_state(array![0.4, -0.7])];
let u = array![0.3, -0.2];
let v = array![-0.1, 0.5];
let observed = family
.exact_newton_joint_hessian_with_specs(&states, &specs)
.expect("observed H")
.expect("observed H present");
let info = family
.joint_jeffreys_information_with_specs(&states, &specs)
.expect("jeffreys info")
.expect("jeffreys info present");
assert_eq!(info, observed, "default Jeffreys info must be observed H");
let observed_dot = family
.exact_newton_joint_hessian_directional_derivative_with_specs(&states, &specs, &u)
.expect("observed Hdot")
.expect("observed Hdot present");
let info_dot = family
.joint_jeffreys_information_directional_derivative_with_specs(&states, &specs, &u)
.expect("jeffreys dI")
.expect("jeffreys dI present");
assert_eq!(
info_dot, observed_dot,
"default Jeffreys dI must be observed Hdot"
);
let observed_ddot = family
.exact_newton_joint_hessian_second_directional_derivative_with_specs(
&states, &specs, &u, &v,
)
.expect("observed H2dot")
.expect("observed H2dot present");
let info_ddot = family
.joint_jeffreys_information_second_directional_derivative_with_specs(
&states, &specs, &u, &v,
)
.expect("jeffreys d2I")
.expect("jeffreys d2I present");
assert_eq!(
info_ddot, observed_ddot,
"default Jeffreys d2I must be observed H2dot"
);
assert!(!family.joint_jeffreys_information_contracted_trace_hessian_available());
let weight = Array2::<f64>::eye(2);
let contracted = family
.joint_jeffreys_information_contracted_trace_hessian_with_specs(&states, &specs, &weight)
.expect("contracted default");
assert!(
contracted.is_none(),
"default contracted trace hook must be None"
);
assert!(family.joint_jeffreys_information_matches_observed_hessian());
}
#[derive(Clone)]
pub(crate) struct ContractedJeffreysSeamFamily;
impl CustomFamily for ContractedJeffreysSeamFamily {
fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
let _unused_block_states = block_states;
let n = block_states
.first()
.ok_or_else(|| "missing block 0".to_string())?
.eta
.len();
Ok(FamilyEvaluation {
log_likelihood: 0.0,
blockworking_sets: vec![BlockWorkingSet::Diagonal {
working_response: Array1::zeros(n),
working_weights: Array1::ones(n),
}],
})
}
fn joint_jeffreys_information_second_directional_derivative_with_specs(
&self,
block_states: &[ParameterBlockState],
specs: &[ParameterBlockSpec],
d_beta_u_flat: &Array1<f64>,
d_betav_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
let _unused_block_states = block_states;
assert_eq!(block_states.len(), specs.len());
let scale = 1.0e6 * d_beta_u_flat.dot(d_betav_flat);
Ok(Some(scale * Array2::<f64>::eye(2)))
}
fn joint_jeffreys_information_contracted_trace_hessian_with_specs(
&self,
block_states: &[ParameterBlockState],
specs: &[ParameterBlockSpec],
weight: &Array2<f64>,
) -> Result<Option<Array2<f64>>, String> {
let _unused_block_states = block_states;
assert_eq!(block_states.len(), specs.len());
assert_eq!(weight.dim(), (2, 2));
Ok(Some(7.0 * Array2::<f64>::eye(2)))
}
fn joint_jeffreys_information_contracted_trace_hessian_available(&self) -> bool {
true
}
}
#[test]
pub(crate) fn jeffreys_second_order_completion_prefers_contracted_hook() {
let family = ContractedJeffreysSeamFamily;
let specs = vec![jeffreys_seam_spec(2)];
let states = vec![jeffreys_seam_state(Array1::zeros(2))];
let h_joint = array![[1.0e-4, 0.0], [0.0, 1.0]];
let z_joint = Array2::<f64>::eye(2);
let completion = custom_family_joint_jeffreys_second_order_completion(
&family, &states, &specs, &h_joint, &z_joint, true,
)
.expect("completion")
.expect("completion present");
let expected = -3.5 * Array2::<f64>::eye(2);
for i in 0..2 {
for j in 0..2 {
assert!(
(completion[[i, j]] - expected[[i, j]]).abs() < 1e-12,
"contracted completion mismatch at ({i},{j}): {} vs {}",
completion[[i, j]],
expected[[i, j]]
);
}
}
}
#[derive(Clone)]
pub(crate) struct PairwiseJeffreysSeamFamily;
impl CustomFamily for PairwiseJeffreysSeamFamily {
fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
let _unused_block_states = block_states;
let n = block_states
.first()
.ok_or_else(|| "missing block 0".to_string())?
.eta
.len();
Ok(FamilyEvaluation {
log_likelihood: 0.0,
blockworking_sets: vec![BlockWorkingSet::Diagonal {
working_response: Array1::zeros(n),
working_weights: Array1::ones(n),
}],
})
}
fn joint_jeffreys_information_second_directional_derivative_with_specs(
&self,
block_states: &[ParameterBlockState],
specs: &[ParameterBlockSpec],
d_beta_u_flat: &Array1<f64>,
d_betav_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
let _unused_block_states = block_states;
assert_eq!(block_states.len(), specs.len());
let scale = d_beta_u_flat.dot(d_betav_flat);
Ok(Some(scale * array![[2.0, 1.0], [1.0, 3.0]]))
}
}
#[test]
pub(crate) fn jeffreys_second_order_completion_pairwise_fallback_when_hook_absent() {
let family = PairwiseJeffreysSeamFamily;
let specs = vec![jeffreys_seam_spec(2)];
let states = vec![jeffreys_seam_state(Array1::zeros(2))];
let h_joint = array![[1.0e-4, 0.0], [0.0, 1.0]];
let z_joint = Array2::<f64>::eye(2);
let completion = custom_family_joint_jeffreys_second_order_completion(
&family, &states, &specs, &h_joint, &z_joint, true,
)
.expect("completion")
.expect("completion present");
let direct = crate::estimate::reml::jeffreys_subspace::joint_jeffreys_second_order_completion(
h_joint.view(),
z_joint.view(),
|u: &Array1<f64>, v: &Array1<f64>| {
family.joint_jeffreys_information_second_directional_derivative_with_specs(
&states, &specs, u, v,
)
},
)
.expect("direct pairwise completion")
.expect("direct pairwise completion present");
assert_eq!(
completion, direct,
"fallback must be the exact pairwise completion"
);
assert!(
completion.iter().any(|value| value.abs() > 0.0),
"pairwise completion should be nonzero on this gated fixture"
);
let blocked = custom_family_joint_jeffreys_second_order_completion(
&family, &states, &specs, &h_joint, &z_joint, false,
)
.expect("blocked completion");
assert!(
blocked.is_none(),
"completion must decline (None) when the pairwise fallback is disallowed and no hook exists"
);
}
#[test]
pub(crate) fn custom_family_outer_derivatives_keeps_second_order_for_large_inner_problem() {
#[derive(Clone)]
struct StrictFamily;
impl CustomFamily for StrictFamily {
fn evaluate(
&self,
block_states: &[ParameterBlockState],
) -> Result<FamilyEvaluation, String> {
let _unused_block_states = block_states;
let n = block_states[0].eta.len();
Ok(FamilyEvaluation {
log_likelihood: 0.0,
blockworking_sets: vec![BlockWorkingSet::Diagonal {
working_response: Array1::zeros(n),
working_weights: Array1::ones(n),
}],
})
}
fn exact_newton_outerobjective(&self) -> ExactNewtonOuterObjective {
ExactNewtonOuterObjective::StrictPseudoLaplace
}
}
let specs = vec![ParameterBlockSpec {
name: "x".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(
Array2::<f64>::zeros((20_100, 50)),
)),
offset: Array1::zeros(20_100),
penalties: vec![PenaltyMatrix::Dense(Array2::<f64>::eye(50))],
nullspace_dims: vec![],
initial_log_lambdas: array![0.0],
initial_beta: None,
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
}];
let options = BlockwiseFitOptions {
use_remlobjective: true,
use_outer_hessian: true,
..BlockwiseFitOptions::default()
};
let (gradient, hessian) = custom_family_outer_derivatives(&StrictFamily, &specs, &options);
assert_eq!(gradient, crate::solver::rho_optimizer::Derivative::Analytic);
assert_eq!(
hessian,
crate::solver::rho_optimizer::DeclaredHessianForm::Either
);
}
impl CustomFamily for OneBlockIdentityFamily {
fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
let _unused_block_states = block_states;
let n = block_states[0].eta.len();
Ok(FamilyEvaluation {
log_likelihood: 0.0,
blockworking_sets: vec![BlockWorkingSet::Diagonal {
working_response: Array1::ones(n),
working_weights: Array1::ones(n),
}],
})
}
}
#[test]
pub(crate) fn fit_custom_family_rejects_invalid_blockspec_before_output_channel_probe() {
let spec = ParameterBlockSpec {
name: "bad_penalty".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(
array![[1.0], [2.0],],
)),
offset: Array1::zeros(2),
penalties: vec![PenaltyMatrix::Dense(Array2::<f64>::eye(2))],
nullspace_dims: vec![0],
initial_log_lambdas: array![0.0],
initial_beta: Some(array![0.0]),
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let err = fit_custom_family(
&OneBlockIdentityFamily,
&[spec],
&BlockwiseFitOptions::default(),
)
.expect_err("invalid block spec should return a typed error");
let message = err.to_string();
assert!(
message.contains("block 0 penalty 0 must be 1x1, got 2x2"),
"unexpected error: {message}",
);
}
#[derive(Clone)]
pub(crate) struct OneBlockGaussianFamily {
pub(crate) y: Array1<f64>,
}
impl CustomFamily for OneBlockGaussianFamily {
fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
let _unused_block_states = block_states;
let eta = &block_states[0].eta;
let resid = eta - &self.y;
let ll = -0.5 * resid.dot(&resid);
Ok(FamilyEvaluation {
log_likelihood: ll,
blockworking_sets: vec![BlockWorkingSet::Diagonal {
working_response: self.y.clone(),
working_weights: Array1::ones(self.y.len()),
}],
})
}
fn diagonalworking_weights_directional_derivative(
&self,
block_states: &[ParameterBlockState],
_: usize,
d_eta: &Array1<f64>,
) -> Result<Option<Array1<f64>>, String> {
let _unused_block_states = block_states;
Ok(Some(Array1::zeros(d_eta.len())))
}
fn diagonalworking_weights_second_directional_derivative(
&self,
block_states: &[ParameterBlockState],
_: usize,
d_eta_u: &Array1<f64>,
arr: &Array1<f64>,
) -> Result<Option<Array1<f64>>, String> {
let _unused_block_states = block_states;
assert!(arr.iter().all(|v| !v.is_nan()));
Ok(Some(Array1::zeros(d_eta_u.len())))
}
}
#[derive(Clone)]
pub(crate) struct OneBlockConstrainedExactFamily {
pub(crate) target: f64,
pub(crate) lower: f64,
}
impl CustomFamily for OneBlockConstrainedExactFamily {
fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
let _unused_block_states = block_states;
let beta = block_states
.first()
.ok_or_else(|| "missing block 0".to_string())?
.beta
.first()
.copied()
.ok_or_else(|| "missing coefficient".to_string())?;
let g = self.target - beta;
let ll = -0.5 * (beta - self.target) * (beta - self.target);
Ok(FamilyEvaluation {
log_likelihood: ll,
blockworking_sets: vec![BlockWorkingSet::ExactNewton {
gradient: array![g],
hessian: SymmetricMatrix::Dense(array![[1.0]]),
}],
})
}
fn block_linear_constraints(
&self,
block_states: &[ParameterBlockState],
block_idx: usize,
block_spec: &ParameterBlockSpec,
) -> Result<Option<LinearInequalityConstraints>, String> {
let _unused_block_states = block_states;
assert!(!block_spec.name.is_empty());
if block_idx != 0 {
return Ok(None);
}
Ok(Some(LinearInequalityConstraints {
a: array![[1.0]],
b: array![self.lower],
}))
}
}
#[derive(Clone)]
pub(crate) struct OneBlockConstrainedNaNHessianFamily;
impl CustomFamily for OneBlockConstrainedNaNHessianFamily {
fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
let _unused_block_states = block_states;
Ok(FamilyEvaluation {
log_likelihood: 0.0,
blockworking_sets: vec![BlockWorkingSet::ExactNewton {
gradient: array![0.0],
hessian: SymmetricMatrix::Dense(array![[f64::NAN]]),
}],
})
}
fn block_linear_constraints(
&self,
block_states: &[ParameterBlockState],
block_idx: usize,
block_spec: &ParameterBlockSpec,
) -> Result<Option<LinearInequalityConstraints>, String> {
let _unused_block_states = block_states;
assert!(!block_spec.name.is_empty());
if block_idx != 0 {
return Ok(None);
}
Ok(Some(LinearInequalityConstraints {
a: array![[1.0]],
b: array![0.0],
}))
}
}
#[derive(Clone)]
pub(crate) struct OneBlockConstrainedIndefiniteHessianFamily;
impl CustomFamily for OneBlockConstrainedIndefiniteHessianFamily {
fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
let _unused_block_states = block_states;
Ok(FamilyEvaluation {
log_likelihood: 0.0,
blockworking_sets: vec![BlockWorkingSet::ExactNewton {
gradient: array![-1.0],
hessian: SymmetricMatrix::Dense(array![[-1.0]]),
}],
})
}
fn block_linear_constraints(
&self,
block_states: &[ParameterBlockState],
block_idx: usize,
block_spec: &ParameterBlockSpec,
) -> Result<Option<LinearInequalityConstraints>, String> {
let _unused_block_states = block_states;
assert!(!block_spec.name.is_empty());
if block_idx != 0 {
return Ok(None);
}
Ok(Some(LinearInequalityConstraints {
a: array![[1.0]],
b: array![1.0],
}))
}
}
#[derive(Clone)]
pub(crate) struct OneBlockLinearLikelihoodExactFamily {
pub(crate) score: f64,
}
impl CustomFamily for OneBlockLinearLikelihoodExactFamily {
fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
let _unused_block_states = block_states;
let beta = block_states
.first()
.ok_or_else(|| "missing block 0".to_string())?
.beta
.first()
.copied()
.ok_or_else(|| "missing coefficient".to_string())?;
Ok(FamilyEvaluation {
log_likelihood: self.score * beta,
blockworking_sets: vec![BlockWorkingSet::ExactNewton {
gradient: array![self.score],
hessian: SymmetricMatrix::Dense(array![[0.0]]),
}],
})
}
}
#[derive(Clone)]
pub(crate) struct PreferJointExactFamily;
impl CustomFamily for PreferJointExactFamily {
fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
let _unused_block_states = block_states;
Ok(FamilyEvaluation {
log_likelihood: 0.0,
blockworking_sets: vec![BlockWorkingSet::ExactNewton {
gradient: array![0.0],
hessian: SymmetricMatrix::Dense(array![[2.0]]),
}],
})
}
fn exact_newton_hessian_directional_derivative(
&self,
block_states: &[ParameterBlockState],
_: usize,
arr: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
let _unused_block_states = block_states;
assert!(arr.iter().all(|v| !v.is_nan()));
Err(
"blockwise exact-newton path should not be used when joint path is available"
.to_string(),
)
}
fn exact_newton_joint_hessian(
&self,
block_states: &[ParameterBlockState],
) -> Result<Option<Array2<f64>>, String> {
let _unused_block_states = block_states;
Ok(Some(array![[2.0]]))
}
fn exact_newton_joint_hessian_directional_derivative(
&self,
block_states: &[ParameterBlockState],
arr: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
let _unused_block_states = block_states;
assert!(arr.iter().all(|v| !v.is_nan()));
Ok(Some(array![[0.0]]))
}
}
#[derive(Clone)]
pub(crate) struct TwoBlockJointConstrainedFamily {
pub(crate) coupling: f64,
}
impl CustomFamily for TwoBlockJointConstrainedFamily {
fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
let _unused_block_states = block_states;
let beta0 = block_states[0].beta[0];
let beta1 = block_states[1].beta[0];
let g0 = 1.0 - beta0 - self.coupling * beta1;
let g1 = 1.0 - beta1 - self.coupling * beta0;
Ok(FamilyEvaluation {
log_likelihood: -0.5
* (beta0 * beta0 + beta1 * beta1 + 2.0 * self.coupling * beta0 * beta1)
+ beta0
+ beta1,
blockworking_sets: vec![
BlockWorkingSet::ExactNewton {
gradient: array![g0],
hessian: SymmetricMatrix::Dense(array![[1.0]]),
},
BlockWorkingSet::ExactNewton {
gradient: array![g1],
hessian: SymmetricMatrix::Dense(array![[1.0]]),
},
],
})
}
fn exact_newton_joint_hessian(
&self,
block_states: &[ParameterBlockState],
) -> Result<Option<Array2<f64>>, String> {
let _unused_block_states = block_states;
Ok(Some(array![[1.0, self.coupling], [self.coupling, 1.0]]))
}
fn exact_newton_joint_hessian_directional_derivative(
&self,
block_states: &[ParameterBlockState],
arr: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
let _unused_block_states = block_states;
assert!(arr.iter().all(|v| !v.is_nan()));
Ok(Some(Array2::zeros((2, 2))))
}
fn block_linear_constraints(
&self,
block_states: &[ParameterBlockState],
block_idx: usize,
block_spec: &ParameterBlockSpec,
) -> Result<Option<LinearInequalityConstraints>, String> {
let _unused_block_states = block_states;
assert!(!block_spec.name.is_empty());
if block_idx >= 2 {
return Ok(None);
}
Ok(Some(LinearInequalityConstraints {
a: array![[1.0]],
b: array![0.0],
}))
}
}
#[derive(Clone)]
pub(crate) struct TwoBlockPersistentGradientFamily;
impl CustomFamily for TwoBlockPersistentGradientFamily {
fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
let _unused_block_states = block_states;
let beta0 = block_states[0].beta[0];
let beta1 = block_states[1].beta[0];
Ok(FamilyEvaluation {
log_likelihood: beta0 + beta1,
blockworking_sets: vec![
BlockWorkingSet::ExactNewton {
gradient: array![1.0],
hessian: SymmetricMatrix::Dense(array![[1.0]]),
},
BlockWorkingSet::ExactNewton {
gradient: array![1.0],
hessian: SymmetricMatrix::Dense(array![[1.0]]),
},
],
})
}
fn exact_newton_joint_hessian(
&self,
block_states: &[ParameterBlockState],
) -> Result<Option<Array2<f64>>, String> {
let _unused_block_states = block_states;
Ok(Some(array![[1.0, 0.25], [0.25, 1.0]]))
}
fn exact_newton_joint_hessian_directional_derivative(
&self,
block_states: &[ParameterBlockState],
arr: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
let _unused_block_states = block_states;
assert!(arr.iter().all(|v| !v.is_nan()));
Ok(Some(Array2::zeros((2, 2))))
}
fn has_explicit_joint_hessian(&self) -> bool {
true
}
}
#[derive(Clone)]
pub(crate) struct TwoBlockNonFiniteCurvatureFamily;
impl CustomFamily for TwoBlockNonFiniteCurvatureFamily {
fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
let _unused_block_states = block_states;
let beta0 = block_states[0].beta[0];
let beta1 = block_states[1].beta[0];
Ok(FamilyEvaluation {
log_likelihood: beta0 + beta1,
blockworking_sets: vec![
BlockWorkingSet::ExactNewton {
gradient: array![1.0],
hessian: SymmetricMatrix::Dense(array![[1.0]]),
},
BlockWorkingSet::ExactNewton {
gradient: array![1.0],
hessian: SymmetricMatrix::Dense(array![[1.0]]),
},
],
})
}
fn exact_newton_joint_hessian(
&self,
block_states: &[ParameterBlockState],
) -> Result<Option<Array2<f64>>, String> {
let _unused_block_states = block_states;
Ok(Some(array![[f64::NAN, 0.25], [0.25, 1.0]]))
}
fn exact_newton_joint_hessian_directional_derivative(
&self,
block_states: &[ParameterBlockState],
arr: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
let _unused_block_states = block_states;
assert!(arr.iter().all(|v| !v.is_nan()));
Ok(Some(Array2::zeros((2, 2))))
}
fn has_explicit_joint_hessian(&self) -> bool {
true
}
}
#[derive(Clone)]
pub(crate) struct TwoBlockJointSurrogateFamily;
impl CustomFamily for TwoBlockJointSurrogateFamily {
fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
let _unused_block_states = block_states;
let n0 = block_states
.first()
.ok_or_else(|| "missing block 0".to_string())?
.eta
.len();
let n1 = block_states
.get(1)
.ok_or_else(|| "missing block 1".to_string())?
.eta
.len();
Ok(FamilyEvaluation {
log_likelihood: 0.0,
blockworking_sets: vec![
BlockWorkingSet::Diagonal {
working_response: Array1::zeros(n0),
working_weights: Array1::ones(n0),
},
BlockWorkingSet::Diagonal {
working_response: Array1::zeros(n1),
working_weights: Array1::ones(n1),
},
],
})
}
fn exact_newton_joint_hessian_with_specs(
&self,
block_states: &[ParameterBlockState],
specs: &[ParameterBlockSpec],
) -> Result<Option<Array2<f64>>, String> {
let _unused_block_states = block_states;
let p: usize = specs.iter().map(|spec| spec.design.ncols()).sum();
Ok(Some(Array2::eye(p)))
}
fn exact_newton_joint_hessian_directional_derivative_with_specs(
&self,
block_states: &[ParameterBlockState],
specs: &[ParameterBlockSpec],
arr: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
let _unused_block_states = block_states;
assert!(arr.iter().all(|v| !v.is_nan()));
let p: usize = specs.iter().map(|spec| spec.design.ncols()).sum();
Ok(Some(Array2::zeros((p, p))))
}
fn exact_newton_joint_hessian_second_directional_derivative_with_specs(
&self,
block_states: &[ParameterBlockState],
specs: &[ParameterBlockSpec],
arr: &Array1<f64>,
arr2: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
let _unused_block_states = block_states;
assert!(arr.iter().all(|v| !v.is_nan()));
assert!(arr2.iter().all(|v| !v.is_nan()));
let p: usize = specs.iter().map(|spec| spec.design.ncols()).sum();
Ok(Some(Array2::zeros((p, p))))
}
}
#[derive(Clone)]
pub(crate) struct OneBlockPseudoLaplaceExactFamily {
pub(crate) target: f64,
}
impl CustomFamily for OneBlockPseudoLaplaceExactFamily {
fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
let _unused_block_states = block_states;
let beta = block_states
.first()
.ok_or_else(|| "missing block 0".to_string())?
.beta
.first()
.copied()
.ok_or_else(|| "missing coefficient".to_string())?;
let resid = beta - self.target;
Ok(FamilyEvaluation {
log_likelihood: -resid * resid,
blockworking_sets: vec![BlockWorkingSet::ExactNewton {
gradient: array![-2.0 * resid],
hessian: SymmetricMatrix::Dense(array![[2.0]]),
}],
})
}
fn exact_newton_outerobjective(&self) -> ExactNewtonOuterObjective {
ExactNewtonOuterObjective::StrictPseudoLaplace
}
fn exact_newton_joint_hessian(
&self,
block_states: &[ParameterBlockState],
) -> Result<Option<Array2<f64>>, String> {
let _unused_block_states = block_states;
Ok(Some(array![[2.0]]))
}
fn exact_newton_hessian_directional_derivative(
&self,
block_states: &[ParameterBlockState],
_: usize,
arr: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
let _unused_block_states = block_states;
assert!(arr.iter().all(|v| !v.is_nan()));
Ok(Some(array![[0.0]]))
}
fn exact_newton_joint_hessian_directional_derivative(
&self,
block_states: &[ParameterBlockState],
arr: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
let _unused_block_states = block_states;
assert!(arr.iter().all(|v| !v.is_nan()));
Ok(Some(array![[0.0]]))
}
}
#[derive(Clone)]
pub(crate) struct OneBlockExactPsiHookFamily;
impl CustomFamily for OneBlockExactPsiHookFamily {
fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
let _unused_block_states = block_states;
Ok(FamilyEvaluation {
log_likelihood: 0.0,
blockworking_sets: vec![BlockWorkingSet::ExactNewton {
gradient: array![0.0],
hessian: SymmetricMatrix::Dense(array![[1.0]]),
}],
})
}
fn exact_newton_outerobjective(&self) -> ExactNewtonOuterObjective {
ExactNewtonOuterObjective::StrictPseudoLaplace
}
fn exact_newton_joint_hessian(
&self,
block_states: &[ParameterBlockState],
) -> Result<Option<Array2<f64>>, String> {
let _unused_block_states = block_states;
Ok(Some(array![[1.0]]))
}
fn exact_newton_hessian_directional_derivative(
&self,
block_states: &[ParameterBlockState],
_: usize,
arr: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
let _unused_block_states = block_states;
assert!(arr.iter().all(|v| !v.is_nan()));
Ok(Some(array![[0.0]]))
}
fn exact_newton_joint_hessian_directional_derivative(
&self,
block_states: &[ParameterBlockState],
arr: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
let _unused_block_states = block_states;
assert!(arr.iter().all(|v| !v.is_nan()));
Ok(Some(array![[0.0]]))
}
fn exact_newton_joint_psi_terms(
&self,
block_states: &[ParameterBlockState],
block_specs: &[ParameterBlockSpec],
derivative_blocks: &[Vec<CustomFamilyBlockPsiDerivative>],
_: usize,
) -> Result<Option<ExactNewtonJointPsiTerms>, String> {
let _unused_derivative_blocks = derivative_blocks;
let _unused_block_specs = block_specs;
let _unused_block_states = block_states;
let _ = (block_states, block_specs, derivative_blocks);
Ok(Some(ExactNewtonJointPsiTerms {
objective_psi: 3.5,
score_psi: array![0.0],
hessian_psi: array![[0.0]],
hessian_psi_operator: None,
}))
}
}
#[derive(Clone)]
pub(crate) struct OneBlockIndefinitePseudoLaplaceFamily;
impl CustomFamily for OneBlockIndefinitePseudoLaplaceFamily {
fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
let _unused_block_states = block_states;
Ok(FamilyEvaluation {
log_likelihood: 0.0,
blockworking_sets: vec![BlockWorkingSet::ExactNewton {
gradient: array![0.0],
hessian: SymmetricMatrix::Dense(array![[-1.0]]),
}],
})
}
fn exact_newton_outerobjective(&self) -> ExactNewtonOuterObjective {
ExactNewtonOuterObjective::StrictPseudoLaplace
}
fn exact_newton_joint_hessian(
&self,
block_states: &[ParameterBlockState],
) -> Result<Option<Array2<f64>>, String> {
let _unused_block_states = block_states;
Ok(Some(array![[-1.0]]))
}
}
#[derive(Clone)]
pub(crate) struct OneBlockNearlySymmetricPseudoLaplaceFamily;
impl CustomFamily for OneBlockNearlySymmetricPseudoLaplaceFamily {
fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
let _unused_block_states = block_states;
let beta = block_states
.first()
.ok_or_else(|| "missing block 0".to_string())?
.beta
.clone();
let h = array![[2.0, 0.1], [3.0, 2.0]];
let gradient = -h.dot(&beta);
Ok(FamilyEvaluation {
log_likelihood: -0.5 * beta.dot(&h.dot(&beta)),
blockworking_sets: vec![BlockWorkingSet::ExactNewton {
gradient,
hessian: SymmetricMatrix::Dense(h),
}],
})
}
fn exact_newton_outerobjective(&self) -> ExactNewtonOuterObjective {
ExactNewtonOuterObjective::StrictPseudoLaplace
}
fn exact_newton_joint_hessian(
&self,
block_states: &[ParameterBlockState],
) -> Result<Option<Array2<f64>>, String> {
let _unused_block_states = block_states;
Ok(Some(array![[2.0, 0.1], [3.0, 2.0]]))
}
}
#[derive(Clone)]
pub(crate) struct OneBlockAlwaysErrorFamily;
impl CustomFamily for OneBlockAlwaysErrorFamily {
fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
let _unused_block_states = block_states;
Err("synthetic outer objective failure: block[0] evaluate()".to_string())
}
}
#[derive(Clone)]
pub(crate) struct OneBlockCovarianceErrorFamily;
impl CustomFamily for OneBlockCovarianceErrorFamily {
fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
let _unused_block_states = block_states;
let n = block_states[0].eta.len();
Ok(FamilyEvaluation {
log_likelihood: 0.0,
blockworking_sets: vec![BlockWorkingSet::Diagonal {
working_response: Array1::zeros(n),
working_weights: Array1::ones(n),
}],
})
}
fn exact_newton_joint_hessian_with_specs(
&self,
block_states: &[ParameterBlockState],
block_specs: &[ParameterBlockSpec],
) -> Result<Option<Array2<f64>>, String> {
let _unused_block_specs = block_specs;
let _unused_block_states = block_states;
Err("synthetic covariance assembly failure".to_string())
}
}
#[test]
pub(crate) fn effectiveridge_is_never_below_solver_floor() {
assert!((effective_solverridge(0.0) - 1e-15).abs() < 1e-30);
assert!((effective_solverridge(1e-8) - 1e-8).abs() < 1e-20);
}
#[test]
pub(crate) fn objective_includes_solverridge_quadratic_term() {
let spec = ParameterBlockSpec {
name: "b0".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(array![[1.0]])),
offset: array![0.0],
penalties: vec![],
nullspace_dims: vec![],
initial_log_lambdas: Array1::zeros(0),
initial_beta: Some(array![0.0]),
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let options = BlockwiseFitOptions {
inner_max_cycles: 1,
inner_tol: 0.0,
outer_max_iter: 1,
outer_tol: 1e-8,
outer_rel_cost_tol: None,
rho_lower_bound: -10.0,
minweight: CUSTOM_FAMILY_WEIGHT_FLOOR,
ridge_floor: 1e-4,
ridge_policy: RidgePolicy::explicit_stabilization_pospart(),
use_remlobjective: false,
compute_covariance: false,
use_outer_hessian: false,
screening_max_inner_iterations: None,
outer_inner_max_iterations: None,
seed_screening: false,
early_exit_threshold: None,
outer_score_subsample: None,
auto_outer_subsample: false,
outer_eval_context: None,
cache_session: None,
cache_mirror_sessions: Vec::new(),
joint_penalties: None,
screen_initial_rho: true,
};
let result = fit_custom_family(&OneBlockIdentityFamily, &[spec], &options)
.expect("custom family fit should succeed");
let ridge = effective_solverridge(options.ridge_floor);
let beta = result.block_states[0].beta[0];
let expected_penalty = 0.5 * ridge * beta * beta;
assert!(
(result.penalized_objective - expected_penalty).abs() < 1e-12,
"penalized objective should equal ridge quadratic term when ll=0 and S=0; got {}, expected {}",
result.penalized_objective,
expected_penalty
);
}
#[test]
pub(crate) fn inner_block_accepts_penalty_improving_step_even_if_loglik_drops() {
let family = OneBlockGaussianFamily { y: array![1.0] };
let spec = ParameterBlockSpec {
name: "b0".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(array![[1.0]])),
offset: array![0.0],
penalties: vec![PenaltyMatrix::Dense(array![[1.0]])],
nullspace_dims: vec![],
initial_log_lambdas: array![10.0_f64.ln()],
initial_beta: Some(array![1.0]),
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let options = BlockwiseFitOptions {
inner_max_cycles: 20,
inner_tol: 1e-10,
outer_max_iter: 1,
outer_tol: 1e-8,
outer_rel_cost_tol: None,
rho_lower_bound: -10.0,
minweight: CUSTOM_FAMILY_WEIGHT_FLOOR,
ridge_floor: 0.0,
ridge_policy: RidgePolicy::explicit_stabilization_pospart(),
use_remlobjective: false,
compute_covariance: false,
use_outer_hessian: false,
screening_max_inner_iterations: None,
outer_inner_max_iterations: None,
seed_screening: false,
early_exit_threshold: None,
outer_score_subsample: None,
auto_outer_subsample: false,
outer_eval_context: None,
cache_session: None,
cache_mirror_sessions: Vec::new(),
joint_penalties: None,
screen_initial_rho: true,
};
let per_block_log_lambdas = vec![array![10.0_f64.ln()]];
let inner = inner_blockwise_fit(&family, &[spec], &per_block_log_lambdas, &options, None)
.expect("inner blockwise fit should succeed");
let beta = inner.block_states[0].beta[0];
assert!(
beta < 0.5,
"beta should shrink toward penalized mode; got {}",
beta
);
assert!(
inner.log_likelihood < -1e-8,
"raw log-likelihood should drop for this strongly penalized move; got {}",
inner.log_likelihood
);
}
#[test]
pub(crate) fn exact_newton_backtracking_descent_includes_explicit_ridge() {
let family = OneBlockLinearLikelihoodExactFamily { score: 0.5 };
let spec = ParameterBlockSpec {
name: "b0".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(array![[1.0]])),
offset: array![0.0],
penalties: vec![],
nullspace_dims: vec![],
initial_log_lambdas: Array1::zeros(0),
initial_beta: Some(array![1.0]),
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let options = BlockwiseFitOptions {
inner_max_cycles: 1,
inner_tol: 0.0,
outer_max_iter: 1,
outer_tol: 1e-8,
outer_rel_cost_tol: None,
rho_lower_bound: -10.0,
minweight: CUSTOM_FAMILY_WEIGHT_FLOOR,
ridge_floor: 1.0,
ridge_policy: RidgePolicy::explicit_stabilization_pospart(),
use_remlobjective: false,
compute_covariance: false,
use_outer_hessian: false,
screening_max_inner_iterations: None,
outer_inner_max_iterations: None,
seed_screening: false,
early_exit_threshold: None,
outer_score_subsample: None,
auto_outer_subsample: false,
outer_eval_context: None,
cache_session: None,
cache_mirror_sessions: Vec::new(),
joint_penalties: None,
screen_initial_rho: true,
};
let inner = inner_blockwise_fit(&family, &[spec], &[Array1::zeros(0)], &options, None)
.expect("inner blockwise fit should succeed");
let beta = inner.block_states[0].beta[0];
let objective = -inner.log_likelihood + inner.penalty_value;
assert!(
beta < 1.0 - 1e-12,
"ridge-aware fallback descent should shrink beta after rejecting the uphill Newton step; got {}",
beta
);
assert!(
objective < -1e-12,
"accepted fallback step should lower the penalized objective; got {}",
objective
);
}
#[test]
pub(crate) fn outergradient_matches_finite_difference_for_one_block() {
let n = 8usize;
let y = Array1::from_vec(vec![0.4, -0.2, 0.8, 1.0, -0.5, 0.3, 0.1, -0.7]);
let spec = ParameterBlockSpec {
name: "b0".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(Array2::from_elem(
(n, 1),
1.0,
))),
offset: Array1::zeros(n),
penalties: vec![PenaltyMatrix::Dense(Array2::eye(1))],
nullspace_dims: vec![],
initial_log_lambdas: array![0.2],
initial_beta: None,
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let options = BlockwiseFitOptions {
use_remlobjective: true,
ridge_floor: 1e-10,
..BlockwiseFitOptions::default()
};
let penalty_counts = vec![1usize];
let rho = array![0.1];
let (f0, g0, _) = outerobjective_andgradient(
&OneBlockGaussianFamily { y: y.clone() },
std::slice::from_ref(&spec),
&options,
&penalty_counts,
&rho,
None,
)
.expect("objective/gradient");
let h = 1e-5;
let rho_p = array![rho[0] + h];
let rho_m = array![rho[0] - h];
let (fp, _, _) = outerobjective_andgradient(
&OneBlockGaussianFamily { y: y.clone() },
std::slice::from_ref(&spec),
&options,
&penalty_counts,
&rho_p,
None,
)
.expect("objective+");
let (fm, _, _) = outerobjective_andgradient(
&OneBlockGaussianFamily { y },
std::slice::from_ref(&spec),
&options,
&penalty_counts,
&rho_m,
None,
)
.expect("objective-");
let gfd = (fp - fm) / (2.0 * h);
let rel = (g0[0] - gfd).abs() / gfd.abs().max(1e-8);
assert!(f0.is_finite());
assert_eq!(
g0[0].signum(),
gfd.signum(),
"outer gradient sign mismatch: analytic={} fd={}",
g0[0],
gfd
);
assert!(
rel < 5e-3,
"outer gradient mismatch: analytic={} fd={} rel={}",
g0[0],
gfd,
rel
);
}
#[test]
pub(crate) fn outergradient_prefers_joint_exact_pathwhen_available() {
let spec = ParameterBlockSpec {
name: "joint_exact".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(array![[1.0]])),
offset: array![0.0],
penalties: vec![PenaltyMatrix::Dense(Array2::eye(1))],
nullspace_dims: vec![],
initial_log_lambdas: array![0.0],
initial_beta: Some(array![0.0]),
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let options = BlockwiseFitOptions {
use_remlobjective: true,
ridge_floor: 1e-10,
..BlockwiseFitOptions::default()
};
let penalty_counts = vec![1usize];
let rho = array![0.0];
let result = outerobjective_andgradient(
&PreferJointExactFamily,
std::slice::from_ref(&spec),
&options,
&penalty_counts,
&rho,
None,
);
assert!(
result.is_ok(),
"joint exact path should be preferred over blockwise fallback: {:?}",
result.err()
);
}
#[test]
pub(crate) fn innerfit_uses_joint_exact_path_for_multiblock_constraints() {
let spec0 = ParameterBlockSpec {
name: "block0".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(array![[1.0]])),
offset: array![0.0],
penalties: vec![],
nullspace_dims: vec![],
initial_log_lambdas: Array1::zeros(0),
initial_beta: Some(array![0.0]),
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let spec1 = ParameterBlockSpec {
name: "block1".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(array![[1.0]])),
offset: array![0.0],
penalties: vec![],
nullspace_dims: vec![],
initial_log_lambdas: Array1::zeros(0),
initial_beta: Some(array![0.0]),
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let options = BlockwiseFitOptions {
inner_max_cycles: 1,
inner_tol: 1e-10,
ridge_floor: CUSTOM_FAMILY_RIDGE_FLOOR,
..BlockwiseFitOptions::default()
};
let per_block = vec![Array1::zeros(0), Array1::zeros(0)];
let result = inner_blockwise_fit(
&TwoBlockJointConstrainedFamily { coupling: 0.25 },
&[spec0, spec1],
&per_block,
&options,
None,
)
.expect("joint constrained inner fit should succeed");
assert!(
result.converged,
"joint constrained inner fit should converge in one cycle"
);
assert_eq!(result.cycles, 1);
assert!((result.block_states[0].beta[0] - 0.8).abs() < 1e-8);
assert!((result.block_states[1].beta[0] - 0.8).abs() < 1e-8);
assert_eq!(result.active_sets, vec![None, None]);
}
#[test]
pub(crate) fn joint_newton_budget_exhaustion_refuses_coupled_exact_inner() {
let spec0 = ParameterBlockSpec {
name: "block0".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(array![[1.0]])),
offset: array![0.0],
penalties: vec![],
nullspace_dims: vec![],
initial_log_lambdas: Array1::zeros(0),
initial_beta: Some(array![0.0]),
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let spec1 = ParameterBlockSpec {
name: "block1".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(array![[1.0]])),
offset: array![0.0],
penalties: vec![],
nullspace_dims: vec![],
initial_log_lambdas: Array1::zeros(0),
initial_beta: Some(array![0.0]),
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let options = BlockwiseFitOptions {
inner_max_cycles: 1,
inner_tol: 1e-12,
ridge_floor: CUSTOM_FAMILY_RIDGE_FLOOR,
..BlockwiseFitOptions::default()
};
let per_block = vec![Array1::zeros(0), Array1::zeros(0)];
let err = inner_blockwise_fit(
&TwoBlockPersistentGradientFamily,
&[spec0, spec1],
&per_block,
&options,
None,
)
.expect_err("coupled exact-joint max-budget exhaustion must fail loudly");
assert!(
err.contains("exhausted the joint Newton budget without KKT convergence"),
"budget exhaustion should be named explicitly: {err}"
);
assert!(
err.contains("block_residual_inf"),
"error should carry per-block residual diagnostics: {err}"
);
}
#[test]
pub(crate) fn non_finite_curvature_exits_joint_newton_far_below_budget() {
let spec0 = ParameterBlockSpec {
name: "block0".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(array![[1.0]])),
offset: array![0.0],
penalties: vec![],
nullspace_dims: vec![],
initial_log_lambdas: Array1::zeros(0),
initial_beta: Some(array![0.0]),
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let spec1 = ParameterBlockSpec {
name: "block1".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(array![[1.0]])),
offset: array![0.0],
penalties: vec![],
nullspace_dims: vec![],
initial_log_lambdas: Array1::zeros(0),
initial_beta: Some(array![0.0]),
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let options = BlockwiseFitOptions {
inner_max_cycles: DEFAULT_CUSTOM_FAMILY_INNER_MAX_CYCLES,
inner_tol: 1e-12,
ridge_floor: CUSTOM_FAMILY_RIDGE_FLOOR,
..BlockwiseFitOptions::default()
};
let per_block = vec![Array1::zeros(0), Array1::zeros(0)];
let err = inner_blockwise_fit(
&TwoBlockNonFiniteCurvatureFamily,
&[spec0, spec1],
&per_block,
&options,
None,
)
.expect_err("a non-finite joint Hessian must fail the coupled exact-joint inner solve");
assert!(
err.contains("exited the joint Newton path before convergence"),
"non-finite curvature must take the early structured exit, not the \
budget path: {err}"
);
assert!(
!err.contains("exhausted the joint Newton budget"),
"non-finite curvature must NOT consume the joint Newton budget: {err}"
);
}
#[test]
pub(crate) fn joint_proposal_at_step_floor_suppresses_descent_substitution_near_optimum() {
assert!(
joint_proposal_at_step_floor(1.413e-5, 1.355e-5),
"a proposal within 4× step_tol is at the convergence floor; \
the descent substitution must be suppressed"
);
assert!(joint_proposal_at_step_floor(4.0 * 1.355e-5, 1.355e-5));
assert!(
!joint_proposal_at_step_floor(1.182e-2, 1.355e-5),
"an O(1e-2) proposal is far above the step floor; the \
preconditioned-descent fallback must remain active there"
);
assert!(!joint_proposal_at_step_floor(f64::NAN, 1.0e-5));
assert!(!joint_proposal_at_step_floor(1.0e-6, f64::INFINITY));
}
#[test]
pub(crate) fn ridge_stabilization_gap_produces_exact_rho_two_in_null_direction() {
let h_nll = array![[-1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0]];
let source = JointHessianSource::Dense(h_nll.clone());
let ranges = vec![(0, 3)];
let s_lambdas = vec![Array2::<f64>::zeros((3, 3))];
let base = JOINT_TRACE_STABILITY_RIDGE;
let ridge_floor = 1.0e-12_f64;
let joint_mode_diagonal_ridge = 0.0_f64; let mut lhs = h_nll.clone();
add_joint_penalty_to_matrix(&mut lhs, &ranges, &s_lambdas, base, None);
let shift = exact_newton_stabilizing_shift_psd_penalized(&lhs, &lhs, ridge_floor)
.expect("indefinite Hessian must yield a positive stabilizing shift");
assert!(
shift > 0.9,
"shift should lift the -1 eigenvalue; got {shift}"
);
let joint_solver_diagonal_ridge = base + shift;
let big_delta = joint_solver_diagonal_ridge - joint_mode_diagonal_ridge;
let rhs = array![0.0_f64, 0.0, 1.0];
let h_used_22 = 0.0 + joint_solver_diagonal_ridge;
let delta = array![0.0, 0.0, rhs[2] / h_used_22];
let mut hpen_delta = Array1::<f64>::zeros(3);
apply_joint_penalized_hessian_into(
&source,
&ranges,
&s_lambdas,
joint_solver_diagonal_ridge,
&delta,
&mut hpen_delta,
None,
)
.expect("apply joint penalized hessian must succeed");
let predicted = joint_quadratic_predicted_reduction(&rhs, &hpen_delta, &delta);
let mut h_true_delta = Array1::<f64>::zeros(3);
apply_joint_penalized_hessian_into(
&source,
&ranges,
&s_lambdas,
joint_mode_diagonal_ridge,
&delta,
&mut h_true_delta,
None,
)
.expect("apply true (un-stabilized) hessian must succeed");
let actual = rhs.dot(&delta) - 0.5 * delta.dot(&h_true_delta);
let rho = actual / predicted;
assert!(
(rho - 2.0).abs() <= 1e-10,
"ρ should be EXACTLY 2 when Newton step lies in null(H_true) with stabilizing-shift gap; got {rho}",
);
let rhs_dot_delta = rhs.dot(&delta);
let delta_sq_times_big_delta = big_delta * delta.dot(&delta);
assert!(
(rhs_dot_delta - delta_sq_times_big_delta).abs() <= 1e-10 * rhs_dot_delta.abs(),
"Newton-identity null-space condition: rhs·δ ({rhs_dot_delta}) should equal Δ·‖δ‖² ({delta_sq_times_big_delta})",
);
for scale in [0.001_f64, 0.029, 1.0, 988.0] {
let scaled_rhs = &rhs * scale;
let scaled_delta = &delta * scale;
let mut scaled_hpen = Array1::<f64>::zeros(3);
apply_joint_penalized_hessian_into(
&source,
&ranges,
&s_lambdas,
joint_solver_diagonal_ridge,
&scaled_delta,
&mut scaled_hpen,
None,
)
.expect("apply scaled");
let scaled_predicted =
joint_quadratic_predicted_reduction(&scaled_rhs, &scaled_hpen, &scaled_delta);
let mut scaled_h_true_delta = Array1::<f64>::zeros(3);
apply_joint_penalized_hessian_into(
&source,
&ranges,
&s_lambdas,
joint_mode_diagonal_ridge,
&scaled_delta,
&mut scaled_h_true_delta,
None,
)
.expect("apply scaled true");
let scaled_actual =
scaled_rhs.dot(&scaled_delta) - 0.5 * scaled_delta.dot(&scaled_h_true_delta);
let scaled_rho = scaled_actual / scaled_predicted;
assert!(
(scaled_rho - 2.0).abs() <= 1e-10,
"ρ invariance under step rescaling broke at scale {scale}: got {scaled_rho}",
);
}
}
#[test]
pub(crate) fn cone_projection_preserves_step_where_alpha_crush_collapses_it() {
use crate::solver::active_set::{
LinearInequalityConstraints, project_point_strictly_into_feasible_cone,
};
let a = array![[1.0_f64, 0.0]];
let b = Array1::<f64>::zeros(1);
let constraints = LinearInequalityConstraints::from_paired(a, b);
let beta = array![0.0_f64, 0.0];
let trial_step = array![-1.0_f64, 5.0];
let trial_point = &beta + &trial_step;
let slack = constraints.a.row(0).dot(&beta) - constraints.b[0];
let drift = constraints.a.row(0).dot(&trial_step);
assert!(drift < 0.0, "binding-row drift must be negative");
let alpha = (slack / -drift).clamp(0.0, 1.0);
let alpha_step_norm = {
let s = &trial_step * alpha;
s.dot(&s).sqrt()
};
assert!(
alpha_step_norm < 1e-6,
"α-crush must collapse the whole step on a binding row; got |step|={alpha_step_norm:.3e}"
);
let projected = project_point_strictly_into_feasible_cone(&trial_point, &constraints)
.expect("cone projection of the trial point must succeed");
let projected_step = &projected - β
let projected_step_norm = projected_step.dot(&projected_step).sqrt();
assert!(
(projected[1] - 5.0).abs() < 1e-9,
"unconstrained coordinate must keep its full motion; got {:.6}",
projected[1]
);
assert!(
projected_step_norm > 4.9,
"cone projection must preserve the unconstrained step magnitude; got |step|={projected_step_norm:.3e}"
);
assert!(
projected_step_norm > 1e6 * alpha_step_norm,
"projection step ({projected_step_norm:.3e}) must dwarf the α-crushed step ({alpha_step_norm:.3e})"
);
assert!(
projected[0] >= -1e-9,
"projected binding coordinate must be feasible; got {:.3e}",
projected[0]
);
}
#[test]
pub(crate) fn joint_feasibility_alpha_gate_discriminates_healthy_from_crush() {
#[derive(Clone)]
struct AlphaFamily {
alpha: Option<f64>,
}
impl CustomFamily for AlphaFamily {
fn evaluate(
&self,
block_states: &[ParameterBlockState],
) -> Result<FamilyEvaluation, String> {
let _unused_block_states = block_states;
assert_eq!(block_states.len(), 1);
Ok(FamilyEvaluation {
log_likelihood: 0.0,
blockworking_sets: vec![BlockWorkingSet::ExactNewton {
gradient: array![0.0],
hessian: SymmetricMatrix::Dense(array![[1.0]]),
}],
})
}
fn max_feasible_step_size(
&self,
block_states: &[ParameterBlockState],
idx: usize,
arr: &Array1<f64>,
) -> Result<Option<f64>, String> {
let _unused_block_states = block_states;
assert!(idx < block_states.len());
assert!(!arr.is_empty());
Ok(self.alpha)
}
}
let states = vec![ParameterBlockState {
beta: array![0.0],
eta: array![0.0],
}];
let ranges = vec![(0usize, 1usize)];
let step = array![1.0_f64];
let healthy = AlphaFamily { alpha: None };
let (alpha_healthy, _) =
compute_joint_feasibility_alpha(&healthy, &states, &ranges, &step).unwrap();
assert_eq!(alpha_healthy, 1.0, "no constraint ⇒ α = 1.0");
assert!(
alpha_healthy >= JOINT_FEASIBILITY_ALPHA_CRUSH_THRESHOLD,
"healthy α must NOT trip the crush bypass (legacy path stays byte-identical)"
);
let moderate = AlphaFamily {
alpha: Some(2.0 * JOINT_FEASIBILITY_ALPHA_CRUSH_THRESHOLD),
};
let (alpha_moderate, _) =
compute_joint_feasibility_alpha(&moderate, &states, &ranges, &step).unwrap();
assert!(
alpha_moderate >= JOINT_FEASIBILITY_ALPHA_CRUSH_THRESHOLD,
"moderate α (2× threshold) must stay on the legacy path"
);
let crush = AlphaFamily { alpha: Some(1e-4) };
let (alpha_crush, limiting) =
compute_joint_feasibility_alpha(&crush, &states, &ranges, &step).unwrap();
assert!(
alpha_crush < JOINT_FEASIBILITY_ALPHA_CRUSH_THRESHOLD,
"the survival pathology (α≈1e-4) must trip the crush bypass; got α={alpha_crush:.3e}"
);
assert_eq!(limiting, Some(0), "the binding block must be reported");
}
#[test]
pub(crate) fn per_block_penalized_shift_stays_data_scaled_under_oversmoothed_penalty() {
let h_data = array![
[1.0 - 0.7, 0.7, 0.0],
[0.7, 1.0 - 0.7, 0.0],
[0.0, 0.0, 1.0],
];
let lam = 1.0e7_f64;
let s = &array![[1.0_f64, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0],] * lam;
let lhs = &h_data + &s;
assert!(
lhs.cholesky(Side::Lower).is_err(),
"penalized lhs must stay indefinite along ker(S) ∋ (1,−1,0) so the shift branch engages"
);
let ridge_floor = 1.0e-12_f64;
let p = lhs.nrows();
let naive_gershgorin_min = (0..p)
.map(|i| {
let radius: f64 = (0..p).filter(|&j| j != i).map(|j| lhs[[i, j]].abs()).sum();
lhs[[i, i]] - radius
})
.fold(f64::INFINITY, f64::min);
assert!(
naive_gershgorin_min < -1.0e6,
"over-smoothed penalty must make the naive penalized-Gershgorin bound spuriously huge-negative; got {naive_gershgorin_min:.3e}"
);
let naive_shift = ridge_floor.max(1e-15) - naive_gershgorin_min;
assert!(
naive_shift > 1.0e6,
"naive penalized-Gershgorin shift should read the spurious ~λ ridge; got {naive_shift:.3e}",
);
let psd_shift =
exact_newton_stabilizing_shift_psd_penalized(&lhs, &h_data, ridge_floor).unwrap_or(0.0);
assert!(
psd_shift < 10.0,
"PSD-penalized shift must stay O(data scale), NOT the ~{lam:.0e} penalty scale; got {psd_shift:.3e}",
);
assert!(
psd_shift > 0.0,
"PSD-penalized shift must still lift the data Hessian's negative eigenvalue; got {psd_shift:.3e}"
);
assert!(
psd_shift * 1.0e5 < naive_shift,
"PSD-penalized shift ({psd_shift:.3e}) must be ≥1e5× smaller than the spurious naive shift ({naive_shift:.3e})",
);
let data_gershgorin_min = (0..p)
.map(|i| {
let radius: f64 = (0..p)
.filter(|&j| j != i)
.map(|j| h_data[[i, j]].abs())
.sum();
h_data[[i, i]] - radius
})
.fold(f64::INFINITY, f64::min);
assert!(
psd_shift >= -data_gershgorin_min - 1e-9,
"PSD-penalized shift ({psd_shift:.3e}) must cover the data-Gershgorin bound ({data_gershgorin_min:.3e})"
);
let mut stabilized = lhs.clone();
for d in 0..stabilized.nrows() {
stabilized[[d, d]] += psd_shift + 1e-6;
}
assert!(
stabilized.cholesky(Side::Lower).is_ok(),
"stabilized penalized lhs must be PD after the PSD-penalized shift",
);
}
#[test]
pub(crate) fn joint_solver_ridge_stabilizes_dense_indefinite_coupled_hessian() {
let family = TwoBlockJointConstrainedFamily { coupling: 2.0 };
let source = JointHessianSource::Dense(array![[1.0, 2.0], [2.0, 1.0]]);
let ranges = vec![(0, 1), (1, 2)];
let s_lambdas = vec![Array2::zeros((1, 1)), Array2::zeros((1, 1))];
let ridge = stabilized_joint_solver_diagonal_ridge(
&family,
&source,
&ranges,
&s_lambdas,
JOINT_TRACE_STABILITY_RIDGE,
1e-12,
None,
);
assert!(
ridge > 1.0,
"dense joint solver ridge should lift the negative eigenvalue; got {ridge}"
);
let mut stabilized = match source {
JointHessianSource::Dense(matrix) => matrix,
JointHessianSource::Operator { .. } => {
panic!("dense joint solver fixture must use a dense Hessian source")
}
};
add_joint_penalty_to_matrix(&mut stabilized, &ranges, &s_lambdas, ridge, None);
let min_eval = 0.5
* (stabilized[[0, 0]] + stabilized[[1, 1]]
- ((stabilized[[0, 0]] - stabilized[[1, 1]]).powi(2)
+ 4.0 * stabilized[[0, 1]].powi(2))
.sqrt());
assert!(
min_eval > 0.0,
"stabilized dense joint Hessian should be SPD; min_eval={min_eval}"
);
}
#[test]
pub(crate) fn outergradient_uses_joint_surrogate_formultiblock_diagonal_family() {
let spec0 = ParameterBlockSpec {
name: "block0".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(array![[1.0], [1.0]])),
offset: array![0.0, 0.0],
penalties: vec![PenaltyMatrix::Dense(Array2::eye(1))],
nullspace_dims: vec![],
initial_log_lambdas: array![0.0],
initial_beta: Some(array![0.0]),
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let spec1 = ParameterBlockSpec {
name: "block1".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(array![[1.0], [1.0]])),
offset: array![0.0, 0.0],
penalties: vec![PenaltyMatrix::Dense(Array2::eye(1))],
nullspace_dims: vec![],
initial_log_lambdas: array![0.0],
initial_beta: Some(array![0.0]),
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let options = BlockwiseFitOptions {
use_remlobjective: true,
ridge_floor: 1e-10,
outer_max_iter: 1,
..BlockwiseFitOptions::default()
};
let penalty_counts = vec![1usize, 1usize];
let rho = array![0.0, 0.0];
let result = outerobjective_andgradient(
&TwoBlockJointSurrogateFamily,
&[spec0, spec1],
&options,
&penalty_counts,
&rho,
None,
);
assert!(
result.is_ok(),
"default joint multi-block surrogate path should succeed without blockwise dW callbacks: {:?}",
result.err()
);
}
#[test]
pub(crate) fn exact_newton_pseudo_laplace_objective_uses_logdet_h_without_logdet_s() {
let spec = ParameterBlockSpec {
name: "pseudo_laplace".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(array![[1.0]])),
offset: array![0.0],
penalties: vec![],
nullspace_dims: vec![],
initial_log_lambdas: Array1::zeros(0),
initial_beta: Some(array![0.0]),
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let options = BlockwiseFitOptions {
use_remlobjective: true,
ridge_floor: CUSTOM_FAMILY_RIDGE_FLOOR,
compute_covariance: false,
..BlockwiseFitOptions::default()
};
let fit = fit_custom_family(
&OneBlockPseudoLaplaceExactFamily { target: 1.5 },
&[spec],
&options,
)
.expect("pseudo-laplace exact-newton fit");
let expected = 0.5 * 2.0_f64.ln();
assert!(
(fit.penalized_objective - expected).abs() < 1e-8,
"pseudo-Laplace objective mismatch: got {}, expected {}",
fit.penalized_objective,
expected
);
}
#[test]
pub(crate) fn exact_newton_joint_psi_hook_can_supply_fixed_beta_termswithout_quadratic_spsi() {
let spec = ParameterBlockSpec {
name: "psi_hook".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(array![[1.0]])),
offset: array![0.0],
penalties: vec![],
nullspace_dims: vec![],
initial_log_lambdas: Array1::zeros(0),
initial_beta: Some(array![0.0]),
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let deriv = CustomFamilyBlockPsiDerivative {
penalty_index: None,
x_psi: Array2::zeros((1, 1)),
s_psi: Array2::zeros((1, 1)),
s_psi_components: None,
s_psi_penalty_components: None,
x_psi_psi: None,
s_psi_psi: None,
s_psi_psi_components: None,
s_psi_psi_penalty_components: None,
implicit_operator: None,
implicit_axis: 0,
implicit_group_id: None,
};
let result = evaluate_custom_family_joint_hyper(
&OneBlockExactPsiHookFamily,
&[spec],
&BlockwiseFitOptions {
use_remlobjective: true,
compute_covariance: false,
..BlockwiseFitOptions::default()
},
&Array1::zeros(0),
&[vec![deriv]],
None,
EvalMode::ValueAndGradient,
)
.expect("joint hyper eval with exact joint psi hook");
assert_eq!(result.gradient.len(), 1);
assert!(
(result.gradient[0] - 3.5).abs() < 1e-12,
"expected family-supplied joint psi term, got {}",
result.gradient[0]
);
}
#[test]
pub(crate) fn pseudo_laplace_exact_newton_rejects_indefinite_hessian() {
let spec = ParameterBlockSpec {
name: "indefinite".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(array![[1.0]])),
offset: array![0.0],
penalties: vec![],
nullspace_dims: vec![],
initial_log_lambdas: Array1::zeros(0),
initial_beta: Some(array![0.0]),
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let result = fit_custom_family(
&OneBlockIndefinitePseudoLaplaceFamily,
&[spec],
&BlockwiseFitOptions {
use_remlobjective: true,
compute_covariance: false,
..BlockwiseFitOptions::default()
},
);
let err = result
.expect_err(
"strict pseudo-Laplace must reject the indefinite Hessian H=[[-1]], not δ-ridge mask it",
)
.to_string();
assert!(
err.contains("indefinite") || err.contains("below -tol"),
"rejection error should name the indefiniteness; got: {err}",
);
}
#[test]
pub(crate) fn auto_determinant_mode_is_exact_full_logdet_policy() {
let h = array![[6.0, 0.8, 0.1], [0.8, 4.5, 0.4], [0.1, 0.4, 3.2]];
let exact =
stable_logdet_with_ridge_policy(&h, 1e-8, RidgePolicy::explicit_stabilization_full_exact())
.expect("exact logdet");
let auto =
stable_logdet_with_ridge_policy(&h, 1e-8, RidgePolicy::explicit_stabilization_full())
.expect("auto logdet");
assert!((auto - exact).abs() < 1e-12, "auto={auto}, exact={exact}");
}
#[test]
pub(crate) fn indefinite_hessian_uses_smooth_regularized_logdet() {
let h = array![[-1.0, 0.0], [0.0, 2.0]];
let logdet =
stable_logdet_with_ridge_policy(&h, 1e-12, RidgePolicy::explicit_stabilization_pospart())
.expect("smooth-regularized logdet must be finite for indefinite H");
assert!(
logdet.is_finite(),
"smooth-regularized logdet should be finite, got {logdet}"
);
let eps = spectral_epsilon(&[-1.0_f64, 2.0]).max(1e-12_f64.max(1e-14));
let expected: f64 = [-1.0_f64 + 1e-12, 2.0 + 1e-12]
.iter()
.map(|&s| spectral_regularize(s, eps).ln())
.sum();
assert!(
(logdet - expected).abs() < 1e-10,
"logdet={logdet}, expected={expected}"
);
}
#[test]
pub(crate) fn pseudo_laplace_exact_newton_symmetrizes_nearly_symmetrichessian() {
let spec = ParameterBlockSpec {
name: "nearly_symmetric".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(array![
[1.0, 0.0],
[0.0, 1.0]
])),
offset: array![0.0, 0.0],
penalties: vec![],
nullspace_dims: vec![],
initial_log_lambdas: Array1::zeros(0),
initial_beta: Some(array![0.0, 0.0]),
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let fit = fit_custom_family(
&OneBlockNearlySymmetricPseudoLaplaceFamily,
&[spec],
&BlockwiseFitOptions {
use_remlobjective: true,
compute_covariance: false,
..BlockwiseFitOptions::default()
},
)
.expect("nearly symmetric pseudo-laplace Hessian should be accepted after symmetrization");
assert!(
fit.penalized_objective.is_finite(),
"expected finite pseudo-laplace objective, got {}",
fit.penalized_objective
);
}
#[test]
pub(crate) fn outer_lamlgradient_matches_finite_differencewhen_joint_exact_path_is_active() {
let BinomialLocationScaleWiggleOuterFixture {
family,
specs,
penalty_counts,
rho,
options: base_options,
} = binomial_location_scale_wiggle_outer_fixture();
let options = BlockwiseFitOptions {
inner_tol: 1e-12,
inner_max_cycles: 500,
..base_options
};
let (f0, g0, _) =
outerobjective_andgradient(&family, &specs, &options, &penalty_counts, &rho, None)
.expect("objective/gradient");
assert!(f0.is_finite());
assert_eq!(g0.len(), rho.len());
let h = 1e-5;
for k in 0..rho.len() {
let mut rho_p = rho.clone();
let mut rho_m = rho.clone();
rho_p[k] += h;
rho_m[k] -= h;
let (fp, _, _) =
outerobjective_andgradient(&family, &specs, &options, &penalty_counts, &rho_p, None)
.expect("objective+");
let (fm, _, _) =
outerobjective_andgradient(&family, &specs, &options, &penalty_counts, &rho_m, None)
.expect("objective-");
let gfd = (fp - fm) / (2.0 * h);
let cost_magnitude = f0.abs().max(1.0);
let noise_floor = (10.0 * f64::EPSILON * cost_magnitude / h).max(1e-9);
let both_in_noise = g0[k].abs() < noise_floor && gfd.abs() < noise_floor;
if !both_in_noise {
assert_eq!(
g0[k].signum(),
gfd.signum(),
"outer LAML gradient sign mismatch at {}: analytic={} fd={} noise_floor={:.3e}",
k,
g0[k],
gfd,
noise_floor,
);
let rel = (g0[k] - gfd).abs() / gfd.abs().max(noise_floor);
assert!(
rel < 2e-2,
"outer LAML gradient mismatch at {}: analytic={} fd={} rel={} noise_floor={:.3e}",
k,
g0[k],
gfd,
rel,
noise_floor,
);
}
}
}
#[test]
pub(crate) fn rho_only_outer_objective_matches_joint_hyper_when_psi_is_empty() {
let BinomialLocationScaleWiggleOuterFixture {
family,
specs,
penalty_counts,
rho,
options,
} = binomial_location_scale_wiggle_outer_fixture();
let (outer_obj, outer_grad, outer_hessian, _) =
super::test_support::outerobjectivegradienthessian(
&family,
&specs,
&options,
&penalty_counts,
&rho,
None,
EvalMode::ValueGradientHessian,
)
.expect("rho-only outer objective");
let derivative_blocks = vec![Vec::<CustomFamilyBlockPsiDerivative>::new(); specs.len()];
let joint_result = evaluate_custom_family_joint_hyper(
&family,
&specs,
&options,
&rho,
&derivative_blocks,
None,
EvalMode::ValueGradientHessian,
)
.expect("joint hyper objective with empty psi");
assert!(
(outer_obj - joint_result.objective).abs() < 1e-12,
"objective mismatch: rho-only={} joint={}",
outer_obj,
joint_result.objective
);
assert_eq!(outer_grad.len(), joint_result.gradient.len());
let max_grad_diff = outer_grad
.iter()
.zip(joint_result.gradient.iter())
.map(|(lhs, rhs)| (lhs - rhs).abs())
.fold(0.0_f64, f64::max);
assert!(
max_grad_diff < 1e-12,
"gradient mismatch: max diff={}",
max_grad_diff
);
let outer_hessian = outer_hessian.expect("rho-only outer Hessian");
let joint_hessian = joint_result
.outer_hessian
.materialize_dense()
.expect("joint outer Hessian should materialize")
.expect("joint outer Hessian");
assert_eq!(outer_hessian.dim(), joint_hessian.dim());
let max_hessian_diff = outer_hessian
.iter()
.zip(joint_hessian.iter())
.map(|(lhs, rhs)| (lhs - rhs).abs())
.fold(0.0_f64, f64::max);
assert!(
max_hessian_diff < 1e-12,
"outer Hessian mismatch: max diff={}",
max_hessian_diff
);
}
pub(crate) fn binomial_location_scale_outer_fixture(
y: Array1<f64>,
threshold_initial_beta: f64,
log_sigma_initial_beta: f64,
) -> (
BinomialLocationScaleFamily,
Vec<ParameterBlockSpec>,
Vec<usize>,
BlockwiseFitOptions,
) {
let n = y.len();
let weights = Array1::from_elem(n, 1.0);
let thresholdspec = ParameterBlockSpec {
name: "threshold".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(Array2::from_elem(
(n, 1),
1.0,
))),
offset: Array1::zeros(n),
penalties: vec![PenaltyMatrix::Dense(Array2::eye(1))],
nullspace_dims: vec![],
initial_log_lambdas: array![0.0],
initial_beta: Some(array![threshold_initial_beta]),
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let log_sigmaspec = ParameterBlockSpec {
name: "log_sigma".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(Array2::from_elem(
(n, 1),
1.0,
))),
offset: Array1::zeros(n),
penalties: vec![PenaltyMatrix::Dense(Array2::eye(1))],
nullspace_dims: vec![],
initial_log_lambdas: array![0.0],
initial_beta: Some(array![log_sigma_initial_beta]),
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let threshold_design = thresholdspec.design.clone();
let log_sigma_design = log_sigmaspec.design.clone();
let family = BinomialLocationScaleFamily {
y,
weights,
link_kind: crate::types::InverseLink::Standard(crate::types::StandardLink::Probit),
threshold_design: Some(threshold_design),
log_sigma_design: Some(log_sigma_design),
policy: crate::resource::ResourcePolicy::default_library(),
};
let specs = vec![thresholdspec, log_sigmaspec];
let penalty_counts = vec![1usize, 1usize];
let options = BlockwiseFitOptions {
use_remlobjective: true,
ridge_floor: 1e-10,
outer_max_iter: 1,
..BlockwiseFitOptions::default()
};
(family, specs, penalty_counts, options)
}
#[test]
pub(crate) fn outer_lamlgradient_diagonal_binomial_location_scale_matchesfd() {
let y = Array1::from_vec(vec![0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0]);
let (family, specs, penalty_counts, options) =
binomial_location_scale_outer_fixture(y, 0.0, 0.0);
let rho = array![0.0, 0.0];
let (f0, g0, _) =
outerobjective_andgradient(&family, &specs, &options, &penalty_counts, &rho, None)
.expect("objective/gradient");
assert!(f0.is_finite());
assert_eq!(g0.len(), rho.len());
let h = 1e-5;
for k in 0..rho.len() {
let mut rho_p = rho.clone();
let mut rho_m = rho.clone();
rho_p[k] += h;
rho_m[k] -= h;
let (fp, _, _) =
outerobjective_andgradient(&family, &specs, &options, &penalty_counts, &rho_p, None)
.expect("objective+");
let (fm, _, _) =
outerobjective_andgradient(&family, &specs, &options, &penalty_counts, &rho_m, None)
.expect("objective-");
let gfd = (fp - fm) / (2.0 * h);
let abs = (g0[k] - gfd).abs();
let rel = abs / gfd.abs().max(1e-8);
if abs >= 2e-3 {
assert_eq!(
g0[k].signum(),
gfd.signum(),
"outer diagonal LAML gradient sign mismatch at {}: analytic={} fd={}",
k,
g0[k],
gfd
);
}
assert!(
abs < 2e-3 || rel < 2e-3,
"outer diagonal LAML gradient mismatch at {}: analytic={} fd={} abs={} rel={}",
k,
g0[k],
gfd,
abs,
rel
);
}
}
#[test]
pub(crate) fn outer_lamlgradient_diagonal_binomial_location_scale_hard_case_matchesfd() {
let y = Array1::from_vec(vec![0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0]);
let (family, specs, penalty_counts, options) =
binomial_location_scale_outer_fixture(y, 0.2, -0.1);
let rho = array![0.15, -0.25];
let (f0, g0, _) =
outerobjective_andgradient(&family, &specs, &options, &penalty_counts, &rho, None)
.expect("objective/gradient");
assert!(f0.is_finite());
assert_eq!(g0.len(), rho.len());
let h = 1e-5;
for k in 0..rho.len() {
let mut rho_p = rho.clone();
let mut rho_m = rho.clone();
rho_p[k] += h;
rho_m[k] -= h;
let (fp, _, _) =
outerobjective_andgradient(&family, &specs, &options, &penalty_counts, &rho_p, None)
.expect("objective+");
let (fm, _, _) =
outerobjective_andgradient(&family, &specs, &options, &penalty_counts, &rho_m, None)
.expect("objective-");
let gfd = (fp - fm) / (2.0 * h);
let abs = (g0[k] - gfd).abs();
let rel = abs / gfd.abs().max(1e-8);
if abs >= 2e-3 {
assert_eq!(
g0[k].signum(),
gfd.signum(),
"outer diagonal hard-case LAML gradient sign mismatch at {}: analytic={} fd={}",
k,
g0[k],
gfd
);
}
assert!(
abs < 2e-3 || rel < 2e-3,
"outer diagonal hard-case LAML gradient mismatch at {}: analytic={} fd={} abs={} rel={}",
k,
g0[k],
gfd,
abs,
rel
);
}
}
#[test]
pub(crate) fn outer_lamlhessian_joint_exact_binomial_location_scale_matchesfd() {
let y = Array1::from_vec(vec![0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0]);
let (family, specs, penalty_counts, options) =
binomial_location_scale_outer_fixture(y, 0.15, -0.05);
let rho = array![0.1, -0.2];
let (_, _, h0_opt, _) = super::test_support::outerobjectivegradienthessian(
&family,
&specs,
&options,
&penalty_counts,
&rho,
None,
EvalMode::ValueGradientHessian,
)
.expect("objective/gradient/hessian");
let h0 = h0_opt.expect("analytic outer Hessian should be available");
assert_eq!(h0.nrows(), rho.len());
assert_eq!(h0.ncols(), rho.len());
let h = 1e-5;
for l in 0..rho.len() {
let mut rho_p = rho.clone();
let mut rho_m = rho.clone();
rho_p[l] += h;
rho_m[l] -= h;
let (_, gp, _, _) = super::test_support::outerobjectivegradienthessian(
&family,
&specs,
&options,
&penalty_counts,
&rho_p,
None,
EvalMode::ValueAndGradient,
)
.expect("objective/gradient +");
let (_, gm, _, _) = super::test_support::outerobjectivegradienthessian(
&family,
&specs,
&options,
&penalty_counts,
&rho_m,
None,
EvalMode::ValueAndGradient,
)
.expect("objective/gradient -");
for k in 0..rho.len() {
let hfd = (gp[k] - gm[k]) / (2.0 * h);
let abs_err = (h0[[k, l]] - hfd).abs();
let rel = (h0[[k, l]] - hfd).abs() / hfd.abs().max(1e-7);
if h0[[k, l]].abs().max(hfd.abs()) > 1e-10 {
assert_eq!(
h0[[k, l]].signum(),
hfd.signum(),
"outer Hessian sign mismatch at ({k},{l}): analytic={} fd={}",
h0[[k, l]],
hfd
);
}
assert!(
abs_err < 1e-8 || rel < 2e-2,
"outer Hessian mismatch at ({k},{l}): analytic={} fd={} abs={} rel={}",
h0[[k, l]],
hfd,
abs_err,
rel
);
}
}
for i in 0..h0.nrows() {
for j in 0..i {
let asym = (h0[[i, j]] - h0[[j, i]]).abs();
assert!(
asym < 1e-8,
"outer Hessian not symmetric at ({i},{j}): {asym}"
);
}
}
}
#[test]
pub(crate) fn block_solve_sparse_matches_dense() {
let x_dense = array![
[1.0, 0.0, 2.0],
[0.0, 3.0, 0.0],
[4.0, 0.0, 5.0],
[0.0, 6.0, 0.0]
];
let y_star = array![1.0, -1.0, 0.5, 2.0];
let w = array![1.0, 0.5, 2.0, 1.5];
let s_lambda = Array2::<f64>::eye(3) * 0.1;
let mut triplets = Vec::new();
for i in 0..x_dense.nrows() {
for j in 0..x_dense.ncols() {
let v = x_dense[[i, j]];
if v != 0.0 {
triplets.push(Triplet::new(i, j, v));
}
}
}
let x_sparse = SparseColMat::try_new_from_triplets(4, 3, &triplets)
.expect("sparse matrix build should succeed");
let beta_dense = solve_blockweighted_system(
&DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(x_dense.clone())),
&y_star,
&w,
&s_lambda,
1e-12,
RidgePolicy::explicit_stabilization_pospart(),
)
.expect("dense solve should succeed");
let beta_sparse = solve_blockweighted_system(
&DesignMatrix::from(x_sparse),
&y_star,
&w,
&s_lambda,
1e-12,
RidgePolicy::explicit_stabilization_pospart(),
)
.expect("sparse solve should succeed");
for j in 0..beta_dense.len() {
assert!(
(beta_dense[j] - beta_sparse[j]).abs() < 1e-10,
"dense/sparse mismatch at {}: {} vs {}",
j,
beta_dense[j],
beta_sparse[j]
);
}
}
#[test]
pub(crate) fn outer_lamlhessian_joint_exact_binomial_location_scale_hard_case_matchesfd() {
let y = Array1::from_vec(vec![0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0]);
let (family, specs, penalty_counts, options) =
binomial_location_scale_outer_fixture(y, 0.2, -0.1);
let rho = array![0.15, -0.25];
let (_, _, h0_opt, _) = super::test_support::outerobjectivegradienthessian(
&family,
&specs,
&options,
&penalty_counts,
&rho,
None,
EvalMode::ValueGradientHessian,
)
.expect("objective/gradient/hessian");
let h0 = h0_opt.expect("analytic outer Hessian should be available");
assert_eq!(h0.nrows(), rho.len());
assert_eq!(h0.ncols(), rho.len());
let h = 1e-5;
for l in 0..rho.len() {
let mut rho_p = rho.clone();
let mut rho_m = rho.clone();
rho_p[l] += h;
rho_m[l] -= h;
let (_, gp, _, _) = super::test_support::outerobjectivegradienthessian(
&family,
&specs,
&options,
&penalty_counts,
&rho_p,
None,
EvalMode::ValueAndGradient,
)
.expect("objective/gradient +");
let (_, gm, _, _) = super::test_support::outerobjectivegradienthessian(
&family,
&specs,
&options,
&penalty_counts,
&rho_m,
None,
EvalMode::ValueAndGradient,
)
.expect("objective/gradient -");
for k in 0..rho.len() {
let hfd = (gp[k] - gm[k]) / (2.0 * h);
let abs_err = (h0[[k, l]] - hfd).abs();
let rel = abs_err / hfd.abs().max(1e-7);
if h0[[k, l]].abs().max(hfd.abs()) > 1e-10 {
assert_eq!(
h0[[k, l]].signum(),
hfd.signum(),
"hard-case outer Hessian sign mismatch at ({k},{l}): analytic={} fd={}",
h0[[k, l]],
hfd
);
}
assert!(
abs_err < 1e-8 || rel < 2e-2,
"hard-case outer Hessian mismatch at ({k},{l}): analytic={} fd={} abs={} rel={}",
h0[[k, l]],
hfd,
abs_err,
rel
);
}
}
}
#[test]
pub(crate) fn block_solve_falls_backwhen_llt_rejects_indefinite_system() {
let x_dense = array![[1.0, 0.0], [0.0, 0.0]];
let y_star = array![2.0, 0.0];
let w = array![1.0, 1.0];
let s_lambda = array![[0.0, 0.0], [0.0, -1e-12]];
let beta = solve_blockweighted_system(
&DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(x_dense)),
&y_star,
&w,
&s_lambda,
1e-12,
RidgePolicy::explicit_stabilization_pospart(),
)
.expect("fallback solve should succeed");
assert!(beta.iter().all(|v| v.is_finite()));
assert!(
(beta[0] - 2.0).abs() < 1e-10,
"unexpected solved coefficient"
);
assert!(
beta[1].abs() < 1e-8,
"null-space coefficient should stay near zero"
);
}
#[test]
pub(crate) fn exact_newton_block_enforces_linear_constraints() {
let spec = ParameterBlockSpec {
name: "exact_block".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(array![[1.0]])),
offset: array![0.0],
penalties: vec![],
nullspace_dims: vec![],
initial_log_lambdas: Array1::zeros(0),
initial_beta: Some(array![1.5]),
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let family = OneBlockConstrainedExactFamily {
target: 0.0,
lower: 1.0,
};
let fit = fit_custom_family(&family, &[spec], &BlockwiseFitOptions::default())
.expect("constrained exact-newton fit");
let beta = fit.block_states[0].beta[0];
assert!(
(beta - 1.0).abs() < 1e-8,
"expected constrained optimum at lower bound, got {beta}"
);
}
#[test]
pub(crate) fn extract_simple_lower_bounds_accepts_axis_aligned_rows() {
let constraints = LinearInequalityConstraints {
a: array![[1.0, 0.0], [0.0, 2.0], [3.0, 0.0]],
b: array![0.25, 1.0, 1.5],
};
let bounds = extract_simple_lower_bounds(&constraints, 2)
.expect("lower-bound extraction should succeed")
.expect("axis-aligned rows should map to lower bounds");
assert_relative_eq!(bounds.lower_bounds[0], 0.5, epsilon = 1e-12);
assert_relative_eq!(bounds.lower_bounds[1], 0.5, epsilon = 1e-12);
assert_eq!(bounds.coeff_to_row, vec![Some(2), Some(1)]);
}
#[test]
pub(crate) fn extract_simple_lower_bounds_rejects_coupled_rows() {
let constraints = LinearInequalityConstraints {
a: array![[1.0, 1.0]],
b: array![0.0],
};
assert!(
extract_simple_lower_bounds(&constraints, 2)
.expect("lower-bound extraction should not error on valid shapes")
.is_none(),
"coupled rows must stay on the generic linear-constraint path"
);
}
#[test]
pub(crate) fn constrained_exact_newton_indefinite_hessian_uses_stabilized_delta_solve() {
let spec = ParameterBlockSpec {
name: "exact_block".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(array![[1.0]])),
offset: array![0.0],
penalties: vec![],
nullspace_dims: vec![],
initial_log_lambdas: Array1::zeros(0),
initial_beta: Some(array![1.5]),
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let states = vec![ParameterBlockState {
beta: array![1.5],
eta: array![1.5],
}];
let constraints = LinearInequalityConstraints {
a: array![[1.0]],
b: array![1.0],
};
let hessian = SymmetricMatrix::Dense(array![[-1.0]]);
let updater = ExactNewtonBlockUpdater {
gradient: &array![-1.0],
hessian: &hessian,
};
let s_lambda = Array2::zeros((1, 1));
let update = updater
.compute_update_step(&BlockUpdateContext {
family: &OneBlockConstrainedIndefiniteHessianFamily,
states: &states,
spec: &spec,
block_idx: 0,
s_lambda: &s_lambda,
options: &BlockwiseFitOptions::default(),
linear_constraints: Some(&constraints),
cached_active_set: None,
})
.expect("indefinite constrained exact-newton update should be stabilized");
assert_relative_eq!(update.beta_new_raw[0], 1.0, epsilon = 1e-12);
assert_eq!(update.active_set, Some(vec![0]));
}
#[test]
pub(crate) fn quadratic_linear_constraints_release_positive_kkt_systemmultiplier() {
let hessian = array![[1.0]];
let rhs = array![1.0];
let beta_start = array![0.0];
let constraints = LinearInequalityConstraints {
a: array![[1.0], [-1.0]],
b: array![0.0, -0.1],
};
let (beta, active) =
solve_quadratic_with_linear_constraints(&hessian, &rhs, &beta_start, &constraints, None)
.expect("constrained quadratic solve should succeed");
assert!(
(beta[0] - 0.1).abs() <= 1e-10,
"expected constrained optimum at upper bound 0.1, got {}",
beta[0]
);
assert_eq!(active.len(), 1);
}
#[test]
pub(crate) fn quadratic_linear_constraints_ignore_near_tangential_inactiverows() {
let hessian = array![[1.0, 0.0], [0.0, 1.0]];
let rhs = array![1.0, 0.0];
let beta_start = array![0.0, 0.0];
let constraints = LinearInequalityConstraints {
a: array![[-1e-16, 1.0]],
b: array![-1.0],
};
let (beta, active) =
solve_quadratic_with_linear_constraints(&hessian, &rhs, &beta_start, &constraints, None)
.expect("near-tangential inactive row should not block the quadratic step");
assert!(
(beta[0] - 1.0).abs() <= 1e-12,
"expected unconstrained x-solution of 1.0, got {}",
beta[0]
);
assert!(
beta[1].abs() <= 1e-12,
"expected zero y-solution, got {}",
beta[1]
);
assert!(active.is_empty(), "no row should become active");
}
#[test]
pub(crate) fn quadratic_linear_constraints_projectwarm_activerows_back_to_boundary() {
let hessian = array![[2.0]];
let rhs = array![0.0];
let beta_start = array![1e-9];
let constraints = LinearInequalityConstraints {
a: array![[1.0]],
b: array![0.0],
};
let (beta, active) = solve_quadratic_with_linear_constraints(
&hessian,
&rhs,
&beta_start,
&constraints,
Some(&[0]),
)
.expect("constrained quadratic solve should project back to the boundary");
assert_relative_eq!(beta[0], 0.0, epsilon = 1e-14);
assert_eq!(active, vec![0]);
}
#[test]
pub(crate) fn quadratic_linear_constraints_handles_near_dependent_rows() {
let hessian = Array2::eye(2);
let rhs = array![-1.0, -1.0]; let beta_start = array![0.0, 0.0];
let eps = 1e-14;
let constraints = LinearInequalityConstraints {
a: array![[1.0, 0.0], [0.0, 1.0], [1.0 + eps, 1.0]],
b: array![0.0, 0.0, 0.0],
};
let (beta, active) = solve_quadratic_with_linear_constraints(
&hessian,
&rhs,
&beta_start,
&constraints,
Some(&[0, 1, 2]), )
.expect("near-dependent constraint QP should converge");
assert!(
beta[0].abs() <= 1e-10 && beta[1].abs() <= 1e-10,
"expected optimum at origin, got ({}, {})",
beta[0],
beta[1]
);
assert!(
active.len() <= 2,
"at most 2 independent constraints should remain active, got {}",
active.len()
);
}
#[test]
pub(crate) fn quadratic_linear_constraints_release_merged_constraint_group_by_id() {
let hessian = array![[1.0]];
let rhs = array![1.0];
let beta_start = array![0.0];
let constraints = LinearInequalityConstraints {
a: array![[1.0], [2.0], [-1.0]],
b: array![0.0, 0.0, -0.1],
};
let (beta, active) = solve_quadratic_with_linear_constraints(
&hessian,
&rhs,
&beta_start,
&constraints,
Some(&[0, 1]),
)
.expect("merged active constraint group should release cleanly");
assert!(
(beta[0] - 0.1).abs() <= 1e-10,
"expected constrained optimum at upper bound 0.1, got {}",
beta[0]
);
assert_eq!(active, vec![2]);
}
#[test]
pub(crate) fn quadratic_linear_constraints_release_merged_group_with_unsorted_active_positions() {
let hessian = array![[1.0]];
let rhs = array![1.0];
let beta_start = array![0.0];
let constraints = LinearInequalityConstraints {
a: array![[1.0], [2.0], [-1.0]],
b: array![0.0, 0.0, -0.1],
};
let (beta, active) = solve_quadratic_with_linear_constraints(
&hessian,
&rhs,
&beta_start,
&constraints,
Some(&[2, 0, 1]),
)
.expect("merged active group release should handle unsorted active positions");
assert!(
(beta[0] - 0.1).abs() <= 1e-10,
"expected constrained optimum at upper bound 0.1, got {}",
beta[0]
);
assert_eq!(active, vec![2]);
}
#[test]
pub(crate) fn quadratic_linear_constraints_accept_boundary_kkt_after_rank_reduction() {
let hessian = array![[2.0]];
let rhs = array![0.0];
let beta_start = array![1e-9];
let constraints = LinearInequalityConstraints {
a: array![[1.0], [1.0 + 1e-13], [2.0], [3.0]],
b: array![0.0, 0.0, 0.0, 0.0],
};
let (beta, active) = solve_quadratic_with_linear_constraints(
&hessian,
&rhs,
&beta_start,
&constraints,
Some(&[0, 1, 2, 3]),
)
.expect("degenerate boundary KKT point should be accepted");
assert_relative_eq!(beta[0], 0.0, epsilon = 1e-14);
assert!(
active.len() <= 1,
"rank-reduced boundary solution should keep at most one representative, got {:?}",
active
);
}
#[test]
pub(crate) fn quadratic_linear_constraints_singular_kkt_uses_pseudoinverse_fallback() {
let hessian = Array2::<f64>::zeros((2, 2));
let rhs = array![0.0, 0.0];
let beta_start = array![0.0, 0.0];
let constraints = LinearInequalityConstraints {
a: array![[1.0, 1.0]],
b: array![0.0],
};
let (beta, active) = solve_quadratic_with_linear_constraints(
&hessian,
&rhs,
&beta_start,
&constraints,
Some(&[0]),
)
.expect("singular KKT system should fall back to a finite pseudoinverse solve");
assert!(beta.iter().all(|value| value.is_finite()));
assert_relative_eq!(beta[0], 0.0, epsilon = 1e-14);
assert_relative_eq!(beta[1], 0.0, epsilon = 1e-14);
assert_eq!(active, vec![0]);
}
#[test]
pub(crate) fn rank_reduce_drops_exactly_dependent_row() {
let a = array![[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [1.0, 1.0, 0.0],];
let b = array![0.0, 0.0, 0.0];
let member_constraint_ids = vec![vec![0], vec![1], vec![2]];
let (a_out, b_out, member_constraint_ids_out, _) =
crate::solver::active_set::rank_reduce_rows_pivoted_qr_with_dependence(
a,
b,
member_constraint_ids,
);
assert_eq!(
a_out.nrows(),
2,
"should keep 2 independent rows, got {}",
a_out.nrows()
);
assert_eq!(b_out.len(), 2);
let total_constraint_ids: usize = member_constraint_ids_out.iter().map(|g| g.len()).sum();
assert_eq!(
total_constraint_ids, 3,
"all original constraint ids must be preserved"
);
}
#[test]
pub(crate) fn rank_reduce_preserves_full_rank_matrix() {
let a = array![[1.0, 0.0], [0.0, 1.0], [1.0, 1.0],];
let b = array![0.0, 0.0, 0.0];
let member_constraint_ids = vec![vec![0], vec![1], vec![2]];
let (a_out, b_out, member_constraint_ids_out, _) =
crate::solver::active_set::rank_reduce_rows_pivoted_qr_with_dependence(
a,
b,
member_constraint_ids,
);
assert_eq!(a_out.nrows(), 2);
assert_eq!(b_out.len(), 2);
let total_constraint_ids: usize = member_constraint_ids_out.iter().map(|g| g.len()).sum();
assert_eq!(total_constraint_ids, 3);
}
#[test]
pub(crate) fn constrained_exact_newton_nan_hessian_returns_feasible_noop_instead_of_failing() {
let spec = ParameterBlockSpec {
name: "exact_block".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(array![[1.0]])),
offset: array![0.0],
penalties: vec![],
nullspace_dims: vec![],
initial_log_lambdas: Array1::zeros(0),
initial_beta: Some(array![0.0]),
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let states = vec![ParameterBlockState {
beta: array![0.0],
eta: array![0.0],
}];
let constraints = LinearInequalityConstraints {
a: array![[1.0]],
b: array![0.0],
};
let hessian = SymmetricMatrix::Dense(array![[f64::NAN]]);
let updater = ExactNewtonBlockUpdater {
gradient: &array![0.0],
hessian: &hessian,
};
let s_lambda = Array2::zeros((1, 1));
let update = updater
.compute_update_step(&BlockUpdateContext {
family: &OneBlockConstrainedNaNHessianFamily,
states: &states,
spec: &spec,
block_idx: 0,
s_lambda: &s_lambda,
options: &BlockwiseFitOptions::default(),
linear_constraints: Some(&constraints),
cached_active_set: None,
})
.expect("constrained exact-newton NaN Hessian should produce a no-op update");
assert_relative_eq!(update.beta_new_raw[0], 0.0, epsilon = 1e-14);
assert_eq!(update.active_set, Some(vec![0]));
}
#[test]
pub(crate) fn outerobjective_failure_context_is_preserved() {
let spec = ParameterBlockSpec {
name: "err_block".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(array![[1.0], [1.0]])),
offset: array![0.0, 0.0],
penalties: vec![PenaltyMatrix::Dense(Array2::eye(1))],
nullspace_dims: vec![],
initial_log_lambdas: array![0.0],
initial_beta: Some(array![0.0]),
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let options = BlockwiseFitOptions {
outer_max_iter: 3,
..BlockwiseFitOptions::default()
};
let err = match fit_custom_family(&OneBlockAlwaysErrorFamily, &[spec], &options) {
Ok(_) => panic!("fit should fail when family evaluate always errors"),
Err(e) => e,
};
assert!(
err.to_string().contains(
"last objective error: synthetic outer objective failure: block[0] evaluate()"
),
"expected preserved root-cause context in error, got: {err}"
);
}
#[test]
pub(crate) fn fit_fails_when_requested_covariance_cannot_be_computed() {
let spec = ParameterBlockSpec {
name: "cov_block".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(array![[1.0], [1.0]])),
offset: array![0.0, 0.0],
penalties: vec![],
nullspace_dims: vec![],
initial_log_lambdas: Array1::zeros(0),
initial_beta: Some(array![0.0]),
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let options = BlockwiseFitOptions {
use_remlobjective: false,
compute_covariance: true,
..BlockwiseFitOptions::default()
};
let err = match fit_custom_family(&OneBlockCovarianceErrorFamily, &[spec], &options) {
Ok(_) => panic!("fit should fail when covariance computation fails"),
Err(e) => e,
};
assert!(
err.to_string()
.contains("synthetic covariance assembly failure"),
"expected covariance root cause in fit error, got: {err}"
);
}
#[derive(Clone)]
pub(crate) struct TwoBlockNaNHessianFamily;
impl CustomFamily for TwoBlockNaNHessianFamily {
fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
let _unused_block_states = block_states;
let n0 = block_states[0].eta.len();
let p1 = block_states[1].beta.len();
let mut hessian = Array2::<f64>::eye(p1);
hessian[[0, 0]] = f64::NAN; Ok(FamilyEvaluation {
log_likelihood: -0.5 * block_states[0].eta.iter().map(|&v| v * v).sum::<f64>(),
blockworking_sets: vec![
BlockWorkingSet::Diagonal {
working_response: Array1::zeros(n0),
working_weights: Array1::ones(n0),
},
BlockWorkingSet::ExactNewton {
gradient: Array1::zeros(p1),
hessian: SymmetricMatrix::Dense(hessian),
},
],
})
}
}
#[derive(Clone)]
pub(crate) struct TwoBlockFiniteHessianFamily;
impl CustomFamily for TwoBlockFiniteHessianFamily {
fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
let _unused_block_states = block_states;
let n0 = block_states[0].eta.len();
let p1 = block_states[1].beta.len();
let beta1 = &block_states[1].beta;
let resid1: f64 = beta1.iter().map(|&b| b * b).sum();
Ok(FamilyEvaluation {
log_likelihood: -0.5 * block_states[0].eta.iter().map(|&v| v * v).sum::<f64>()
- 0.5 * resid1,
blockworking_sets: vec![
BlockWorkingSet::Diagonal {
working_response: Array1::zeros(n0),
working_weights: Array1::ones(n0),
},
BlockWorkingSet::ExactNewton {
gradient: -beta1.clone(),
hessian: SymmetricMatrix::Dense(Array2::eye(p1)),
},
],
})
}
}
#[derive(Clone)]
pub(crate) struct TwoBlockNaNHessianPseudoLaplaceFamily;
impl CustomFamily for TwoBlockNaNHessianPseudoLaplaceFamily {
fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
let _unused_block_states = block_states;
TwoBlockNaNHessianFamily.evaluate(block_states)
}
fn exact_newton_outerobjective(&self) -> ExactNewtonOuterObjective {
ExactNewtonOuterObjective::StrictPseudoLaplace
}
}
pub(crate) fn make_two_block_specs(n: usize) -> Vec<ParameterBlockSpec> {
vec![
ParameterBlockSpec {
name: "mu".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(Array2::from_elem(
(n, 1),
1.0,
))),
offset: Array1::zeros(n),
penalties: vec![],
nullspace_dims: vec![],
initial_log_lambdas: Array1::zeros(0),
initial_beta: Some(array![0.0]),
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
},
ParameterBlockSpec {
name: "log_sigma".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(Array2::from_elem(
(n, 2),
1.0,
))),
offset: Array1::zeros(n),
penalties: vec![],
nullspace_dims: vec![],
initial_log_lambdas: Array1::zeros(0),
initial_beta: Some(array![0.0, 0.0]),
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
},
]
}
#[test]
pub(crate) fn exact_newton_nan_hessian_fails_loudly_before_eigendecomposition() {
let specs = make_two_block_specs(4);
let per_block_log_lambdas = vec![Array1::zeros(0), Array1::zeros(0)];
let options = BlockwiseFitOptions {
inner_max_cycles: 1,
use_remlobjective: false,
compute_covariance: false,
..BlockwiseFitOptions::default()
};
let result = inner_blockwise_fit(
&TwoBlockNaNHessianFamily,
&specs,
&per_block_log_lambdas,
&options,
None,
);
let err = result.expect_err("NaN exact Hessian must fail loudly");
assert!(
err.contains("smooth-regularized logdet Hessian contains non-finite entry"),
"expected explicit non-finite Hessian error, got: {err}"
);
}
#[test]
pub(crate) fn exact_newton_finite_hessian_succeeds_where_nan_hessian_fails() {
let specs = make_two_block_specs(4);
let per_block_log_lambdas = vec![Array1::zeros(0), Array1::zeros(0)];
let options = BlockwiseFitOptions {
inner_max_cycles: 1,
use_remlobjective: false,
compute_covariance: false,
..BlockwiseFitOptions::default()
};
let result = inner_blockwise_fit(
&TwoBlockFiniteHessianFamily,
&specs,
&per_block_log_lambdas,
&options,
None,
);
assert!(
result.is_ok(),
"inner fit should succeed with finite Hessian: {:?}",
result.err()
);
}
#[test]
pub(crate) fn checked_penalizedobjective_rejects_non_finite_values() {
let err = checked_penalizedobjective(-1.0, 0.5, f64::NAN, "test objective")
.expect_err("non-finite objective should fail loudly");
assert!(
err.contains("non-finite penalized objective"),
"unexpected error: {err}"
);
}
#[test]
pub(crate) fn exact_newton_dh_closure_rejects_non_finite_directional_derivative() {
#[derive(Clone)]
struct OneBlockNonFiniteJointDhFamily;
impl CustomFamily for OneBlockNonFiniteJointDhFamily {
fn evaluate(
&self,
block_states: &[ParameterBlockState],
) -> Result<FamilyEvaluation, String> {
let _unused_block_states = block_states;
let beta = block_states
.first()
.ok_or_else(|| "missing block 0".to_string())?
.beta
.clone();
Ok(FamilyEvaluation {
log_likelihood: -0.5 * beta.dot(&beta),
blockworking_sets: vec![BlockWorkingSet::ExactNewton {
gradient: beta.mapv(|v| -v),
hessian: SymmetricMatrix::Dense(array![[1.0]]),
}],
})
}
fn exact_newton_joint_hessian(
&self,
block_states: &[ParameterBlockState],
) -> Result<Option<Array2<f64>>, String> {
let _unused_block_states = block_states;
Ok(Some(array![[1.0]]))
}
fn exact_newton_joint_hessian_directional_derivative(
&self,
block_states: &[ParameterBlockState],
arr: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
let _unused_block_states = block_states;
assert!(arr.iter().all(|v| !v.is_nan()));
Ok(Some(array![[f64::NAN]]))
}
}
let family = OneBlockNonFiniteJointDhFamily;
let specs = vec![ParameterBlockSpec {
name: "beta".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(Array2::from_elem(
(2, 1),
1.0,
))),
offset: Array1::zeros(2),
penalties: vec![],
nullspace_dims: vec![],
initial_log_lambdas: Array1::zeros(0),
initial_beta: Some(array![0.0]),
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
}];
let states = vec![ParameterBlockState {
beta: array![0.0],
eta: Array1::zeros(2),
}];
let synced_states = Arc::new(
synchronized_states_from_flat_beta(&family, &specs, &states, &array![0.0])
.expect("sync states for exact_newton_dh_closure"),
);
let compute_dh = exact_newton_dh_closure(&family, synced_states, &specs, 1, false, 1.0, None);
let err = compute_dh(&array![1.0]).expect_err("non-finite dH should fail loudly");
assert!(err.contains("non-finite"), "unexpected error: {err}");
}
#[test]
pub(crate) fn nan_propagating_min_detects_nan_eigenvalues() {
let mut mat = Array2::<f64>::eye(3);
mat[[1, 0]] = f64::NAN;
mat[[0, 1]] = f64::NAN;
use crate::faer_ndarray::FaerEigh;
match FaerEigh::eigh(&mat, faer::Side::Lower) {
Err(_) => {
}
Ok((evals, _)) => {
let new_min = evals.iter().copied().fold(f64::INFINITY, |a, b| {
if a.is_nan() || b.is_nan() {
f64::NAN
} else {
a.min(b)
}
});
assert!(
!new_min.is_finite(),
"NaN-propagating min should detect NaN eigenvalues, got {new_min}"
);
}
}
}
#[test]
pub(crate) fn multiblock_generic_outer_fallback_returns_error_instead_of_panicking() {
let family = TwoBlockFiniteHessianFamily;
let specs = make_two_block_specs(4);
let penalty_counts = vec![0usize, 0usize];
let rho = Array1::zeros(0);
let options = BlockwiseFitOptions {
use_remlobjective: true,
outer_max_iter: 1,
..BlockwiseFitOptions::default()
};
let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
super::test_support::outerobjectivegradienthessian(
&family,
&specs,
&options,
&penalty_counts,
&rho,
None,
EvalMode::ValueGradientHessian,
)
}));
let outcome = result.expect("multi-block outer fallback must return an error, not panic");
let err = match outcome {
Ok(_) => panic!("multi-block family without a joint path should fail loudly"),
Err(err) => err.to_string(),
};
assert!(
err.contains("multi-block families must provide a joint outer path"),
"unexpected error: {err}"
);
}
#[test]
pub(crate) fn pseudo_laplace_path_skips_eigendecomposition_avoiding_nan_crash() {
let specs = make_two_block_specs(4);
let per_block_log_lambdas = vec![Array1::zeros(0), Array1::zeros(0)];
let options = BlockwiseFitOptions {
inner_max_cycles: 1,
use_remlobjective: false,
compute_covariance: false,
..BlockwiseFitOptions::default()
};
let result = inner_blockwise_fit(
&TwoBlockNaNHessianPseudoLaplaceFamily,
&specs,
&per_block_log_lambdas,
&options,
None,
);
match result {
Ok(_) => {} Err(ref msg) => {
assert!(
!msg.contains("exact-newton eigendecomposition failed"),
"PseudoLaplace path should NOT hit eigendecomposition; \
got eigendecomposition error anyway: {msg}"
);
}
}
}
#[test]
pub(crate) fn strict_solve_spd_falls_back_to_eigen_floor_on_indefinite_matrix() {
let p = 4usize;
let mut h = Array2::<f64>::zeros((p, p));
for i in 0..p {
h[[i, i]] = -1e32 - (i as f64) * 1e30;
}
h[[0, 1]] = 5e29;
h[[1, 0]] = 5e29;
let rhs = Array1::from_vec(vec![1e30, -5e29, 2.5e29, 7.5e29]);
let (x, stats) = strict_solve_spd_with_lm_continuation(&h, &rhs)
.expect("eigen-floor fallback must succeed on the negative-definite matrix");
assert!(
stats.escalations > 16,
"expected eigen-floor terminal fallback (escalations > MAX_ESCALATIONS), got {}",
stats.escalations,
);
for &v in x.iter() {
assert!(
v.is_finite(),
"eigen-floor solve returned non-finite component {v}"
);
}
let mut sym = h.clone();
symmetrize_dense_in_place(&mut sym);
let (evals, evecs) = FaerEigh::eigh(&sym, Side::Lower).expect("eigh");
let max_abs_eval = evals.iter().fold(0.0_f64, |a, &b| a.max(b.abs()));
let eps_floor = (CUSTOM_FAMILY_EVAL_FLOOR * max_abs_eval).max(1e-300);
let mut want = Array1::<f64>::zeros(p);
for k in 0..p {
let mut q_t_rhs = 0.0;
for i in 0..p {
q_t_rhs += evecs[[i, k]] * rhs[i];
}
let scaled = q_t_rhs / evals[k].max(eps_floor);
for i in 0..p {
want[i] += evecs[[i, k]] * scaled;
}
}
for i in 0..p {
let tol = 1e-9 * want[i].abs().max(1.0) + 1e-9;
assert!(
(want[i] - x[i]).abs() <= tol,
"eigen-floor solve component {i}: want={:.6e}, got={:.6e}",
want[i],
x[i],
);
}
}
#[derive(Clone)]
pub(crate) struct HeterogeneousEtaLengthFamily {
pub(crate) n: usize,
}
impl CustomFamily for HeterogeneousEtaLengthFamily {
fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
let _unused_block_states = block_states;
let n = self.n;
let eta0 = &block_states[0].eta;
let eta1 = &block_states[1].eta;
assert_eq!(eta0.len(), 3 * n, "block 0 eta must be 3n");
assert_eq!(eta1.len(), n, "block 1 eta must be n");
let p0 = block_states[0].beta.len();
let p1 = block_states[1].beta.len();
let ll = -0.5 * eta0.dot(eta0) - 0.5 * eta1.dot(eta1);
let grad0 = &(-&block_states[0].beta) + &Array1::from_elem(p0, 0.1);
let grad1 = &(-&block_states[1].beta) + &Array1::from_elem(p1, 0.1);
Ok(FamilyEvaluation {
log_likelihood: ll,
blockworking_sets: vec![
BlockWorkingSet::ExactNewton {
gradient: grad0,
hessian: SymmetricMatrix::Dense(Array2::eye(p0)),
},
BlockWorkingSet::ExactNewton {
gradient: grad1,
hessian: SymmetricMatrix::Dense(Array2::eye(p1)),
},
],
})
}
}
pub(crate) fn make_heterogeneous_eta_specs(n: usize) -> Vec<ParameterBlockSpec> {
let p0 = 2;
let p1 = 2;
vec![
ParameterBlockSpec {
name: "big_block".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(Array2::from_elem(
(3 * n, p0),
1.0,
))),
offset: Array1::zeros(3 * n),
penalties: vec![],
nullspace_dims: vec![],
initial_log_lambdas: Array1::zeros(0),
initial_beta: Some(Array1::from_elem(p0, 1.0)),
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
},
ParameterBlockSpec {
name: "small_block".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(Array2::from_elem(
(n, p1),
1.0,
))),
offset: Array1::zeros(n),
penalties: vec![],
nullspace_dims: vec![],
initial_log_lambdas: Array1::zeros(0),
initial_beta: Some(Array1::from_elem(p1, 1.0)),
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
},
]
}
#[test]
pub(crate) fn uniform_eta_lengths_do_not_panic() {
let n = 10;
#[derive(Clone)]
struct UniformEtaFamily;
impl CustomFamily for UniformEtaFamily {
fn evaluate(
&self,
block_states: &[ParameterBlockState],
) -> Result<FamilyEvaluation, String> {
let _unused_block_states = block_states;
let p0 = block_states[0].beta.len();
let p1 = block_states[1].beta.len();
let eta0 = &block_states[0].eta;
let eta1 = &block_states[1].eta;
let ll = -0.5 * eta0.dot(eta0) - 0.5 * eta1.dot(eta1);
Ok(FamilyEvaluation {
log_likelihood: ll,
blockworking_sets: vec![
BlockWorkingSet::ExactNewton {
gradient: &(-&block_states[0].beta) + &Array1::from_elem(p0, 0.1),
hessian: SymmetricMatrix::Dense(Array2::eye(p0)),
},
BlockWorkingSet::ExactNewton {
gradient: &(-&block_states[1].beta) + &Array1::from_elem(p1, 0.1),
hessian: SymmetricMatrix::Dense(Array2::eye(p1)),
},
],
})
}
}
let specs =
vec![
ParameterBlockSpec {
name: "block_a".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(
Array2::from_elem((n, 2), 1.0),
)),
offset: Array1::zeros(n),
penalties: vec![],
nullspace_dims: vec![],
initial_log_lambdas: Array1::zeros(0),
initial_beta: Some(Array1::from_elem(2, 1.0)),
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
},
ParameterBlockSpec {
name: "block_b".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(
Array2::from_elem((n, 2), 1.0),
)),
offset: Array1::zeros(n),
penalties: vec![],
nullspace_dims: vec![],
initial_log_lambdas: Array1::zeros(0),
initial_beta: Some(Array1::from_elem(2, 1.0)),
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
},
];
let per_block = vec![Array1::zeros(0), Array1::zeros(0)];
let options = BlockwiseFitOptions {
inner_max_cycles: 3,
use_remlobjective: false,
compute_covariance: false,
..BlockwiseFitOptions::default()
};
let result = inner_blockwise_fit(&UniformEtaFamily, &specs, &per_block, &options, None);
assert!(
result.is_ok(),
"uniform eta lengths should not panic: {result:?}"
);
}
#[test]
pub(crate) fn heterogeneous_eta_lengths_inner_fit_completes() {
let n = 10;
let family = HeterogeneousEtaLengthFamily { n };
let specs = make_heterogeneous_eta_specs(n);
let per_block = vec![Array1::zeros(0), Array1::zeros(0)];
let options = BlockwiseFitOptions {
inner_max_cycles: 3,
use_remlobjective: false,
compute_covariance: false,
..BlockwiseFitOptions::default()
};
let result = inner_blockwise_fit(&family, &specs, &per_block, &options, None);
assert!(result.is_ok(), "inner fit should complete: {result:?}");
}
#[test]
pub(crate) fn heterogeneous_eta_single_cycle_completes() {
let n = 10;
let family = HeterogeneousEtaLengthFamily { n };
let specs = make_heterogeneous_eta_specs(n);
let per_block = vec![Array1::zeros(0), Array1::zeros(0)];
let options = BlockwiseFitOptions {
inner_max_cycles: 1,
use_remlobjective: false,
compute_covariance: false,
..BlockwiseFitOptions::default()
};
let result = inner_blockwise_fit(&family, &specs, &per_block, &options, None);
assert!(
result.is_ok(),
"single-cycle inner fit should complete: {result:?}"
);
}
#[test]
pub(crate) fn heterogeneous_eta_no_panic_when_all_blocks_converged() {
let n = 10;
#[derive(Clone)]
struct AllConvergedFamily {
pub(crate) n: usize,
}
impl CustomFamily for AllConvergedFamily {
fn evaluate(
&self,
block_states: &[ParameterBlockState],
) -> Result<FamilyEvaluation, String> {
let _unused_block_states = block_states;
let n = self.n;
let eta0 = &block_states[0].eta;
let eta1 = &block_states[1].eta;
assert_eq!(eta0.len(), 3 * n);
assert_eq!(eta1.len(), n);
let p0 = block_states[0].beta.len();
let p1 = block_states[1].beta.len();
let ll = -0.5 * eta0.dot(eta0) - 0.5 * eta1.dot(eta1);
Ok(FamilyEvaluation {
log_likelihood: ll,
blockworking_sets: vec![
BlockWorkingSet::ExactNewton {
gradient: Array1::zeros(p0),
hessian: SymmetricMatrix::Dense(Array2::eye(p0)),
},
BlockWorkingSet::ExactNewton {
gradient: Array1::zeros(p1),
hessian: SymmetricMatrix::Dense(Array2::eye(p1)),
},
],
})
}
}
let mut specs = make_heterogeneous_eta_specs(n);
specs[0].initial_beta = Some(Array1::zeros(2));
specs[1].initial_beta = Some(Array1::zeros(2));
let family = AllConvergedFamily { n };
let per_block = vec![Array1::zeros(0), Array1::zeros(0)];
let options = BlockwiseFitOptions {
inner_max_cycles: 1,
use_remlobjective: false,
compute_covariance: false,
..BlockwiseFitOptions::default()
};
let result = inner_blockwise_fit(&family, &specs, &per_block, &options, None);
assert!(
result.is_ok(),
"should not panic when all blocks are converged: {result:?}"
);
}
#[test]
pub(crate) fn heterogeneous_eta_completes_when_only_small_block_steps() {
let n = 10;
#[derive(Clone)]
struct OnlySmallBlockStepsFamily {
pub(crate) n: usize,
}
impl CustomFamily for OnlySmallBlockStepsFamily {
fn evaluate(
&self,
block_states: &[ParameterBlockState],
) -> Result<FamilyEvaluation, String> {
let _unused_block_states = block_states;
let n = self.n;
let eta0 = &block_states[0].eta;
let eta1 = &block_states[1].eta;
assert_eq!(eta0.len(), 3 * n);
assert_eq!(eta1.len(), n);
let p0 = block_states[0].beta.len();
let p1 = block_states[1].beta.len();
let ll = -0.5 * eta0.dot(eta0) - 0.5 * eta1.dot(eta1);
Ok(FamilyEvaluation {
log_likelihood: ll,
blockworking_sets: vec![
BlockWorkingSet::ExactNewton {
gradient: Array1::zeros(p0),
hessian: SymmetricMatrix::Dense(Array2::eye(p0)),
},
BlockWorkingSet::ExactNewton {
gradient: &(-&block_states[1].beta) + &Array1::from_elem(p1, 0.1),
hessian: SymmetricMatrix::Dense(Array2::eye(p1)),
},
],
})
}
}
let mut specs = make_heterogeneous_eta_specs(n);
specs[0].initial_beta = Some(Array1::zeros(2)); let family = OnlySmallBlockStepsFamily { n };
let per_block = vec![Array1::zeros(0), Array1::zeros(0)];
let options = BlockwiseFitOptions {
inner_max_cycles: 1,
use_remlobjective: false,
compute_covariance: false,
..BlockwiseFitOptions::default()
};
let result = inner_blockwise_fit(&family, &specs, &per_block, &options, None);
assert!(
result.is_ok(),
"fit should complete when only small block steps: {result:?}"
);
}
#[test]
pub(crate) fn projected_stationarity_inf_norm_respects_kkt_multipliers() {
let beta = array![1.0, 2.0, -0.5];
let residual = array![0.3, -0.1, 0.2];
let inf_nocon = projected_stationarity_inf_norm(&residual, &beta, None, None);
assert_relative_eq!(inf_nocon, 0.3_f64, epsilon = 1e-12);
let beta_active = array![0.0, 2.0];
let residual_active = array![0.5, -0.1];
let constraints_lb0 = LinearInequalityConstraints {
a: array![[1.0, 0.0], [0.0, 1.0]],
b: array![0.0, f64::NEG_INFINITY], };
let single = LinearInequalityConstraints {
a: array![[1.0, 0.0]],
b: array![0.0],
};
let inf_projected =
projected_stationarity_inf_norm(&residual_active, &beta_active, Some(&single), None);
assert_relative_eq!(inf_projected, 0.1_f64, epsilon = 1e-12);
let vec_projected = projected_linear_constraint_stationarity_vector(
&residual_active,
&beta_active,
&single,
None,
)
.expect("active lower-bound projection should succeed");
assert_relative_eq!(vec_projected[0], 0.0_f64, epsilon = 1e-10);
assert_relative_eq!(vec_projected[1], -0.1_f64, epsilon = 1e-12);
let inf_with_two_row = projected_stationarity_inf_norm(
&residual_active,
&beta_active,
Some(&constraints_lb0),
None,
);
assert_relative_eq!(inf_with_two_row, 0.1_f64, epsilon = 1e-12);
let beta_wrong_sign = array![0.0];
let residual_wrong_sign = array![-0.2];
let single1 = LinearInequalityConstraints {
a: array![[1.0]],
b: array![0.0],
};
let inf_wrong_sign = projected_stationarity_inf_norm(
&residual_wrong_sign,
&beta_wrong_sign,
Some(&single1),
None,
);
assert_relative_eq!(inf_wrong_sign, 0.2_f64, epsilon = 1e-12);
let beta_interior = array![1.5];
let residual_interior = array![0.4];
let inf_interior =
projected_stationarity_inf_norm(&residual_interior, &beta_interior, Some(&single1), None);
assert_relative_eq!(inf_interior, 0.4_f64, epsilon = 1e-12);
}
#[test]
pub(crate) fn joint_newton_math_constrained_stationary_signature_matches_aou_failure() {
let math = JointNewtonMathDiagnostic {
old_kkt_inf: 8.613e5,
linearized_next_kkt_inf: 8.580e5,
predicted_reduction: 1.589e-2,
actual_reduction: 1.589e-2,
trust_ratio: 1.000,
step_inf: 1.270e-2,
proposal_inf: 1.270e-2,
};
let linearized_rel = math.linearized_next_kkt_inf / (1.0 + math.old_kkt_inf);
assert!(
linearized_rel >= 0.5,
"large-scale exit has linearized_rel = {:.3e}, must be >= 0.5 for the \
constrained-stationary certificate to fire",
linearized_rel,
);
let relerr = math.scalar_model_relative_error();
assert!(
relerr <= 1e-3,
"large-scale exit has scalar_model_relerr = {:.3e}, must be <= 1e-3 \
(model agrees with actual ⇒ residual is a real multiplier)",
relerr,
);
let objective_change = 1.589e-2_f64;
let objective_tol = 1e-6 * (1.0 + 3.484783e5_f64);
assert!(
objective_change <= objective_tol,
"large-scale exit has |Δobj| = {:.3e}, must be <= obj_tol {:.3e}",
objective_change,
objective_tol,
);
}
#[test]
pub(crate) fn constrained_stationary_certificate_keeps_iterating_when_step_is_large() {
let math = JointNewtonMathDiagnostic {
old_kkt_inf: 2.708e4,
linearized_next_kkt_inf: 2.707e4,
predicted_reduction: 3.421e-1,
actual_reduction: 3.421e-1,
trust_ratio: 1.0,
step_inf: 2.891e-2,
proposal_inf: 2.891e-2,
};
let objective_change = 3.421e-1;
let objective_tol = 3.479e-1;
let residual = 8.102;
let residual_tol = 2.707e-2;
let step_tol = 1.2e-5;
let linearized_rel = math.linearized_next_kkt_inf / (1.0 + math.old_kkt_inf);
assert!(linearized_rel >= 0.5);
assert!(math.scalar_model_relative_error() <= 1e-3);
assert!(objective_change <= objective_tol);
assert!(math.step_inf > step_tol);
assert!(residual > residual_tol);
assert_eq!(
constrained_stationary_certificate_decision(
&math,
objective_change,
objective_tol,
step_tol,
None,
residual,
residual_tol,
),
ConstrainedStationaryCertificate::NotCandidate,
);
}
#[test]
pub(crate) fn residual_steady_geometric_descent_distinguishes_converging_from_plateau() {
use std::collections::VecDeque;
let converging: VecDeque<f64> = [6.985e-4, 2.388e-4, 7.987e-5, 2.597e-5]
.into_iter()
.collect();
assert!(
residual_in_steady_geometric_descent(&converging),
"a steadily ~0.33x/cycle descending residual must be recognized as converging"
);
let plateau: VecDeque<f64> = [2.066e0, 2.063e0, 2.066e0, 2.063e0].into_iter().collect();
assert!(
!residual_in_steady_geometric_descent(&plateau),
"a flat/oscillating residual plateau must NOT be treated as converging"
);
let noisy: VecDeque<f64> = [2.0e0, 2.0e0, 1.0e-3].into_iter().collect();
assert!(
!residual_in_steady_geometric_descent(&noisy),
"a single-cycle drop must not be mistaken for steady descent"
);
let short: VecDeque<f64> = [1.0e-3, 3.0e-4].into_iter().collect();
assert!(
!residual_in_steady_geometric_descent(&short),
"fewer than the window of cycles must not assert steady descent"
);
}
#[test]
pub(crate) fn constrained_stationary_certificate_refuses_only_when_step_is_exhausted() {
let math = JointNewtonMathDiagnostic {
old_kkt_inf: 2.708e4,
linearized_next_kkt_inf: 2.707e4,
predicted_reduction: 3.421e-1,
actual_reduction: 3.421e-1,
trust_ratio: 1.0,
step_inf: 2.891e-7,
proposal_inf: 2.891e-7,
};
let objective_change = 3.421e-1;
let objective_tol = 3.479e-1;
let step_tol = 1.0e-6;
let residual_tol = 2.707e-2;
assert_eq!(
constrained_stationary_certificate_decision(
&math,
objective_change,
objective_tol,
step_tol,
None,
residual_tol,
residual_tol,
),
ConstrainedStationaryCertificate::Accept,
);
assert_eq!(
constrained_stationary_certificate_decision(
&math,
objective_change,
objective_tol,
step_tol,
None,
residual_tol + 1.0e-12,
residual_tol,
),
ConstrainedStationaryCertificate::Accept,
);
assert_eq!(
constrained_stationary_certificate_decision(
&math,
objective_change,
objective_tol,
step_tol,
None,
4.0 * residual_tol + 1.0e-6,
residual_tol,
),
ConstrainedStationaryCertificate::RefusePhantomMultiplier,
);
}
#[test]
pub(crate) fn joint_newton_math_unconstrained_progress_does_not_match_certificate() {
let math = JointNewtonMathDiagnostic {
old_kkt_inf: 1.0e3,
linearized_next_kkt_inf: 1.0e-9,
predicted_reduction: 5.0e-1,
actual_reduction: 5.0e-1,
trust_ratio: 1.0,
step_inf: 1.0e-1,
proposal_inf: 1.0e-1,
};
let linearized_rel = math.linearized_next_kkt_inf / (1.0 + math.old_kkt_inf);
assert!(
linearized_rel < 0.5,
"unconstrained Newton must have linearized_rel < 0.5 (was {:.3e})",
linearized_rel,
);
}
#[test]
pub(crate) fn projected_stationarity_inf_norm_projects_coupled_linear_kkt_multipliers() {
let constraints = LinearInequalityConstraints {
a: array![[1.0, 1.0]],
b: array![1.0],
};
let beta_active = array![0.25, 0.75];
let residual_valid_multiplier = array![3.0, 3.0];
let inf_valid = projected_stationarity_inf_norm(
&residual_valid_multiplier,
&beta_active,
Some(&constraints),
None,
);
assert_relative_eq!(inf_valid, 0.0_f64, epsilon = 1e-10);
let vec_valid = projected_linear_constraint_stationarity_vector(
&residual_valid_multiplier,
&beta_active,
&constraints,
None,
)
.expect("coupled active projection should succeed");
assert_relative_eq!(vec_valid[0], 0.0_f64, epsilon = 1e-10);
assert_relative_eq!(vec_valid[1], 0.0_f64, epsilon = 1e-10);
let residual_wrong_sign = array![-3.0, -3.0];
let inf_wrong = projected_stationarity_inf_norm(
&residual_wrong_sign,
&beta_active,
Some(&constraints),
None,
);
assert_relative_eq!(inf_wrong, 3.0_f64, epsilon = 1e-12);
let beta_interior = array![0.75, 0.75];
let inf_interior = projected_stationarity_inf_norm(
&residual_valid_multiplier,
&beta_interior,
Some(&constraints),
None,
);
assert_relative_eq!(inf_interior, 3.0_f64, epsilon = 1e-12);
}
#[test]
pub(crate) fn joint_stationarity_from_gradient_projects_coupled_linear_constraints() {
let spec = ParameterBlockSpec {
name: "coupled".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(array![
[1.0, 0.0],
[0.0, 1.0]
])),
offset: array![0.0, 0.0],
penalties: Vec::new(),
nullspace_dims: Vec::new(),
initial_log_lambdas: Array1::zeros(0),
initial_beta: None,
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let state = ParameterBlockState {
beta: array![0.25, 0.75],
eta: array![0.25, 0.75],
};
let constraints = LinearInequalityConstraints {
a: array![[1.0, 1.0]],
b: array![1.0],
};
let s_lambdas = vec![Array2::<f64>::zeros((2, 2))];
let residual_multiplier = array![4.0, 4.0];
let gradient = -&residual_multiplier;
let projected = exact_newton_joint_stationarity_inf_norm_from_gradient(
&gradient,
&[state.clone()],
std::slice::from_ref(&spec),
&s_lambdas,
0.0,
RidgePolicy::explicit_stabilization_full(),
&[Some(constraints.clone())],
None,
)
.expect("stationarity projection should succeed");
assert_relative_eq!(projected, 0.0_f64, epsilon = 1e-10);
let kkt_residual = exact_newton_joint_projected_kkt_residual_for_ift_from_gradient(
&gradient,
std::slice::from_ref(&spec),
&[state.clone()],
&s_lambdas,
0.0,
RidgePolicy::explicit_stabilization_full(),
&[Some(constraints.clone())],
None,
)
.expect("KKT residual assembly should succeed")
.expect("exact-gradient path should produce residual");
assert_relative_eq!(kkt_residual.as_array()[0], 0.0_f64, epsilon = 1e-10);
assert_relative_eq!(kkt_residual.as_array()[1], 0.0_f64, epsilon = 1e-10);
let wrong_signed_gradient = residual_multiplier;
let unprojected = exact_newton_joint_stationarity_inf_norm_from_gradient(
&wrong_signed_gradient,
&[state],
&[spec],
&s_lambdas,
0.0,
RidgePolicy::explicit_stabilization_full(),
&[Some(constraints)],
None,
)
.expect("stationarity projection should succeed");
assert_relative_eq!(unprojected, 4.0_f64, epsilon = 1e-12);
}
#[test]
pub(crate) fn kkt_residual_uses_cached_joint_gradient_without_re_evaluating_family() {
let spec = ParameterBlockSpec {
name: "cached-gradient".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(array![
[1.0, 0.0],
[0.0, 1.0]
])),
offset: array![0.0, 0.0],
penalties: Vec::new(),
nullspace_dims: Vec::new(),
initial_log_lambdas: Array1::zeros(0),
initial_beta: None,
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let state = ParameterBlockState {
beta: array![2.0, -1.0],
eta: array![2.0, -1.0],
};
let s_lambda = Array2::<f64>::eye(2);
let expected_residual = array![0.25, -0.5];
let cached_gradient = s_lambda.dot(&state.beta) - &expected_residual;
let residual = exact_newton_joint_kkt_residual_for_ift_from_cached_gradient(
&OneBlockAlwaysErrorFamily,
std::slice::from_ref(&spec),
std::slice::from_ref(&state),
std::slice::from_ref(&s_lambda),
0.0,
RidgePolicy::explicit_stabilization_full(),
None,
Some(&cached_gradient),
)
.expect("cached gradient path should not call family.evaluate()")
.expect("cached gradient should produce a KKT residual");
assert_relative_eq!(
residual.as_array()[0],
expected_residual[0],
epsilon = 1e-12
);
assert_relative_eq!(
residual.as_array()[1],
expected_residual[1],
epsilon = 1e-12
);
}
#[test]
pub(crate) fn projected_stationarity_vector_uses_penalized_residual_not_raw_score() {
let spec = ParameterBlockSpec {
name: "score-cancellation".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(array![
[1.0, 0.0],
[0.0, 1.0]
])),
offset: array![0.0, 0.0],
penalties: Vec::new(),
nullspace_dims: Vec::new(),
initial_log_lambdas: Array1::zeros(0),
initial_beta: None,
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let state = ParameterBlockState {
beta: array![10.0, -4.0],
eta: array![10.0, -4.0],
};
let s_lambda = array![[2.0, 0.0], [0.0, 3.0]];
let gradient = array![19.5, -12.25];
let residual = exact_newton_joint_projected_stationarity_vector_from_gradient(
&gradient,
std::slice::from_ref(&state),
std::slice::from_ref(&spec),
std::slice::from_ref(&s_lambda),
0.0,
RidgePolicy::explicit_stabilization_full(),
&[None],
None,
)
.expect("projected stationarity residual should assemble");
assert_relative_eq!(residual[0], 0.5, epsilon = 1e-12);
assert_relative_eq!(residual[1], 0.25, epsilon = 1e-12);
}
#[test]
pub(crate) fn zero_psi_derivative_operator_acts_as_zero_map() {
let n = 17usize;
let p = 5usize;
let op = ZeroPsiDerivativeOperator::new(n, p);
assert_eq!(op.n_data(), n);
assert_eq!(op.p_out(), p);
let u = Array1::from_iter((0..p).map(|k| 1.0 + k as f64));
let v = Array1::from_iter((0..n).map(|k| 1.0 - 0.5 * k as f64));
let fwd = op.forward_mul(0, &u.view()).expect("forward_mul");
assert_eq!(fwd.len(), n);
assert!(fwd.iter().all(|x| *x == 0.0));
let trn = op.transpose_mul(0, &v.view()).expect("transpose_mul");
assert_eq!(trn.len(), p);
assert!(trn.iter().all(|x| *x == 0.0));
let fwd2 = op
.forward_mul_second_diag(0, &u.view())
.expect("forward_mul_second_diag");
assert_eq!(fwd2.len(), n);
assert!(fwd2.iter().all(|x| *x == 0.0));
let trn2 = op
.transpose_mul_second_diag(0, &v.view())
.expect("transpose_mul_second_diag");
assert_eq!(trn2.len(), p);
assert!(trn2.iter().all(|x| *x == 0.0));
let fwd_cross = op
.forward_mul_second_cross(0, 1, &u.view())
.expect("forward_mul_second_cross");
assert_eq!(fwd_cross.len(), n);
assert!(fwd_cross.iter().all(|x| *x == 0.0));
let trn_cross = op
.transpose_mul_second_cross(0, 1, &v.view())
.expect("transpose_mul_second_cross");
assert_eq!(trn_cross.len(), p);
assert!(trn_cross.iter().all(|x| *x == 0.0));
let chunk = op.row_chunk_first(0, 3..7).expect("row_chunk_first");
assert_eq!(chunk.dim(), (4, p));
assert!(chunk.iter().all(|x| *x == 0.0));
let chunk_diag = op
.row_chunk_second_diag(0, 0..n)
.expect("row_chunk_second_diag");
assert_eq!(chunk_diag.dim(), (n, p));
assert!(chunk_diag.iter().all(|x| *x == 0.0));
let chunk_cross = op
.row_chunk_second_cross(0, 1, 1..3)
.expect("row_chunk_second_cross");
assert_eq!(chunk_cross.dim(), (2, p));
assert!(chunk_cross.iter().all(|x| *x == 0.0));
let mut row = Array1::from_elem(p, 9.5);
op.row_vector_first_into(0, 4, row.view_mut())
.expect("row_vector_first_into");
assert!(row.iter().all(|x| *x == 0.0));
assert!(op.as_materializable().is_none());
}
#[test]
pub(crate) fn spatial_adaptive_zero_xpsi_uses_zero_map_without_dense_allocation() {
let n = 320_000usize;
let p = 101usize;
let deriv = CustomFamilyBlockPsiDerivative {
penalty_index: None,
x_psi: Array2::<f64>::zeros((0, 0)),
s_psi: Array2::<f64>::zeros((0, 0)),
s_psi_components: None,
s_psi_penalty_components: None,
x_psi_psi: None,
s_psi_psi: None,
s_psi_psi_components: None,
s_psi_psi_penalty_components: None,
implicit_operator: None,
implicit_axis: 0,
implicit_group_id: None,
};
let policy = ResourcePolicy::default_library();
let map = resolve_custom_family_x_psi_map(
&deriv,
n,
p,
0..n,
"spatial-adaptive zero sentinel",
&policy,
)
.expect("resolve x_psi map for (0, 0)-sentinel deriv");
match map {
PsiDesignMap::Zero { nrows, ncols } => {
assert_eq!(nrows, n);
assert_eq!(ncols, p);
}
other => panic!(
"(0, 0) x_psi sentinel must resolve to PsiDesignMap::Zero, got {:?}",
std::mem::discriminant(&other)
),
}
}
#[test]
pub(crate) fn zero_psi_derivative_operator_resolves_to_zero_design_map() {
let n = 12usize;
let p = 4usize;
let zero_op: Arc<dyn CustomFamilyPsiDerivativeOperator> =
Arc::new(ZeroPsiDerivativeOperator::new(n, p));
let deriv = CustomFamilyBlockPsiDerivative {
penalty_index: None,
x_psi: Array2::<f64>::zeros((0, 0)),
s_psi: Array2::<f64>::zeros((0, 0)),
s_psi_components: None,
s_psi_penalty_components: None,
x_psi_psi: None,
s_psi_psi: None,
s_psi_psi_components: None,
s_psi_psi_penalty_components: None,
implicit_operator: Some(Arc::clone(&zero_op)),
implicit_axis: 0,
implicit_group_id: None,
};
let policy = ResourcePolicy::default_library();
let map = resolve_custom_family_x_psi_map(&deriv, n, p, 0..n, "zero", &policy)
.expect("resolve x_psi map");
let u = Array1::from_iter((0..p).map(|k| 1.0 + k as f64));
let fwd = map.forward_mul(u.view()).expect("forward_mul map");
assert_eq!(fwd.len(), n);
assert!(fwd.iter().all(|x| *x == 0.0));
let chunk = map.row_chunk(2..5).expect("row_chunk map");
assert_eq!(chunk.dim(), (3, p));
assert!(chunk.iter().all(|x| *x == 0.0));
let map_second =
resolve_custom_family_x_psi_psi_map(&deriv, &deriv, 0, n, p, 0..n, "zero", &policy)
.expect("resolve x_psi_psi map");
let fwd_second = map_second
.forward_mul(u.view())
.expect("forward_mul second");
assert_eq!(fwd_second.len(), n);
assert!(fwd_second.iter().all(|x| *x == 0.0));
}
#[test]
pub(crate) fn rowwise_kronecker_psi_row_chunks_are_window_consistent() {
let first = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
let second_diag = array![[0.5, 1.0], [1.5, 2.0], [2.5, 3.0]];
let second_cross = array![[-1.0, 0.25], [-1.5, 0.5], [-2.0, 0.75]];
let base = build_embedded_dense_psi_operator(
&first,
&second_diag,
Some(&vec![(1, second_cross.clone())]),
0..2,
2,
0,
)
.expect("embedded dense base");
let time_a = Arc::new(array![[1.0, 0.0], [0.5, 1.0], [1.5, -0.5]]);
let time_b = Arc::new(array![[0.25, 2.0], [-1.0, 0.75], [0.0, 1.25]]);
let op = build_rowwise_kronecker_psi_operator(base, vec![time_a, time_b])
.expect("rowwise kronecker psi operator");
let mat = op
.as_materializable()
.expect("rowwise operator dense reference");
let rows = 1..5;
let first_dense = mat.materialize_first(0).expect("dense first");
let first_chunk = op.row_chunk_first(0, rows.clone()).expect("chunk first");
assert_eq!(
first_chunk,
first_dense.slice(ndarray::s![rows.clone(), ..]).to_owned()
);
let diag_full = op
.row_chunk_second_diag(0, 0..op.n_data())
.expect("full row-chunk diag");
let diag_chunk = op
.row_chunk_second_diag(0, rows.clone())
.expect("chunk diag");
assert_eq!(
diag_chunk,
diag_full.slice(ndarray::s![rows.clone(), ..]).to_owned()
);
let cross_full = op
.row_chunk_second_cross(0, 1, 0..op.n_data())
.expect("full row-chunk cross");
let cross_chunk = op
.row_chunk_second_cross(0, 1, rows.clone())
.expect("chunk cross");
assert_eq!(
cross_chunk,
cross_full.slice(ndarray::s![rows, ..]).to_owned()
);
}
#[test]
pub(crate) fn joint_trust_region_radius_update_accept_reject_logic() {
let accepted = update_joint_trust_region_radius(1.0, 1.0, 2.0, 2.0, 1.0);
assert!(accepted.accepted);
assert!((accepted.rho - 1.0).abs() < 1.0e-12);
assert!((accepted.radius - 2.0).abs() < 1.0e-12);
assert_eq!(accepted.decision.label(), "grow_at_boundary");
let rejected = update_joint_trust_region_radius(1.0, 0.5, -0.1, 2.0, 1.0);
assert!(!rejected.accepted);
assert!(rejected.rho < 0.0);
assert!((rejected.radius - 0.25).abs() < 1.0e-12);
assert_eq!(rejected.decision.label(), "shrink_reject");
let rejected_inside_radius = update_joint_trust_region_radius(1.0, 1.0e-3, -0.1, 2.0, 1.0);
assert!(!rejected_inside_radius.accepted);
assert!(
rejected_inside_radius.radius < 1.0e-3,
"a rejected in-radius step must be outside the next trust region"
);
assert!((rejected_inside_radius.radius - 5.0e-4).abs() < 1.0e-12);
assert_eq!(rejected_inside_radius.decision.label(), "shrink_reject");
let poor = update_joint_trust_region_radius(1.0, 0.5, 0.1, 1.0, 1.0);
assert!(poor.accepted);
assert!((poor.rho - 0.1).abs() < 1.0e-12);
assert!((poor.radius - 0.25).abs() < 1.0e-12);
assert_eq!(poor.decision.label(), "shrink_marginal_accept");
}
#[test]
pub(crate) fn joint_newton_collapsed_trust_region_all_reject_exits_before_grinding_budget() {
let inner_max_cycles = 200usize;
assert!(
JOINT_COLLAPSED_FLOOR_ALL_REJECT_MAX_CYCLES < inner_max_cycles / 4,
"the collapsed-floor guard must exit well before the inner cycle budget"
);
let mut radius = 1.0_f64;
for _ in 0..200 {
let rejected = update_joint_trust_region_radius(radius, 0.5 * radius, -1.0, 2.0, 1.0);
assert!(!rejected.accepted, "a genuine objective increase must reject");
radius = rejected.radius;
}
assert!(
joint_trust_radius_at_absolute_floor(radius),
"sustained rejection must collapse the radius to its absolute 1e-12 floor"
);
assert_eq!(
update_joint_trust_region_radius(radius, 0.5 * radius, -1.0, 2.0, 1.0)
.decision
.label(),
"reject_floor",
"at the floor a rejected step must be classified reject_floor"
);
let mut consecutive = 0usize;
let mut exit_cycle: Option<usize> = None;
for cycle in 0..inner_max_cycles {
let all_attempts_rejected = true;
let at_floor = joint_trust_radius_at_absolute_floor(radius);
let all_attempts_rejected_at_floor = all_attempts_rejected && at_floor;
if all_attempts_rejected_at_floor {
consecutive += 1;
} else {
consecutive = 0;
}
if joint_collapsed_floor_all_reject_exit(consecutive, all_attempts_rejected_at_floor) {
exit_cycle = Some(cycle);
break;
}
}
let exit_cycle = exit_cycle.expect("collapsed-floor guard must exit, not grind to budget");
assert_eq!(
exit_cycle,
JOINT_COLLAPSED_FLOOR_ALL_REJECT_MAX_CYCLES - 1,
"guard must fire exactly at the threshold cycle (0-indexed)"
);
assert!(
exit_cycle + 1 < inner_max_cycles,
"guard must exit ({} cycles) well before the {} budget",
exit_cycle + 1,
inner_max_cycles
);
let progressing_radius = 1e-3_f64;
assert!(
!joint_trust_radius_at_absolute_floor(progressing_radius),
"an above-floor radius must not count as the absolute floor"
);
let mut progressing_consecutive = 0usize;
for _ in 0..(4 * JOINT_COLLAPSED_FLOOR_ALL_REJECT_MAX_CYCLES) {
let all_attempts_rejected = true;
let at_floor = joint_trust_radius_at_absolute_floor(progressing_radius);
let all_attempts_rejected_at_floor = all_attempts_rejected && at_floor;
if all_attempts_rejected_at_floor {
progressing_consecutive += 1;
} else {
progressing_consecutive = 0;
}
assert!(
!joint_collapsed_floor_all_reject_exit(
progressing_consecutive,
all_attempts_rejected_at_floor
),
"the collapsed-floor guard must never fire while the radius is above the floor"
);
}
let mut streak = JOINT_COLLAPSED_FLOOR_ALL_REJECT_MAX_CYCLES - 1;
let accepted_cycle_all_reject_at_floor = false;
streak = if accepted_cycle_all_reject_at_floor {
streak + 1
} else {
0
};
assert!(
!joint_collapsed_floor_all_reject_exit(streak, accepted_cycle_all_reject_at_floor),
"an accepted cycle must reset the streak and suppress the guard"
);
}
#[test]
pub(crate) fn joint_trust_region_noise_floor_accepts_round_off_negative_actual() {
let objective_scale = 1.66e5;
let noise_floor = objective_scale * 1e-14;
let predicted = noise_floor * 0.1;
let actual = -noise_floor * 0.5;
let update = update_joint_trust_region_radius(1.0, 0.05, actual, predicted, objective_scale);
assert!(
update.accepted,
"sub-noise-floor sign flip must not reject as failure"
);
assert!((update.rho - 1.0).abs() < 1.0e-12);
}
#[test]
pub(crate) fn joint_trust_region_noise_floor_rejects_genuine_increase() {
let objective_scale = 1.66e5;
let noise_floor = objective_scale * 1e-14;
let predicted = noise_floor * 0.1;
let actual = -1.0;
let update = update_joint_trust_region_radius(1.0, 0.5, actual, predicted, objective_scale);
assert!(
!update.accepted,
"objective increase beyond noise must reject"
);
assert!(update.rho.is_infinite() && update.rho < 0.0);
}
#[test]
pub(crate) fn joint_objective_roundoff_slack_accepts_large_scale_wobble() {
let old_objective = 1.218530e5;
let trial_objective = old_objective + 2.183e-10;
assert!(
trial_objective
<= old_objective + joint_objective_roundoff_slack(old_objective, trial_objective),
"sub-nanounit objective wobble at large scale should not burn all trust attempts"
);
}
#[test]
pub(crate) fn joint_objective_floor_only_accepts_sub_tolerance_model_steps() {
let old_objective = 1.218942e5_f64;
let objective_tol = 1e-6 * (1.0 + old_objective.abs());
let actual_reduction = -3.783e-10;
let predicted_reduction = 9.481e-15;
let trial_objective = old_objective - actual_reduction;
assert!(
joint_objective_floor_reached(
old_objective,
trial_objective,
actual_reduction,
predicted_reduction,
objective_tol,
),
"the repeated large-scale roundoff wobble should terminate immediately"
);
assert!(
!joint_objective_floor_reached(
old_objective,
old_objective + 2.0,
-2.0,
predicted_reduction,
objective_tol,
),
"real objective increases must still be rejected"
);
assert!(
!joint_objective_floor_reached(
old_objective,
trial_objective,
actual_reduction,
10.0 * objective_tol,
objective_tol,
),
"non-negligible predicted progress must not be hidden by the floor exit"
);
let positive_noise_actual = 3.783e-10_f64;
let positive_noise_trial = old_objective - positive_noise_actual;
assert!(
!joint_objective_floor_reached(
old_objective,
positive_noise_trial,
positive_noise_actual,
predicted_reduction,
objective_tol,
),
"positive-noise reductions must NOT trigger the floor; symmetric exit breaks rank-deficient FD identity"
);
}
#[test]
pub(crate) fn joint_inner_convergence_rejects_objective_flat_non_kkt_stall() {
let objective = 4.472714e5_f64;
let inner_tol = 1.0e-6_f64;
let objective_change = 5.381e-2_f64;
let accepted_step_inf = 2.794e-2_f64;
let residual = 5.980e1_f64;
let residual_tol = inner_tol * (1.0 + objective);
let step_tol = 1.242e-3_f64;
let objective_tol = residual_tol;
let old_flat_step_predicate = objective_change <= objective_tol
&& accepted_step_inf <= objective_tol.sqrt().max(step_tol);
assert!(
old_flat_step_predicate,
"the historical objective-flat/step-flat predicate would have accepted this stalled inner solve"
);
assert!(
!joint_inner_kkt_converged(residual, residual_tol),
"inner convergence must require KKT residual <= tolerance"
);
assert!(
!joint_inner_kkt_converged(1.5 * residual_tol, residual_tol),
"near-miss residual slack would still invalidate the outer envelope gradient"
);
}
#[test]
pub(crate) fn joint_trust_region_block_metric_does_not_starve_unrelated_blocks() {
const TIME_W: usize = 12;
const MARG_W: usize = 11;
const LOG_W: usize = 10;
const P: usize = TIME_W + MARG_W + LOG_W;
let mut h = Array2::<f64>::zeros((P, P));
let mut g = Array1::<f64>::zeros(P);
h[[0, 0]] = 2.24e8;
g[0] = -5.6e8;
for i in 1..TIME_W {
h[[i, i]] = 1.0 + 0.3 * i as f64;
g[i] = -0.3 - 0.07 * i as f64;
}
for j in 0..MARG_W {
let idx = TIME_W + j;
h[[idx, idx]] = 1.2 + 0.2 * j as f64;
g[idx] = -0.9;
}
let log0 = TIME_W + MARG_W;
h[[log0, log0]] = 1.0e-5;
g[log0] = -2.173;
for k in 1..LOG_W {
let idx = log0 + k;
h[[idx, idx]] = 1.5 + 0.1 * k as f64;
g[idx] = -0.4;
}
let mut newton = Array1::<f64>::zeros(P);
for i in 0..P {
newton[i] = -g[i] / h[[i, i]];
}
let mut raw_global = newton.clone();
let raw_norm = raw_global.iter().map(|v| v * v).sum::<f64>().sqrt();
if raw_norm.is_finite() && raw_norm > 20.0 {
raw_global.mapv_inplace(|v| v * (20.0 / raw_norm));
}
let raw_linearized = (&g + &h.dot(&raw_global))
.iter()
.map(|v| v.abs())
.fold(0.0_f64, f64::max)
/ (1.0 + g.iter().map(|v| v.abs()).fold(0.0_f64, f64::max));
assert!(
raw_linearized > 0.99,
"raw concatenated L2 truncation should reproduce the starvation mechanism"
);
let ranges = vec![(0, TIME_W), (TIME_W, TIME_W + MARG_W), (TIME_W + MARG_W, P)];
let metric_diag = h.diag().to_owned();
let full_block_norms = joint_trust_region_block_metric_norms(&newton, &ranges, &metric_diag);
let mut block_metric = newton.clone();
let block_radii = vec![full_block_norms[0], full_block_norms[1], 20.0];
truncate_joint_step_to_block_metric_radii(
&mut block_metric,
&ranges,
&metric_diag,
&block_radii,
);
let block_linearized = (&g + &h.dot(&block_metric))
.iter()
.map(|v| v.abs())
.fold(0.0_f64, f64::max)
/ (1.0 + g.iter().map(|v| v.abs()).fold(0.0_f64, f64::max));
assert!(
block_linearized < 1.0e-6,
"block-local curvature metric must let the time block neutralize its KKT defect; got {block_linearized:.3e}"
);
}
#[test]
pub(crate) fn shrink_active_joint_block_trust_radii_strictly_decreases_max_radius() {
let mut block_radii = vec![1.0, 1.0e-12];
let block_step_norms = vec![1.0e-3, 1.0e-12];
let old_max = block_radii.iter().copied().fold(0.0_f64, f64::max);
let new_max = shrink_active_joint_block_trust_radii(&mut block_radii, &block_step_norms, 0.25);
assert!(
new_max < old_max,
"joint trust radius must strictly decrease when a step is rejected (was {old_max:.3e}, now {new_max:.3e})"
);
assert!(
block_radii[0] < block_step_norms[0],
"interior block radius must drop below its step norm to force a strictly smaller next step (radius {:.3e}, step {:.3e})",
block_radii[0],
block_step_norms[0]
);
}
#[test]
pub(crate) fn shrink_active_joint_block_trust_radii_decreases_max_when_max_held_by_interior_block()
{
let mut block_radii = vec![1.562e-2, 1.562e-2];
let block_step_norms = vec![1.562e-2, 1.0e-6];
let old_max = block_radii.iter().copied().fold(0.0_f64, f64::max);
let new_max = shrink_active_joint_block_trust_radii(&mut block_radii, &block_step_norms, 0.25);
assert!(
new_max < old_max,
"joint trust radius (= scalar Moré–Sorensen constraint) must \
strictly decrease on rejection even when the max is held by \
an interior block (was {old_max:.3e}, now {new_max:.3e})"
);
}
#[test]
pub(crate) fn shrink_active_joint_block_trust_radii_pulls_radius_below_step_norm() {
let mut block_radii = vec![1.0];
let block_step_norms = vec![1.0e-3];
let new_max = shrink_active_joint_block_trust_radii(&mut block_radii, &block_step_norms, 0.25);
assert!(
new_max <= 0.5 * block_step_norms[0],
"shrunken radius must be ≤ 0.5 · step_norm to force a strictly smaller next step (was {new_max:.3e}, step {:.3e})",
block_step_norms[0]
);
}
#[test]
pub(crate) fn blockwise_trust_region_uses_penalized_metric_not_raw_coefficient_size() {
let spec = ParameterBlockSpec {
name: "single_block".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(
Array2::<f64>::zeros((1, 3)),
)),
offset: Array1::zeros(1),
penalties: vec![],
nullspace_dims: vec![],
initial_log_lambdas: Array1::zeros(0),
initial_beta: None,
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let h: Array2<f64> = array![[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0e-10]];
let work = BlockWorkingSet::ExactNewton {
gradient: array![0.0, 0.0, 0.0],
hessian: SymmetricMatrix::Dense(h.clone()),
};
let s_lambda = Array2::<f64>::zeros((3, 3));
let raw_delta: Array1<f64> = array![2.0, -1.0, 2.0e5];
let raw_inf = raw_delta.iter().fold(0.0_f64, |m, v| {
let value: f64 = *v;
m.max(value.abs())
});
let radius = 20.0_f64;
let raw_inf_scaled = &raw_delta * (radius / raw_inf);
assert!(
raw_inf_scaled[0].abs() < 1.0e-3,
"the old raw coefficient cap would starve ordinary coordinates inside the block"
);
let (metric_delta, metric_norm) = truncate_block_step_to_metric_radius(
&spec,
&work,
&s_lambda,
raw_delta,
radius,
0.0,
RidgePolicy::explicit_stabilization_pospart(),
)
.expect("block metric truncation should succeed");
assert!(
metric_norm < radius,
"the near-null coordinate is large in beta-space but small in the block's penalized-Hessian metric"
);
assert!(
(metric_delta[0] - 2.0).abs() < 1.0e-12
&& (metric_delta[1] + 1.0).abs() < 1.0e-12
&& (metric_delta[2] - 2.0e5).abs() < 1.0e-6,
"blockwise trust regions must size steps in objective curvature units, not raw coefficient units"
);
}
#[test]
pub(crate) fn blockwise_trust_region_never_reverts_to_raw_beta_norm_on_indefinite_curvature() {
let spec = ParameterBlockSpec {
name: "single_block".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(
Array2::<f64>::zeros((1, 3)),
)),
offset: Array1::zeros(1),
penalties: vec![],
nullspace_dims: vec![],
initial_log_lambdas: Array1::zeros(0),
initial_beta: None,
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
};
let h: Array2<f64> = array![[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, -1.0e-8]];
let work = BlockWorkingSet::ExactNewton {
gradient: array![0.0, 0.0, 0.0],
hessian: SymmetricMatrix::Dense(h),
};
let s_lambda = Array2::<f64>::zeros((3, 3));
let raw_delta: Array1<f64> = array![2.0, -1.0, 2.0e5];
let radius = 20.0_f64;
let old_quadratic = raw_delta.dot(&array![2.0, -1.0, -2.0e-3]);
assert!(
old_quadratic < 0.0,
"fixture must hit the historical non-SPD branch"
);
let (metric_delta, metric_norm) = truncate_block_step_to_metric_radius(
&spec,
&work,
&s_lambda,
raw_delta,
radius,
0.0,
RidgePolicy::explicit_stabilization_pospart(),
)
.expect("block metric truncation should succeed");
assert!(
metric_norm < radius,
"indefinite curvature must still use the positive penalized diagonal metric, not raw beta length"
);
assert!(
(metric_delta[0] - 2.0).abs() < 1.0e-12
&& (metric_delta[1] + 1.0).abs() < 1.0e-12
&& (metric_delta[2] - 2.0e5).abs() < 1.0e-6,
"non-SPD local curvature must not resurrect coefficient-space trust-region scaling"
);
}
#[test]
pub(crate) fn joint_trust_region_rosenbrock_like_quadratic_is_armijo_safe() {
let h = array![[802.0, -400.0], [-400.0, 200.1]];
let unconstrained = array![1.0, 1.0];
let gradient = -h.dot(&unconstrained);
let rhs = -&gradient;
let mut step = unconstrained.clone();
let unconstrained_norm = unconstrained.iter().map(|v| v * v).sum::<f64>().sqrt();
assert!(unconstrained_norm > 0.25);
step.mapv_inplace(|v| v * (0.25 / unconstrained_norm));
let step_norm = step.iter().map(|v| v * v).sum::<f64>().sqrt();
assert!(step_norm <= 0.25 + 1.0e-12);
let h_step = h.dot(&step);
let predicted = joint_quadratic_predicted_reduction(&rhs, &h_step, &step);
let old_objective = 0.0;
let trial_objective = gradient.dot(&step) + 0.5 * step.dot(&h_step);
let actual = old_objective - trial_objective;
assert!(predicted > 0.0);
assert!((predicted - actual).abs() < 1.0e-10);
let update =
update_joint_trust_region_radius(0.25, step_norm, actual, predicted, old_objective);
assert!(update.accepted);
assert!(trial_objective < old_objective);
}
#[test]
pub(crate) fn kkt_refusal_report_classifies_rank_deficient_hpen_third_block() {
let block_widths = [3usize, 3, 3];
let total_p: usize = block_widths.iter().sum();
let block_count = block_widths.len();
let mut specs: Vec<ParameterBlockSpec> = Vec::with_capacity(block_count);
let mut states: Vec<ParameterBlockState> = Vec::with_capacity(block_count);
let mut s_lambdas: Vec<Array2<f64>> = Vec::with_capacity(block_count);
let mut ranges: Vec<(usize, usize)> = Vec::with_capacity(block_count);
let names = ["block_a", "block_b", "block_c_rank_deficient"];
let mut offset = 0usize;
for (b, &width) in block_widths.iter().enumerate() {
let start = offset;
let end = start + width;
offset = end;
ranges.push((start, end));
specs.push(ParameterBlockSpec {
name: names[b].to_string(),
design: DesignMatrix::from(Array2::<f64>::zeros((1, width))),
offset: Array1::zeros(1),
penalties: vec![],
nullspace_dims: vec![],
initial_log_lambdas: Array1::zeros(0),
initial_beta: None,
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
});
states.push(ParameterBlockState {
beta: Array1::zeros(width),
eta: Array1::zeros(1),
});
s_lambdas.push(Array2::<f64>::zeros((width, width)));
}
let mut h = Array2::<f64>::zeros((total_p, total_p));
for i in 0..3 {
h[[i, i]] = 1.0;
h[[3 + i, 3 + i]] = 1.0;
}
h[[6, 6]] = 1.0;
let source = JointHessianSource::Dense(h);
let mut joint_grad = Array1::<f64>::zeros(total_p);
joint_grad[7] = 5.0;
joint_grad[8] = 3.0;
joint_grad[0] = 1.0e-6;
let cached_active_sets: Vec<Option<Vec<usize>>> = vec![None; block_count];
let block_constraints: Vec<Option<LinearInequalityConstraints>> = vec![None; block_count];
let math = JointNewtonMathDiagnostic {
old_kkt_inf: 5.0,
linearized_next_kkt_inf: 4.9,
predicted_reduction: 1.0e-4,
actual_reduction: 1.0e-4,
trust_ratio: 1.0,
step_inf: 1.0e-9,
proposal_inf: 1.0e-3,
};
let residual_tol = 1.0e-6;
let projected_residual_inf = 5.0;
let report = compute_kkt_refusal_report(
42,
&states,
&specs,
&s_lambdas,
&ranges,
Some(&joint_grad),
&cached_active_sets,
&block_constraints,
Some(&source),
total_p,
0.0,
RidgePolicy::explicit_stabilization_full(),
1.0e-9,
1.0e-3,
1.0,
residual_tol,
1.0e-6,
1.0e-6,
1.0e-8,
projected_residual_inf,
Some(&math),
);
assert_eq!(
report.diagnosis,
KktRefusalDiagnosis::RankDeficientHPen,
"block-2 rank-1 H_pen with zero s_lambdas must classify as RankDeficientHPen, got {:?}",
report.diagnosis,
);
assert!(
report.hpen_nullity_at_rank_tol > 0,
"rank-1 block embedded in 9x9 block-diagonal H must register nullity > 0, got {}",
report.hpen_nullity_at_rank_tol,
);
assert_eq!(
report.block_carrying_residual,
Some(2),
"block 2 must carry the largest |∇L − Sβ|∞ component; got {:?}, residuals={:?}",
report.block_carrying_residual,
report.block_residual_inf,
);
assert_eq!(report.block_names.len(), block_count);
assert_eq!(
report.block_names[2], "block_c_rank_deficient",
"carrying-block name should be the third block",
);
assert!(
report
.format_structured_log(residual_tol)
.contains("rank_deficient_H_pen"),
"structured log must surface the diagnosis label",
);
assert!(
report
.format_bubbled_error()
.contains("block_c_rank_deficient"),
"bubbled error must name the carrying block by spec.name",
);
assert!(
report
.format_bubbled_error()
.contains("structural or numerical null direction"),
"rank-deficient refusals should no longer emit the old polynomial-only guidance",
);
}
#[test]
pub(crate) fn kkt_refusal_diagnosis_string_round_trip_through_bubbled_error_parser() {
for diagnosis in [
KktRefusalDiagnosis::RankDeficientHPen,
KktRefusalDiagnosis::PhantomMultiplierWithWellConditionedH,
KktRefusalDiagnosis::ActiveSetIncomplete,
KktRefusalDiagnosis::AliasingDetectedAtFit,
] {
let label = diagnosis.as_str();
let synthetic_error = format!(
"coupled exact-joint inner solve exited the joint Newton path before convergence \
— cycle=7 cert REFUSED: residual=1.0e-2 > tol=1.0e-6; \
diagnosis: {label}"
);
let parsed = KktRefusalDiagnosis::parse_from_error(&synthetic_error);
assert_eq!(
parsed,
Some(diagnosis),
"label '{label}' must round-trip through parse_from_error; got {:?}",
parsed,
);
}
}
#[test]
pub(crate) fn kkt_refusal_guidance_distinguishes_marginal_slope_coupling_from_polynomial_nullspace()
{
let phantom = KktRefusalDiagnosis::PhantomMultiplierWithWellConditionedH.guidance();
assert!(phantom.contains("marginal/logslope coupling"));
assert!(phantom.contains("rather than a"));
assert!(phantom.contains("Matérn/Duchon polynomial-nullspace failure"));
let active = KktRefusalDiagnosis::ActiveSetIncomplete.guidance();
assert!(active.contains("active-set certification failure"));
assert!(active.contains("not a polynomial-nullspace diagnosis"));
let alias = KktRefusalDiagnosis::AliasingDetectedAtFit.guidance();
assert!(alias.contains("drop or reparameterize"));
}
#[test]
pub(crate) fn rank_deficient_hpen_canary_fires_on_large_scale_shaped_failure() {
let block_widths = [4usize, 4, 4];
let total_p: usize = block_widths.iter().sum();
let block_count = block_widths.len();
let mut specs: Vec<ParameterBlockSpec> = Vec::with_capacity(block_count);
let mut states: Vec<ParameterBlockState> = Vec::with_capacity(block_count);
let mut s_lambdas: Vec<Array2<f64>> = Vec::with_capacity(block_count);
let mut ranges: Vec<(usize, usize)> = Vec::with_capacity(block_count);
let names = ["location_block", "scale_block", "marginal_slope_block"];
let mut offset = 0usize;
for (b, &width) in block_widths.iter().enumerate() {
let start = offset;
let end = start + width;
offset = end;
ranges.push((start, end));
specs.push(ParameterBlockSpec {
name: names[b].to_string(),
design: DesignMatrix::from(Array2::<f64>::zeros((1, width))),
offset: Array1::zeros(1),
penalties: vec![],
nullspace_dims: vec![],
initial_log_lambdas: Array1::zeros(0),
initial_beta: None,
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
});
states.push(ParameterBlockState {
beta: Array1::zeros(width),
eta: Array1::zeros(1),
});
s_lambdas.push(Array2::<f64>::zeros((width, width)));
}
let mut h = Array2::<f64>::zeros((total_p, total_p));
for i in 0..4 {
h[[i, i]] = 1.0;
h[[4 + i, 4 + i]] = 1.0;
}
let source = JointHessianSource::Dense(h);
let mut joint_grad = Array1::<f64>::zeros(total_p);
joint_grad[8] = 4.2;
joint_grad[9] = 1.7;
joint_grad[10] = -2.5;
joint_grad[11] = 0.9;
let cached_active_sets: Vec<Option<Vec<usize>>> = vec![None; block_count];
let block_constraints: Vec<Option<LinearInequalityConstraints>> = vec![None; block_count];
let math = JointNewtonMathDiagnostic {
old_kkt_inf: 4.2,
linearized_next_kkt_inf: 4.2,
predicted_reduction: 0.0,
actual_reduction: 0.0,
trust_ratio: 0.0,
step_inf: 0.0,
proposal_inf: 1.0e-3,
};
let report = compute_kkt_refusal_report(
123,
&states,
&specs,
&s_lambdas,
&ranges,
Some(&joint_grad),
&cached_active_sets,
&block_constraints,
Some(&source),
total_p,
0.0,
RidgePolicy::explicit_stabilization_full(),
0.0,
1.0e-3,
1.0,
1.0e-6,
1.0e-6,
1.0e-6,
0.0,
4.2,
Some(&math),
);
assert_eq!(
report.diagnosis,
KktRefusalDiagnosis::RankDeficientHPen,
"large-scale-shaped marginal-slope failure must classify as RankDeficientHPen \
(this is the canary nullspace-lead's smooth-construction rework targets)",
);
assert!(
report.hpen_nullity_at_rank_tol >= 4,
"fully degenerate marginal-slope block (4 zero eigenvalues) must contribute \
nullity >= 4; got {}",
report.hpen_nullity_at_rank_tol,
);
assert_eq!(
report.block_carrying_residual,
Some(2),
"marginal_slope_block (idx 2) must carry the residual; got {:?}, residuals={:?}",
report.block_carrying_residual,
report.block_residual_inf,
);
let bubbled = report.format_bubbled_error();
assert_eq!(
KktRefusalDiagnosis::parse_from_error(&bubbled),
Some(KktRefusalDiagnosis::RankDeficientHPen),
"canary's bubbled-error string must parse back via the classifier's parser",
);
assert!(
bubbled.contains("marginal-slope fits can also expose callback-owned weak directions"),
"BMS-shaped refusal should mention the callback-owned weak-direction mechanism"
);
}
#[test]
pub(crate) fn rank_deficient_hpen_canary_disappears_after_nullspace_absorption() {
let block_widths = [4usize, 4, 4];
let total_p: usize = block_widths.iter().sum();
let block_count = block_widths.len();
let mut specs: Vec<ParameterBlockSpec> = Vec::with_capacity(block_count);
let mut states: Vec<ParameterBlockState> = Vec::with_capacity(block_count);
let mut s_lambdas: Vec<Array2<f64>> = Vec::with_capacity(block_count);
let mut ranges: Vec<(usize, usize)> = Vec::with_capacity(block_count);
let names = ["location_block", "scale_block", "marginal_slope_block"];
let mut offset = 0usize;
for (b, &width) in block_widths.iter().enumerate() {
let start = offset;
let end = start + width;
offset = end;
ranges.push((start, end));
specs.push(ParameterBlockSpec {
name: names[b].to_string(),
design: DesignMatrix::from(Array2::<f64>::zeros((1, width))),
offset: Array1::zeros(1),
penalties: vec![],
nullspace_dims: vec![],
initial_log_lambdas: Array1::zeros(0),
initial_beta: None,
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
});
states.push(ParameterBlockState {
beta: Array1::zeros(width),
eta: Array1::zeros(1),
});
s_lambdas.push(Array2::<f64>::zeros((width, width)));
}
let h = Array2::<f64>::eye(total_p);
let source = JointHessianSource::Dense(h);
let joint_grad = Array1::<f64>::zeros(total_p);
let cached_active_sets: Vec<Option<Vec<usize>>> = vec![None; block_count];
let block_constraints: Vec<Option<LinearInequalityConstraints>> = vec![None; block_count];
let math = JointNewtonMathDiagnostic {
old_kkt_inf: 0.0,
linearized_next_kkt_inf: 0.0,
predicted_reduction: 0.0,
actual_reduction: 0.0,
trust_ratio: 1.0,
step_inf: 0.0,
proposal_inf: 0.0,
};
let report = compute_kkt_refusal_report(
0,
&states,
&specs,
&s_lambdas,
&ranges,
Some(&joint_grad),
&cached_active_sets,
&block_constraints,
Some(&source),
total_p,
0.0,
RidgePolicy::explicit_stabilization_full(),
0.0,
0.0,
1.0,
1.0e-6,
1.0e-6,
1.0e-6,
0.0,
0.0,
Some(&math),
);
assert_eq!(
report.hpen_nullity_at_rank_tol, 0,
"post-absorption: full-rank H_pen must register nullity 0",
);
assert_ne!(
report.diagnosis,
KktRefusalDiagnosis::RankDeficientHPen,
"post-absorption: the rank-deficiency diagnosis must no longer fire",
);
}
#[test]
pub(crate) fn structural_edf_matches_trace_identity_noncommuting_pair() {
let s = array![[1.0, 0.0], [0.0, 4.0]];
let off = 0.5 * (1.8_f64.sqrt() - 0.2_f64.sqrt());
let diag = 0.5 * (1.8_f64.sqrt() + 0.2_f64.sqrt());
let x = array![[diag, off], [off, diag]];
let design = DesignMatrix::from(x);
let penalty = PenaltyMatrix::Dense(s.clone());
let gammas = design_penalty_range_gammas(&design, &penalty)
.expect("2x2 full-rank p×p pair must yield generalized eigenvalues");
assert_eq!(gammas.len(), 2, "range(S) is full rank ⇒ two γ_j");
let g = array![[1.0, 0.8], [0.8, 1.0]];
let trace_g_minv = |lambda: f64| -> f64 {
let m00 = g[(0, 0)] + lambda * s[(0, 0)];
let m01 = g[(0, 1)] + lambda * s[(0, 1)];
let m10 = g[(1, 0)] + lambda * s[(1, 0)];
let m11 = g[(1, 1)] + lambda * s[(1, 1)];
let det = m00 * m11 - m01 * m10;
(g[(0, 0)] * m11 - g[(0, 1)] * m10 - g[(1, 0)] * m01 + g[(1, 1)] * m00) / det
};
for &lambda in &[1.0_f64, 0.3] {
let rho = lambda.ln();
let edf = unit_weight_term_edf(&gammas, rho);
let trace = trace_g_minv(lambda);
assert!(
(edf - trace).abs() < 1e-9,
"structural edf {edf} must equal tr(G(G+λS)⁻¹) {trace} at λ={lambda}",
);
}
let edf_at_one = unit_weight_term_edf(&gammas, 0.0_f64);
assert!(
(edf_at_one - 0.611111_f64).abs() < 1e-5,
"edf at λ=1 must be ≈0.6111 (true), not 0.7000 (Rayleigh-quotient bug): got {edf_at_one}",
);
}