use crate::estimate::EstimationError;
use crate::solver::estimate::reml::unified::BarrierConfig;
use ::opt::{
Arc as ArcOptimizer, ArcError, Bfgs, BfgsError, Bounds, FirstOrderObjective, FirstOrderSample,
FixedPoint, FixedPointError, FixedPointObjective, FixedPointSample, FixedPointStatus,
MaxIterations, ObjectiveEvalError, SecondOrderObjective, SecondOrderSample, Solution,
Tolerance, ZerothOrderObjective,
};
use ndarray::{Array1, Array2, ArrayView2};
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering};
/// Shared atomic channel between the outer optimizer and the inner solver:
/// the inner solver publishes its most recent progress here, and the outer
/// optimizer writes an iteration cap into `cap` for the next inner solve.
///
/// Float-valued fields (`ift_residual`, `accept_rho`) are stored as raw
/// `f64` bit patterns inside an `AtomicU64` and decoded in `snapshot`.
#[derive(Clone, Debug)]
pub struct InnerProgressFeedback {
    // Iteration cap imposed on the inner solver (0 is treated as "uncapped"
    // by the cap schedule in this module).
    pub cap: Arc<AtomicUsize>,
    // Iterations consumed by the most recent inner solve; 0 means "no
    // history yet" and suppresses `snapshot`.
    pub last_iters: Arc<AtomicUsize>,
    // Whether the most recent inner solve converged.
    pub last_converged: Arc<AtomicBool>,
    // Bit pattern of the last IFT residual; decoded values that are
    // non-finite or negative are reported as "unknown".
    pub ift_residual: Arc<AtomicU64>,
    // Bit pattern of the last step-acceptance ratio; same encoding rules.
    pub accept_rho: Arc<AtomicU64>,
}
impl InnerProgressFeedback {
    /// Reads the published inner-solver history into an immutable snapshot.
    ///
    /// Returns `None` until at least one inner solve has run (signalled by
    /// `last_iters == 0`). The float fields are decoded from their bit
    /// patterns and dropped unless finite and non-negative. Loads are
    /// `Relaxed`: the fields are not read as one atomic unit, so a snapshot
    /// may mix values from adjacent inner solves — acceptable for a
    /// heuristic cap schedule.
    fn snapshot(&self) -> Option<InnerProgressSnapshot> {
        let iters = self.last_iters.load(Ordering::Relaxed);
        if iters == 0 {
            None
        } else {
            let residual_bits = self.ift_residual.load(Ordering::Relaxed);
            let r = f64::from_bits(residual_bits);
            let last_ift_residual = if r.is_finite() && r >= 0.0 {
                Some(r)
            } else {
                None
            };
            let accept_rho_bits = self.accept_rho.load(Ordering::Relaxed);
            let ar = f64::from_bits(accept_rho_bits);
            let last_accept_rho = if ar.is_finite() && ar >= 0.0 {
                Some(ar)
            } else {
                None
            };
            Some(InnerProgressSnapshot {
                last_iters: iters,
                last_converged: self.last_converged.load(Ordering::Relaxed),
                last_ift_residual,
                last_accept_rho,
            })
        }
    }
}
/// Point-in-time copy of `InnerProgressFeedback`, decoded for the cap
/// schedule in `first_order_inner_cap_schedule`.
#[derive(Clone, Copy, Debug)]
struct InnerProgressSnapshot {
    // Iterations used by the most recent inner solve (always > 0 here).
    last_iters: usize,
    // Whether that solve converged.
    last_converged: bool,
    // Last IFT residual, if a finite non-negative value was published.
    last_ift_residual: Option<f64>,
    // Last acceptance ratio, if a finite non-negative value was published.
    last_accept_rho: Option<f64>,
}
/// How (if at all) an outer Hessian operator can be turned into a dense
/// matrix.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum OuterHessianMaterialization {
    Unavailable,
    RepeatedHvp,
    BatchedHvp,
    Explicit,
}
impl OuterHessianMaterialization {
    /// True for every strategy that can produce a dense matrix; only
    /// `Unavailable` cannot.
    fn is_available(self) -> bool {
        match self {
            Self::Unavailable => false,
            Self::RepeatedHvp | Self::BatchedHvp | Self::Explicit => true,
        }
    }
}
/// Matrix-free view of the outer-objective Hessian: implementors supply
/// `dim` and a Hessian-vector product; block products and dense
/// materialization have default implementations built on `matvec`.
pub trait OuterHessianOperator: Send + Sync {
    /// Number of rows/columns of the (square) operator.
    fn dim(&self) -> usize;
    /// Applies the operator to `v`; errors are reported as strings.
    fn matvec(&self, v: &Array1<f64>) -> Result<Array1<f64>, String>;
    /// Hint that `dim` repeated matvecs are affordable; defaults to false.
    fn is_cheap_to_materialize(&self) -> bool {
        false
    }
    /// Declared materialization strategy; the default derives it from
    /// `is_cheap_to_materialize` (RepeatedHvp when cheap, else Unavailable).
    fn materialization_capability(&self) -> OuterHessianMaterialization {
        if self.is_cheap_to_materialize() {
            OuterHessianMaterialization::RepeatedHvp
        } else {
            OuterHessianMaterialization::Unavailable
        }
    }
    /// Computes `H * factor` column by column. The default runs the columns
    /// through rayon in parallel and validates both the input row count and
    /// each matvec's output length against `dim`.
    fn mul_mat(&self, factor: ArrayView2<'_, f64>) -> Result<Array2<f64>, String> {
        use rayon::iter::{IntoParallelIterator, ParallelIterator};
        let dim = self.dim();
        if factor.nrows() != dim {
            return Err(format!(
                "outer Hessian operator factor row count mismatch: got {}, expected {}",
                factor.nrows(),
                dim
            ));
        }
        let m = factor.ncols();
        // Each column is an independent matvec; collect short-circuits on
        // the first error.
        let cols: Result<Vec<Array1<f64>>, String> = (0..m)
            .into_par_iter()
            .map(|j| {
                let col = factor.column(j).to_owned();
                let hv = self.matvec(&col)?;
                if hv.len() != dim {
                    return Err(format!(
                        "outer Hessian operator matvec length mismatch: got {}, expected {}",
                        hv.len(),
                        dim
                    ));
                }
                Ok(hv)
            })
            .collect();
        let cols = cols?;
        let mut out = Array2::<f64>::zeros((dim, m));
        for (j, hv) in cols.into_iter().enumerate() {
            out.column_mut(j).assign(&hv);
        }
        Ok(out)
    }
    /// Materializes the operator as a dense matrix via `mul_mat(I)`,
    /// symmetrizes the result (average of mirror entries), and rejects
    /// non-finite output. Callers should consult
    /// `materialization_capability` before paying for this.
    fn materialize_dense(&self) -> Result<Array2<f64>, String> {
        let dim = self.dim();
        let identity = Array2::<f64>::eye(dim);
        let mut dense = self.mul_mat(identity.view())?;
        if dense.nrows() != dim || dense.ncols() != dim {
            return Err(format!(
                "outer Hessian operator mul_mat returned {}x{}, expected {}x{}",
                dense.nrows(),
                dense.ncols(),
                dim,
                dim
            ));
        }
        // Enforce exact symmetry: a Hessian should be symmetric, but
        // matvec round-off can leave tiny asymmetries.
        for row in 0..dim {
            for col in (row + 1)..dim {
                let sym = 0.5 * (dense[[row, col]] + dense[[col, row]]);
                dense[[row, col]] = sym;
                dense[[col, row]] = sym;
            }
        }
        if !dense.iter().all(|value| value.is_finite()) {
            return Err(
                "outer Hessian dense materialization produced non-finite entries".to_string(),
            );
        }
        Ok(dense)
    }
}
/// Decorator over a base [`OuterHessianOperator`] that adds a dense
/// `rho_block` contribution to the leading `k x k` coordinates
/// (`k = rho_block.nrows()`), without materializing the base operator.
struct RhoBlockAdditiveOuterHessian {
    // Operator supplying the full `dim x dim` action.
    base: Arc<dyn OuterHessianOperator>,
    // Square dense block added to the leading coordinates.
    rho_block: Array2<f64>,
    // Cached operator dimension (taken from `base.dim()` at construction).
    dim: usize,
}
impl OuterHessianOperator for RhoBlockAdditiveOuterHessian {
    fn dim(&self) -> usize {
        self.dim
    }
    fn matvec(&self, v: &Array1<f64>) -> Result<Array1<f64>, String> {
        if v.len() != self.dim {
            return Err(format!(
                "outer Hessian operator input length mismatch: got {}, expected {}",
                v.len(),
                self.dim
            ));
        }
        let mut out = self.base.matvec(v)?;
        let k = self.rho_block.nrows();
        if k > 0 {
            // Bug fix: mirror the shape guard used in `mul_mat`. Previously
            // an oversized rho_block made the slices below panic instead of
            // surfacing a recoverable Err like the rest of this impl.
            if k > self.dim || k > out.len() {
                return Err(format!(
                    "rho-block Hessian update shape mismatch: rho_block is {}x{}, matvec output has {} rows",
                    self.rho_block.nrows(),
                    self.rho_block.ncols(),
                    out.len()
                ));
            }
            let rho_v = v.slice(ndarray::s![..k]).to_owned();
            let rho_out = self.rho_block.dot(&rho_v);
            out.slice_mut(ndarray::s![..k]).scaled_add(1.0, &rho_out);
        }
        Ok(out)
    }
    fn mul_mat(&self, factor: ArrayView2<'_, f64>) -> Result<Array2<f64>, String> {
        let mut out = self.base.mul_mat(factor)?;
        let k = self.rho_block.nrows();
        if k > 0 {
            if k > out.nrows() {
                return Err(format!(
                    "rho-block Hessian update shape mismatch: rho_block is {}x{}, mul_mat output has {} rows",
                    self.rho_block.nrows(),
                    self.rho_block.ncols(),
                    out.nrows()
                ));
            }
            let factor_top = factor.slice(ndarray::s![..k, ..]);
            let rho_contrib = self.rho_block.dot(&factor_top);
            out.slice_mut(ndarray::s![..k, ..])
                .scaled_add(1.0, &rho_contrib);
        }
        Ok(out)
    }
    fn is_cheap_to_materialize(&self) -> bool {
        // The dense rho-block add is trivial; cost is dominated by `base`.
        self.base.is_cheap_to_materialize()
    }
    fn materialization_capability(&self) -> OuterHessianMaterialization {
        self.base.materialization_capability()
    }
}
pub(crate) const OUTER_HVP_MATERIALIZE_MAX_DIM: usize = 64;
struct HvApplyCounter<'a> {
inner: &'a dyn OuterHessianOperator,
count: std::sync::atomic::AtomicU64,
}
impl<'a> HvApplyCounter<'a> {
fn new(inner: &'a dyn OuterHessianOperator) -> Self {
Self {
inner,
count: std::sync::atomic::AtomicU64::new(0),
}
}
fn count(&self) -> u64 {
self.count.load(std::sync::atomic::Ordering::Relaxed)
}
}
impl<'a> OuterHessianOperator for HvApplyCounter<'a> {
fn dim(&self) -> usize {
self.inner.dim()
}
fn matvec(&self, v: &Array1<f64>) -> Result<Array1<f64>, String> {
self.count
.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
self.inner.matvec(v)
}
fn mul_mat(&self, factor: ArrayView2<'_, f64>) -> Result<Array2<f64>, String> {
self.count
.fetch_add(factor.ncols() as u64, std::sync::atomic::Ordering::Relaxed);
self.inner.mul_mat(factor)
}
fn materialization_capability(&self) -> OuterHessianMaterialization {
self.inner.materialization_capability()
}
}
/// Whether an analytic derivative (gradient or Hessian) is available for
/// the outer objective; drives solver planning in `plan`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Derivative {
    Analytic,
    Unavailable,
}
// Below this parameter count, BFGS is preferred over the EFS fixed-point
// variants regardless of availability (see `efs_plan_eligible`).
const SMALL_OUTER_BFGS_MAX_PARAMS: usize = 8;
// Upper parameter count for the dense analytic-Hessian probe in
// `validate_second_order_seed_hessian`.
const SECOND_ORDER_GEOMETRY_PROBE_MAX_PARAMS: usize = 64;
/// Layout of the outer parameter vector θ: `n_params` total coordinates, of
/// which the trailing `psi_dim` are ψ coordinates and the remaining
/// `rho_dim()` are ρ/penalty-like coordinates.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct OuterThetaLayout {
    // Total number of outer parameters.
    pub n_params: usize,
    // Number of ψ coordinates (0 when everything is penalty-like).
    pub psi_dim: usize,
}
impl OuterThetaLayout {
    pub fn new(n_params: usize, psi_dim: usize) -> Self {
        Self { n_params, psi_dim }
    }
    /// Number of ρ coordinates; saturating so an invalid layout
    /// (psi_dim > n_params) reports 0 instead of underflowing.
    pub fn rho_dim(&self) -> usize {
        self.n_params.saturating_sub(self.psi_dim)
    }
    /// Rejects layouts where psi_dim exceeds n_params; used when a
    /// capability is first accepted (hence the `EstimationError` type).
    fn validate_capability(&self, context: &str) -> Result<(), EstimationError> {
        if self.psi_dim > self.n_params {
            return Err(EstimationError::RemlOptimizationFailed(format!(
                "{context}: invalid outer theta layout (psi_dim={} exceeds n_params={})",
                self.psi_dim, self.n_params
            )));
        }
        Ok(())
    }
    /// Checks that a θ point has exactly `n_params` entries (recoverable
    /// error, so the optimizer can back off rather than abort).
    fn validate_point_len(
        &self,
        theta: &Array1<f64>,
        context: &str,
    ) -> Result<(), ObjectiveEvalError> {
        if theta.len() != self.n_params {
            return Err(ObjectiveEvalError::recoverable(format!(
                "{context}: outer theta length mismatch: got {}, expected {} (rho_dim={}, psi_dim={})",
                theta.len(),
                self.n_params,
                self.rho_dim(),
                self.psi_dim
            )));
        }
        Ok(())
    }
    /// Checks that a gradient vector matches the layout length.
    fn validate_gradient_len(
        &self,
        gradient: &Array1<f64>,
        context: &str,
    ) -> Result<(), ObjectiveEvalError> {
        if gradient.len() != self.n_params {
            return Err(ObjectiveEvalError::recoverable(format!(
                "{context}: outer gradient length mismatch: got {}, expected {} (rho_dim={}, psi_dim={})",
                gradient.len(),
                self.n_params,
                self.rho_dim(),
                self.psi_dim
            )));
        }
        Ok(())
    }
    /// Checks that a dense Hessian is `n_params x n_params`.
    fn validate_hessian_shape(
        &self,
        hessian: &Array2<f64>,
        context: &str,
    ) -> Result<(), ObjectiveEvalError> {
        if hessian.nrows() != self.n_params || hessian.ncols() != self.n_params {
            return Err(ObjectiveEvalError::recoverable(format!(
                "{context}: outer Hessian shape mismatch: got {}x{}, expected {}x{} (rho_dim={}, psi_dim={})",
                hessian.nrows(),
                hessian.ncols(),
                self.n_params,
                self.n_params,
                self.rho_dim(),
                self.psi_dim
            )));
        }
        Ok(())
    }
    /// Shape validation for an EFS evaluation: the step vector must cover
    /// all parameters, and the optional ψ-gradient / ψ-index payloads must
    /// match `psi_dim` with all indices in range.
    fn validate_efs_eval(&self, eval: &EfsEval, context: &str) -> Result<(), ObjectiveEvalError> {
        if eval.steps.len() != self.n_params {
            return Err(ObjectiveEvalError::recoverable(format!(
                "{context}: outer EFS step length mismatch: got {}, expected {} (rho_dim={}, psi_dim={})",
                eval.steps.len(),
                self.n_params,
                self.rho_dim(),
                self.psi_dim
            )));
        }
        if let Some(ref psi_gradient) = eval.psi_gradient {
            if psi_gradient.len() != self.psi_dim {
                return Err(ObjectiveEvalError::recoverable(format!(
                    "{context}: outer EFS psi-gradient length mismatch: got {}, expected {}",
                    psi_gradient.len(),
                    self.psi_dim
                )));
            }
        }
        if let Some(ref psi_indices) = eval.psi_indices {
            if psi_indices.len() != self.psi_dim {
                return Err(ObjectiveEvalError::recoverable(format!(
                    "{context}: outer EFS psi-index count mismatch: got {}, expected {}",
                    psi_indices.len(),
                    self.psi_dim
                )));
            }
            if psi_indices.iter().any(|&idx| idx >= self.n_params) {
                return Err(ObjectiveEvalError::recoverable(format!(
                    "{context}: outer EFS psi index out of range for n_params={}",
                    self.n_params
                )));
            }
        }
        Ok(())
    }
}
/// Everything the planner needs to know about an outer objective: which
/// derivatives it offers, its parameter layout, and routing switches.
#[derive(Clone, Debug)]
pub struct OuterCapability {
    // Availability of an analytic outer gradient.
    pub gradient: Derivative,
    // Availability of an analytic outer Hessian.
    pub hessian: Derivative,
    // Total outer parameter count (ρ + ψ).
    pub n_params: usize,
    // Number of ψ coordinates (0 = purely penalty-like problem).
    pub psi_dim: usize,
    // Whether an EFS-style fixed-point update is implemented.
    pub fixed_point_available: bool,
    // Barrier configuration, if the objective runs with a barrier term.
    pub barrier_config: Option<BarrierConfig>,
    // Routing hint: prefer first-order solvers even when more is available.
    pub prefer_gradient_only: bool,
    // Hard switch that removes EFS/HybridEFS from planning.
    pub disable_fixed_point: bool,
}
impl OuterCapability {
    /// Layout view (n_params, psi_dim) of this capability.
    pub fn theta_layout(&self) -> OuterThetaLayout {
        OuterThetaLayout::new(self.n_params, self.psi_dim)
    }
    /// Validates psi_dim <= n_params; call before planning.
    pub fn validate_layout(&self, context: &str) -> Result<(), EstimationError> {
        self.theta_layout().validate_capability(context)
    }
    /// True when every coordinate is a ρ/penalty-like parameter.
    pub fn all_penalty_like(&self) -> bool {
        self.psi_dim == 0
    }
    /// True when at least one ψ coordinate exists.
    pub fn has_psi_coords(&self) -> bool {
        self.psi_dim > 0
    }
    /// Pure-EFS eligibility: fixed point implemented and enabled, no ψ
    /// coordinates, and the problem is large enough that BFGS is not
    /// automatically preferred.
    fn efs_plan_eligible(&self) -> bool {
        self.fixed_point_available
            && !self.disable_fixed_point
            && self.all_penalty_like()
            && self.n_params > SMALL_OUTER_BFGS_MAX_PARAMS
    }
    /// Hybrid-EFS eligibility: same as above but *with* ψ coordinates
    /// (EFS handles ρ, a gradient method handles ψ).
    fn hybrid_efs_plan_eligible(&self) -> bool {
        self.fixed_point_available
            && !self.disable_fixed_point
            && self.has_psi_coords()
            && self.n_params > SMALL_OUTER_BFGS_MAX_PARAMS
    }
    /// Hessian availability as seen by the planner. Currently an identity
    /// mapping; the exhaustive match keeps this a deliberate decision point
    /// (a new `Derivative` variant forces a compile error here).
    fn declared_hessian_for_planning(&self) -> Derivative {
        match self.hessian {
            Derivative::Analytic => Derivative::Analytic,
            Derivative::Unavailable => Derivative::Unavailable,
        }
    }
}
/// Outer solver chosen by the planner.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Solver {
    // Second-order trust-region style solver (analytic Hessian required).
    Arc,
    // Quasi-Newton with analytic or unavailable gradient.
    Bfgs,
    // Fixed-point EFS update over a purely penalty-like θ.
    Efs,
    // EFS over ρ combined with a gradient method over ψ.
    HybridEfs,
    // Derivative-free pattern search; auxiliary use only.
    CompassSearch,
}
/// Which role the solver plays; auxiliary gradient-free runs can route to
/// `CompassSearch` when nothing better exists (see `plan_with_class`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)]
pub enum SolverClass {
    #[default]
    Primary,
    AuxiliaryGradientFree,
}
#[inline]
fn effective_seed_budget(
requested_budget: usize,
solver: Solver,
risk_profile: crate::seeding::SeedRiskProfile,
screening_enabled: bool,
) -> usize {
let requested_budget = requested_budget.max(1);
let capped = match (solver, risk_profile) {
(Solver::Efs | Solver::HybridEfs, _) => 1,
(Solver::Arc, crate::seeding::SeedRiskProfile::Survival) => 1,
(Solver::Arc, crate::seeding::SeedRiskProfile::GeneralizedLinear) if screening_enabled => 1,
(Solver::Arc, crate::seeding::SeedRiskProfile::GeneralizedLinear) => 2,
(Solver::CompassSearch, _) => 1,
_ => requested_budget,
};
requested_budget.min(capped)
}
/// Decides whether to run the seed-screening cascade: requires a screening
/// cap handle in the config, more generated seeds than the budget allows,
/// and a solver expensive enough that screening pays off.
#[inline]
fn should_screen_seeds(
    config: &OuterConfig,
    solver: Solver,
    generated_seed_count: usize,
    seed_budget: usize,
) -> bool {
    let solver_benefits = matches!(solver, Solver::Arc | Solver::Efs | Solver::HybridEfs);
    let surplus_seeds = generated_seed_count > seed_budget;
    config.screening_cap.is_some() && surplus_seeds && solver_benefits
}
#[inline]
fn expensive_unsuccessful_seed_limit(
solver: Solver,
risk_profile: crate::seeding::SeedRiskProfile,
) -> Option<usize> {
match (solver, risk_profile) {
(Solver::Efs | Solver::HybridEfs, _) => Some(1),
(Solver::Arc, crate::seeding::SeedRiskProfile::Survival) => Some(1),
(Solver::Arc, crate::seeding::SeedRiskProfile::GeneralizedLinear) => Some(2),
(Solver::CompassSearch, _) => Some(1),
_ => None,
}
}
// Escalation factors applied to the initial screening cap across cascade
// stages (a final uncapped stage follows; see `rank_seeds_with_screening`).
const SEED_SCREENING_CASCADE_MULTIPLIERS: [usize; 3] = [1, 4, 16];
// Sentinel cap value meaning "no inner-iteration limit".
const SEED_SCREENING_UNCAPPED: usize = 0;
/// Orders candidate seeds cheapest-first by screening-proxy cost.
///
/// Runs a cascade of inner-iteration caps (initial, x4, x16, uncapped):
/// each stage evaluates every seed under the current cap and stops at the
/// first stage that yields at least one finite cost. Seeds with finite
/// costs come first (ascending cost, ties by original index); seeds that
/// never produced a finite cost keep their heuristic order at the end, so
/// the returned vector always has the same length as `seeds`. If no stage
/// produces a finite cost, the original order is returned unchanged.
/// The screening cap is restored to its previous value before returning.
fn rank_seeds_with_screening(
    obj: &mut dyn OuterObjective,
    config: &OuterConfig,
    context: &str,
    seeds: &[Array1<f64>],
) -> Vec<Array1<f64>> {
    // No screening-cap handle means screening is disabled entirely.
    let Some(screening_cap) = config.screening_cap.as_ref() else {
        return seeds.to_vec();
    };
    let initial_cap = config.seed_config.screen_max_inner_iterations.max(1);
    // Remember the cap in force before screening so it can be restored.
    let previous_cap = screening_cap.swap(initial_cap, Ordering::Relaxed);
    let cascade_caps = [
        initial_cap.saturating_mul(SEED_SCREENING_CASCADE_MULTIPLIERS[0]),
        initial_cap.saturating_mul(SEED_SCREENING_CASCADE_MULTIPLIERS[1]),
        initial_cap.saturating_mul(SEED_SCREENING_CASCADE_MULTIPLIERS[2]),
        SEED_SCREENING_UNCAPPED,
    ];
    let mut ranked: Vec<(usize, f64)> = Vec::with_capacity(seeds.len());
    let mut rejected = 0usize;
    let mut final_cap_used = initial_cap;
    let mut stages_consumed = 0usize;
    let cascade_start = std::time::Instant::now();
    for (stage, &cap) in cascade_caps.iter().enumerate() {
        screening_cap.store(cap, Ordering::Relaxed);
        // Each stage re-evaluates every seed from scratch.
        ranked.clear();
        rejected = 0;
        for (idx, seed) in seeds.iter().enumerate() {
            // Reset between seeds so state from one proxy solve cannot
            // leak into the next.
            obj.reset();
            match obj.eval_screening_proxy(seed) {
                Ok(cost) if cost.is_finite() => ranked.push((idx, cost)),
                Ok(_) | Err(_) => rejected += 1,
            }
        }
        final_cap_used = cap;
        stages_consumed = stage + 1;
        if !ranked.is_empty() {
            if stage > 0 {
                log::info!(
                    "[OUTER] {context}: seed screening cap escalated from {} to {} \
                     (initial cap was too shallow for this problem; {}/{} seeds ranked)",
                    initial_cap,
                    if cap == 0 {
                        "uncapped".to_string()
                    } else {
                        cap.to_string()
                    },
                    ranked.len(),
                    seeds.len(),
                );
            }
            break;
        }
    }
    screening_cap.store(previous_cap, Ordering::Relaxed);
    obj.reset();
    log::info!(
        "[OUTER] {context}: seed screening cascade complete elapsed={:.3}s stages_used={} final_cap={} ranked={}/{}",
        cascade_start.elapsed().as_secs_f64(),
        stages_consumed,
        if final_cap_used == 0 {
            "uncapped".to_string()
        } else {
            final_cap_used.to_string()
        },
        ranked.len(),
        seeds.len(),
    );
    if ranked.is_empty() {
        log::info!(
            "[OUTER] {context}: no finite seed cost even with full inner budget \
             ({} seeds, {} rejected, {} cascade stages tried); keeping heuristic order",
            seeds.len(),
            rejected,
            stages_consumed,
        );
        return seeds.to_vec();
    }
    // Ascending cost; ties broken by the original (heuristic) index so the
    // sort is fully deterministic.
    ranked.sort_by(|(idx_a, cost_a), (idx_b, cost_b)| {
        cost_a.total_cmp(cost_b).then_with(|| idx_a.cmp(idx_b))
    });
    let mut ordered = Vec::with_capacity(seeds.len());
    let mut seen = vec![false; seeds.len()];
    for (idx, _) in ranked {
        seen[idx] = true;
        ordered.push(seeds[idx].clone());
    }
    // Unranked (rejected) seeds are appended in their original order.
    for (idx, seed) in seeds.iter().enumerate() {
        if !seen[idx] {
            ordered.push(seed.clone());
        }
    }
    log::debug!(
        "[OUTER] {context}: seed screening ranked {}/{} candidates at cap={} \
         (initial cap={}, stages used={}); rejected={}",
        ordered.len() - rejected,
        seeds.len(),
        if final_cap_used == 0 {
            "uncapped".to_string()
        } else {
            final_cap_used.to_string()
        },
        initial_cap,
        stages_consumed,
        rejected,
    );
    ordered
}
/// True when `candidate` should replace the incumbent best result:
/// convergence status dominates, then lower final objective value wins.
#[inline]
fn candidate_improves_best(candidate: &OuterResult, best: Option<&OuterResult>) -> bool {
    let Some(best) = best else {
        // No incumbent yet — anything is an improvement.
        return true;
    };
    if candidate.converged != best.converged {
        candidate.converged
    } else {
        candidate.final_value < best.final_value
    }
}
/// Where the Hessian information used by the chosen solver comes from.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum HessianSource {
    Analytic,
    BfgsApprox,
    EfsFixedPoint,
    HybridEfsFixedPoint,
}
/// How much of the objective an evaluation call is expected to compute.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum OuterEvalOrder {
    ValueAndGradient,
    ValueGradientHessian,
}
/// The planner's verdict: which solver to run and where its curvature
/// information comes from.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct OuterPlan {
    pub solver: Solver,
    pub hessian_source: HessianSource,
}
// Sentinel substring embedded in error messages that request an immediate
// switch to the first-order fallback (matched in
// `requests_immediate_first_order_fallback`).
pub(crate) const EFS_FIRST_ORDER_FALLBACK_MARKER: &str = "[outer-efs-first-order-fallback]";
/// Whether automatic solver degradation is allowed after a failed attempt.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum FallbackPolicy {
    Automatic,
    Disabled,
}
impl std::fmt::Display for OuterPlan {
    /// Human-readable "solver=…, hessian_source=…" rendering used in logs.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "solver={:?}, hessian_source={:?}",
            self.solver, self.hessian_source
        )
    }
}
impl OuterPlan {
    /// Compact, machine-parsable routing summary for the planning log.
    pub fn routing_log_line(&self) -> String {
        // NOTE(review): `matrix-free` is reported as true exactly when the
        // Hessian source is `Analytic` — confirm this matches the intended
        // meaning of "matrix-free" downstream.
        let matrix_free = self.hessian_source == HessianSource::Analytic;
        format!(
            "solver={:?};hessian={:?};matrix-free={}",
            self.solver, self.hessian_source, matrix_free
        )
    }
}
/// Chooses the outer solver from the declared derivative availability.
///
/// Arm order is load-bearing: full second-order goes to Arc; otherwise the
/// EFS fixed point is preferred when eligible (pure-ρ first, then hybrid),
/// and BFGS is the final catch-all for anything without an analytic
/// Hessian.
pub fn plan(cap: &OuterCapability) -> OuterPlan {
    use Derivative::*;
    use HessianSource as H;
    use Solver as S;
    match (cap.gradient, cap.declared_hessian_for_planning()) {
        // Gradient + Hessian: full second-order solver.
        (Analytic, Analytic) => OuterPlan {
            solver: S::Arc,
            hessian_source: H::Analytic,
        },
        // No Hessian, pure penalty-like problem: EFS fixed point,
        // regardless of gradient availability.
        (Analytic, Unavailable) if cap.efs_plan_eligible() => OuterPlan {
            solver: S::Efs,
            hessian_source: H::EfsFixedPoint,
        },
        (Unavailable, Unavailable) if cap.efs_plan_eligible() => OuterPlan {
            solver: S::Efs,
            hessian_source: H::EfsFixedPoint,
        },
        // ψ coordinates present: hybrid EFS(ρ) + gradient(ψ).
        (Analytic, Unavailable) if cap.hybrid_efs_plan_eligible() => OuterPlan {
            solver: S::HybridEfs,
            hessian_source: H::HybridEfsFixedPoint,
        },
        (Unavailable, Unavailable) if cap.hybrid_efs_plan_eligible() => OuterPlan {
            solver: S::HybridEfs,
            hessian_source: H::HybridEfsFixedPoint,
        },
        // Fallbacks: quasi-Newton with approximate curvature.
        (Analytic, Unavailable) => OuterPlan {
            solver: S::Bfgs,
            hessian_source: H::BfgsApprox,
        },
        (Unavailable, _) => OuterPlan {
            solver: S::Bfgs,
            hessian_source: H::BfgsApprox,
        },
    }
}
pub fn plan_with_class(cap: &OuterCapability, class: SolverClass) -> OuterPlan {
use Derivative::*;
if class == SolverClass::AuxiliaryGradientFree
&& cap.gradient == Unavailable
&& cap.declared_hessian_for_planning() == Unavailable
&& !cap.efs_plan_eligible()
&& !cap.hybrid_efs_plan_eligible()
{
return OuterPlan {
solver: Solver::CompassSearch,
hessian_source: HessianSource::BfgsApprox,
};
}
plan(cap)
}
/// Logs the chosen plan with context-specific annotations (BFGS-fallback
/// warning, barrier guard, hybrid-EFS note).
pub fn log_plan(context: &str, cap: &OuterCapability, the_plan: &OuterPlan) {
    let hess_warning = match the_plan.hessian_source {
        HessianSource::BfgsApprox if cap.n_params > 0 => {
            " [no Hessian: BFGS approximation]".to_string()
        }
        _ => String::new(),
    };
    let barrier_note = if cap.barrier_config.is_some() && cap.efs_plan_eligible() {
        " [EFS with runtime barrier-curvature guard]"
    } else {
        ""
    };
    let hybrid_note = if the_plan.solver == Solver::HybridEfs {
        " [hybrid EFS(ρ) + preconditioned-gradient(ψ)]"
    } else {
        ""
    };
    log::info!(
        "[OUTER] {context}: n_params={}, gradient={:?}, hessian={:?} -> {} [{}]{hess_warning}{barrier_note}{hybrid_note}",
        cap.n_params,
        cap.gradient,
        cap.hessian,
        the_plan,
        the_plan.routing_log_line(),
    );
}
/// True when an error message explicitly requests the immediate
/// first-order (non-EFS) fallback via the embedded marker string.
fn requests_immediate_first_order_fallback(message: &str) -> bool {
    message.contains(EFS_FIRST_ORDER_FALLBACK_MARKER)
}
/// Returns a copy of `cap` with the fixed-point path disabled, or `None`
/// when disabling it would change nothing (already disabled, or no
/// EFS/HybridEFS plan was eligible anyway).
fn disable_fixed_point(cap: &OuterCapability) -> Option<OuterCapability> {
    let fixed_point_in_play =
        !cap.disable_fixed_point && (cap.efs_plan_eligible() || cap.hybrid_efs_plan_eligible());
    if fixed_point_in_play {
        let mut degraded = cap.clone();
        degraded.disable_fixed_point = true;
        Some(degraded)
    } else {
        None
    }
}
/// Builds the ordered list of degraded capabilities to try after a failed
/// primary attempt under `FallbackPolicy::Automatic`.
///
/// The only automatic degradation currently offered is disabling the
/// fixed-point (EFS/HybridEFS) path when an analytic gradient is still
/// available, which reroutes planning to gradient-based BFGS. Anything
/// else (including Arc plans) gets an empty list.
fn automatic_fallback_attempts(cap: &OuterCapability) -> Vec<OuterCapability> {
    let mut attempts = Vec::new();
    if cap.gradient == Derivative::Analytic
        && matches!(plan(cap).solver, Solver::Efs | Solver::HybridEfs)
    {
        if let Some(no_fp_cap) = disable_fixed_point(cap) {
            // Move instead of clone — the original cloned and then dropped
            // the source immediately.
            attempts.push(no_fp_cap);
        }
    }
    // Cleanup: the original ended with `if plan(cap).solver == Arc
    // { return attempts; } attempts`, a redundant no-op branch — both
    // paths returned the same (possibly empty) list.
    attempts
}
/// Detects the configuration where a HybridEFS plan cannot rely on its
/// automatic ψ-coordinate escape path: a primary-class run with fallback
/// disabled, an analytic gradient, and planning that would pick HybridEFS.
fn disabled_fallback_hybrid_efs_has_standalone_bfgs_primary(
    cap: &OuterCapability,
    config: &OuterConfig,
) -> bool {
    let policy_blocks_fallback = config.solver_class == SolverClass::Primary
        && config.fallback_policy == FallbackPolicy::Disabled;
    policy_blocks_fallback
        && cap.gradient == Derivative::Analytic
        && plan(cap).solver == Solver::HybridEfs
}
/// Adjusts the capability used for the primary attempt: when HybridEFS
/// would be planned but its required fallback path is disabled by config,
/// force the fixed point off so planning routes straight to BFGS instead.
fn primary_capability_for_config(
    mut cap: OuterCapability,
    config: &OuterConfig,
    context: &str,
) -> OuterCapability {
    if disabled_fallback_hybrid_efs_has_standalone_bfgs_primary(&cap, config) {
        log::info!(
            "[OUTER] {context}: HybridEFS requires the automatic first-order \
             escape path for ψ coordinates; fallback is disabled, so routing the \
             primary attempt to analytic-gradient BFGS"
        );
        cap.disable_fixed_point = true;
    }
    cap
}
/// Result of one outer-objective evaluation: cost, gradient, and whatever
/// Hessian information the objective could provide.
pub struct OuterEval {
    pub cost: f64,
    pub gradient: Array1<f64>,
    pub hessian: HessianResult,
}
impl OuterEval {
    /// Sentinel evaluation for an infeasible point: infinite cost, zero
    /// gradient of the right length, no Hessian.
    pub fn infeasible(n_params: usize) -> Self {
        Self {
            cost: f64::INFINITY,
            gradient: Array1::zeros(n_params),
            hessian: HessianResult::Unavailable,
        }
    }
}
/// Hessian payload of an evaluation: a dense matrix, a matrix-free
/// operator, or nothing.
pub enum HessianResult {
    Analytic(Array2<f64>),
    Operator(Arc<dyn OuterHessianOperator>),
    Unavailable,
}
// Manual Clone/Debug impls: HessianResult holds an `Arc<dyn ...>` trait
// object, for which the custom Debug rendering below (shape/dim summaries
// instead of full matrix dumps) is the desired behavior; OuterEval follows
// suit because it embeds HessianResult.
impl Clone for OuterEval {
    fn clone(&self) -> Self {
        Self {
            cost: self.cost,
            gradient: self.gradient.clone(),
            hessian: self.hessian.clone(),
        }
    }
}
impl std::fmt::Debug for OuterEval {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("OuterEval")
            .field("cost", &self.cost)
            .field("gradient", &self.gradient)
            .field("hessian", &self.hessian)
            .finish()
    }
}
impl Clone for HessianResult {
    fn clone(&self) -> Self {
        match self {
            Self::Analytic(h) => Self::Analytic(h.clone()),
            // Cheap: clones the Arc handle, not the operator.
            Self::Operator(op) => Self::Operator(Arc::clone(op)),
            Self::Unavailable => Self::Unavailable,
        }
    }
}
impl std::fmt::Debug for HessianResult {
    /// Compact rendering: matrix shape or operator dim, never the entries.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Analytic(h) => f
                .debug_tuple("Analytic")
                .field(&format!("{}x{}", h.nrows(), h.ncols()))
                .finish(),
            Self::Operator(op) => f
                .debug_tuple("Operator")
                .field(&format!("dim={}", op.dim()))
                .finish(),
            Self::Unavailable => f.write_str("Unavailable"),
        }
    }
}
impl HessianResult {
    /// Returns the dense matrix; panics on the other variants. For use
    /// where the caller has already established the variant is `Analytic`.
    pub fn unwrap_analytic(self) -> Array2<f64> {
        match self {
            HessianResult::Analytic(h) => h,
            HessianResult::Operator(_) => {
                panic!("expected dense analytic Hessian but got HessianResult::Operator")
            }
            HessianResult::Unavailable => {
                panic!("expected analytic Hessian but got HessianResult::Unavailable")
            }
        }
    }
    /// True when *any* Hessian information exists (dense OR operator).
    /// NOTE(review): despite the name this is "has Hessian", not
    /// "is dense analytic" — callers rely on the broader meaning.
    pub fn is_analytic(&self) -> bool {
        matches!(
            self,
            HessianResult::Analytic(_) | HessianResult::Operator(_)
        )
    }
    /// Dense matrix if already materialized; `None` for operator-backed or
    /// missing Hessians (use `materialize_dense` to force an operator).
    pub fn into_option(self) -> Option<Array2<f64>> {
        match self {
            HessianResult::Analytic(h) => Some(h),
            HessianResult::Operator(_) => None,
            HessianResult::Unavailable => None,
        }
    }
    /// Dimension of the Hessian, when one exists.
    pub fn dim(&self) -> Option<usize> {
        match self {
            HessianResult::Analytic(h) => Some(h.nrows()),
            HessianResult::Operator(op) => Some(op.dim()),
            HessianResult::Unavailable => None,
        }
    }
    /// Produces a dense matrix if possible: clones the analytic matrix,
    /// materializes an operator (possibly expensive), or returns Ok(None)
    /// when no Hessian exists.
    pub fn materialize_dense(&self) -> Result<Option<Array2<f64>>, String> {
        match self {
            HessianResult::Analytic(h) => Ok(Some(h.clone())),
            HessianResult::Operator(op) => op.materialize_dense().map(Some),
            HessianResult::Unavailable => Ok(None),
        }
    }
    /// Adds a square `rho_block` to the leading k x k sub-block in place.
    /// Dense Hessians are mutated directly; operators are wrapped in a
    /// `RhoBlockAdditiveOuterHessian` decorator; `Unavailable` is a no-op.
    pub fn add_rho_block_dense(&mut self, rho_block: &Array2<f64>) -> Result<(), String> {
        if rho_block.nrows() != rho_block.ncols() {
            return Err(format!(
                "rho-block Hessian update must be square, got {}x{}",
                rho_block.nrows(),
                rho_block.ncols()
            ));
        }
        match self {
            HessianResult::Analytic(h) => {
                if rho_block.nrows() > h.nrows() || rho_block.ncols() > h.ncols() {
                    return Err(format!(
                        "rho-block Hessian update shape mismatch: got {}x{}, outer Hessian is {}x{}",
                        rho_block.nrows(),
                        rho_block.ncols(),
                        h.nrows(),
                        h.ncols()
                    ));
                }
                let k = rho_block.nrows();
                let mut sl = h.slice_mut(ndarray::s![..k, ..k]);
                sl += rho_block;
                Ok(())
            }
            HessianResult::Operator(op) => {
                let base = Arc::clone(op);
                let dim = base.dim();
                if rho_block.nrows() > dim {
                    return Err(format!(
                        "rho-block Hessian update dimension mismatch: got {}x{}, operator dim is {}",
                        rho_block.nrows(),
                        rho_block.ncols(),
                        dim
                    ));
                }
                // Replace self with a decorator that applies the block lazily.
                *self = HessianResult::Operator(Arc::new(RhoBlockAdditiveOuterHessian {
                    base,
                    rho_block: rho_block.clone(),
                    dim,
                }));
                Ok(())
            }
            HessianResult::Unavailable => Ok(()),
        }
    }
}
/// Result of one EFS fixed-point evaluation: objective cost, per-parameter
/// update steps, and optional ψ-coordinate payloads for the hybrid solver.
#[derive(Clone, Debug)]
pub struct EfsEval {
    pub cost: f64,
    // One fixed-point step per outer parameter (length must equal n_params).
    pub steps: Vec<f64>,
    // Inner-coefficient vector at this point, when the objective exposes it.
    pub beta: Option<Array1<f64>>,
    // Gradient restricted to the ψ coordinates (length psi_dim).
    pub psi_gradient: Option<Array1<f64>>,
    // Positions of the ψ coordinates inside θ (length psi_dim).
    pub psi_indices: Option<Vec<usize>>,
}
/// Mutable outer objective interface used by every solver in this module.
pub trait OuterObjective {
    /// Static description of what this objective can provide.
    fn capability(&self) -> OuterCapability;
    /// Cost only (cheapest evaluation).
    fn eval_cost(&mut self, rho: &Array1<f64>) -> Result<f64, EstimationError>;
    /// Cheap cost proxy for seed screening; defaults to the full cost.
    fn eval_screening_proxy(&mut self, rho: &Array1<f64>) -> Result<f64, EstimationError> {
        self.eval_cost(rho)
    }
    /// Full evaluation: cost, gradient, and Hessian payload.
    fn eval(&mut self, rho: &Array1<f64>) -> Result<OuterEval, EstimationError>;
    /// Evaluation honoring a requested order; the default ignores the hint
    /// and always performs the full `eval`.
    fn eval_with_order(
        &mut self,
        rho: &Array1<f64>,
        order: OuterEvalOrder,
    ) -> Result<OuterEval, EstimationError> {
        match order {
            OuterEvalOrder::ValueAndGradient | OuterEvalOrder::ValueGradientHessian => {
                self.eval(rho)
            }
        }
    }
    /// EFS fixed-point evaluation; errors by default for objectives that
    /// do not implement the fixed point.
    fn eval_efs(&mut self, _: &Array1<f64>) -> Result<EfsEval, EstimationError> {
        Err(EstimationError::RemlOptimizationFailed(
            "EFS evaluation not implemented for this objective".to_string(),
        ))
    }
    /// Clears any cached state so the next evaluation starts fresh.
    fn reset(&mut self);
}
/// Closure-based [`OuterObjective`] adapter: mandatory cost/eval closures
/// plus optional hooks; the defaulted type parameters let callers omit the
/// optional closures (the `fn(..)` defaults are never called — the
/// corresponding `Option` fields are simply `None`).
pub struct ClosureObjective<
    S,
    Fc,
    Fe,
    Fr = fn(&mut S),
    Fefs = fn(&mut S, &Array1<f64>) -> Result<EfsEval, EstimationError>,
    Feo = fn(&mut S, &Array1<f64>, OuterEvalOrder) -> Result<OuterEval, EstimationError>,
    Fsp = fn(&mut S, &Array1<f64>) -> Result<f64, EstimationError>,
> {
    // Mutable state threaded through every closure call.
    pub(crate) state: S,
    // Capability reported verbatim by `capability()`.
    pub(crate) cap: OuterCapability,
    // Cost-only evaluation (also the screening-proxy fallback).
    pub(crate) cost_fn: Fc,
    // Full evaluation (also the order-aware fallback).
    pub(crate) eval_fn: Fe,
    // Optional order-aware evaluation.
    pub(crate) eval_order_fn: Option<Feo>,
    // Optional state reset hook.
    pub(crate) reset_fn: Option<Fr>,
    // Optional EFS fixed-point evaluation.
    pub(crate) efs_fn: Option<Fefs>,
    // Optional cheap screening proxy.
    pub(crate) screening_proxy_fn: Option<Fsp>,
}
// Delegation impl: each trait method calls the matching closure, falling
// back to the mandatory ones (or the trait's default error) when an
// optional closure was not supplied.
impl<S, Fc, Fe, Fr, Fefs, Feo, Fsp> OuterObjective
    for ClosureObjective<S, Fc, Fe, Fr, Fefs, Feo, Fsp>
where
    Fc: FnMut(&mut S, &Array1<f64>) -> Result<f64, EstimationError>,
    Fe: FnMut(&mut S, &Array1<f64>) -> Result<OuterEval, EstimationError>,
    Fr: FnMut(&mut S),
    Fefs: FnMut(&mut S, &Array1<f64>) -> Result<EfsEval, EstimationError>,
    Feo: FnMut(&mut S, &Array1<f64>, OuterEvalOrder) -> Result<OuterEval, EstimationError>,
    Fsp: FnMut(&mut S, &Array1<f64>) -> Result<f64, EstimationError>,
{
    fn capability(&self) -> OuterCapability {
        self.cap.clone()
    }
    fn eval_cost(&mut self, rho: &Array1<f64>) -> Result<f64, EstimationError> {
        (self.cost_fn)(&mut self.state, rho)
    }
    fn eval_screening_proxy(&mut self, rho: &Array1<f64>) -> Result<f64, EstimationError> {
        // Fall back to the full cost when no cheap proxy was provided.
        match self.screening_proxy_fn.as_mut() {
            Some(f) => f(&mut self.state, rho),
            None => (self.cost_fn)(&mut self.state, rho),
        }
    }
    fn eval(&mut self, rho: &Array1<f64>) -> Result<OuterEval, EstimationError> {
        (self.eval_fn)(&mut self.state, rho)
    }
    fn eval_with_order(
        &mut self,
        rho: &Array1<f64>,
        order: OuterEvalOrder,
    ) -> Result<OuterEval, EstimationError> {
        // Without an order-aware closure the hint is ignored entirely.
        match self.eval_order_fn.as_mut() {
            Some(f) => f(&mut self.state, rho, order),
            None => (self.eval_fn)(&mut self.state, rho),
        }
    }
    fn eval_efs(&mut self, rho: &Array1<f64>) -> Result<EfsEval, EstimationError> {
        match self.efs_fn.as_mut() {
            Some(f) => f(&mut self.state, rho),
            None => Err(EstimationError::RemlOptimizationFailed(
                "EFS evaluation not implemented for this objective".to_string(),
            )),
        }
    }
    fn reset(&mut self) {
        if let Some(f) = self.reset_fn.as_mut() {
            f(&mut self.state);
        }
    }
}
/// Wraps an `EstimationError` as a recoverable objective-evaluation error
/// with a context prefix.
fn into_objective_error(context: &str, err: EstimationError) -> ObjectiveEvalError {
    ObjectiveEvalError::recoverable(format!("{context}: {err}"))
}
/// Passes a finite cost through unchanged; rejects NaN and ±infinity with
/// a recoverable error.
fn finite_cost_or_error(context: &str, cost: f64) -> Result<f64, ObjectiveEvalError> {
    if !cost.is_finite() {
        return Err(ObjectiveEvalError::recoverable(format!(
            "{context}: objective returned a non-finite cost"
        )));
    }
    Ok(cost)
}
/// Full validation of an outer evaluation: gradient length, finite cost,
/// finite gradient, and (per variant) Hessian shape/finiteness or operator
/// dimension. Operator entries are NOT probed here — that would cost
/// matvecs; see `validate_second_order_seed_hessian` for the probe.
fn finite_outer_eval_or_error(
    context: &str,
    layout: OuterThetaLayout,
    eval: OuterEval,
) -> Result<OuterEval, ObjectiveEvalError> {
    layout.validate_gradient_len(&eval.gradient, context)?;
    if !eval.cost.is_finite() {
        return Err(ObjectiveEvalError::recoverable(format!(
            "{context}: objective returned a non-finite cost"
        )));
    }
    if !eval.gradient.iter().all(|v| v.is_finite()) {
        return Err(ObjectiveEvalError::recoverable(format!(
            "{context}: objective returned a non-finite gradient"
        )));
    }
    match &eval.hessian {
        HessianResult::Analytic(hessian) => {
            layout.validate_hessian_shape(hessian, context)?;
            if !hessian.iter().all(|v| v.is_finite()) {
                return Err(ObjectiveEvalError::recoverable(format!(
                    "{context}: objective returned a non-finite Hessian"
                )));
            }
        }
        HessianResult::Operator(op) => {
            // Only the dimension is checkable without paying for matvecs.
            if op.dim() != layout.n_params {
                return Err(ObjectiveEvalError::recoverable(format!(
                    "{context}: outer Hessian operator dimension mismatch: got {}, expected {} (rho_dim={}, psi_dim={})",
                    op.dim(),
                    layout.n_params,
                    layout.rho_dim(),
                    layout.psi_dim
                )));
            }
        }
        HessianResult::Unavailable => {}
    }
    Ok(eval)
}
/// First-order variant of `finite_outer_eval_or_error`: validates only the
/// gradient length, finite cost, and finite gradient — any Hessian payload
/// passes through unchecked.
fn finite_outer_first_order_eval_or_error(
    context: &str,
    layout: OuterThetaLayout,
    eval: OuterEval,
) -> Result<OuterEval, ObjectiveEvalError> {
    layout.validate_gradient_len(&eval.gradient, context)?;
    if !eval.cost.is_finite() {
        return Err(ObjectiveEvalError::recoverable(format!(
            "{context}: objective returned a non-finite cost"
        )));
    }
    if eval.gradient.iter().any(|v| !v.is_finite()) {
        return Err(ObjectiveEvalError::recoverable(format!(
            "{context}: objective returned a non-finite gradient"
        )));
    }
    Ok(eval)
}
/// Optional geometry probe run at a second-order seed: materializes the
/// Hessian and checks shape and finiteness. Skipped (Ok) for large
/// problems, missing Hessians, and operators that declare materialization
/// unavailable, since the probe would be too expensive or impossible.
fn validate_second_order_seed_hessian(
    context: &str,
    layout: OuterThetaLayout,
    eval: &OuterEval,
) -> Result<(), ObjectiveEvalError> {
    // `is_analytic` here means "has any Hessian" (dense or operator).
    if layout.n_params > SECOND_ORDER_GEOMETRY_PROBE_MAX_PARAMS || !eval.hessian.is_analytic() {
        return Ok(());
    }
    if matches!(
        &eval.hessian,
        HessianResult::Operator(op) if !op.materialization_capability().is_available()
    ) {
        return Ok(());
    }
    let Some(hessian) = eval.hessian.materialize_dense().map_err(|message| {
        ObjectiveEvalError::recoverable(format!(
            "{context}: analytic outer Hessian materialization failed during second-order seed validation: {message}"
        ))
    })?
    else {
        // Unavailable Hessian: nothing to probe.
        return Ok(());
    };
    layout.validate_hessian_shape(&hessian, context)?;
    if !hessian.iter().all(|value| value.is_finite()) {
        return Err(ObjectiveEvalError::recoverable(format!(
            "{context}: analytic outer Hessian probe encountered non-finite entries"
        )));
    }
    Ok(())
}
/// Validates an EFS evaluation: layout shapes, finite cost, and finite
/// steps. A non-finite step is reported with its coordinate kind (ψ vs
/// ρ/τ), resolved through the optional ψ-index list.
fn finite_efs_eval_or_error(
    context: &str,
    layout: OuterThetaLayout,
    eval: EfsEval,
) -> Result<EfsEval, ObjectiveEvalError> {
    layout.validate_efs_eval(&eval, context)?;
    finite_cost_or_error(context, eval.cost)?;
    if let Some((idx, value)) = eval.steps.iter().enumerate().find(|(_, v)| !v.is_finite()) {
        // Classify the offending coordinate for the error message: with an
        // index list we can tell ψ from the rest; without one, everything
        // is assumed to be ρ.
        let coord_kind = match eval.psi_indices.as_deref() {
            Some(indices) if indices.contains(&idx) => "ψ",
            Some(_) => "ρ/τ",
            None => "ρ",
        };
        return Err(ObjectiveEvalError::recoverable(format!(
            "{context}: objective returned a non-finite {coord_kind} EFS step at \
             coord {idx} (step[{idx}]={value}, rho_dim={}, psi_dim={}, n_params={})",
            layout.rho_dim(),
            layout.psi_dim,
            layout.n_params,
        )));
    }
    Ok(eval)
}
/// Adapter exposing an [`OuterObjective`] to the first-order optimizer,
/// while tracking gradient-norm progress to drive the inner-solver cap
/// schedule.
struct OuterFirstOrderBridge<'a> {
    obj: &'a mut dyn OuterObjective,
    layout: OuterThetaLayout,
    // Feedback channel for capping inner iterations; None disables the
    // schedule entirely.
    outer_inner_cap: Option<InnerProgressFeedback>,
    // Number of gradient evaluations performed so far.
    iter_count: usize,
    // First positive finite gradient norm seen (reference for g_ratio).
    g_norm_initial: Option<f64>,
    // Most recent finite gradient norm.
    last_g_norm: Option<f64>,
}
impl ZerothOrderObjective for OuterFirstOrderBridge<'_> {
    /// Cost-only evaluation with length and finiteness validation.
    fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
        self.layout
            .validate_point_len(x, "outer eval_cost failed")?;
        let cost = self
            .obj
            .eval_cost(x)
            .map_err(|err| into_objective_error("outer eval_cost failed", err))?;
        finite_cost_or_error("outer eval_cost failed", cost)
    }
}
impl FirstOrderObjective for OuterFirstOrderBridge<'_> {
    /// Value-and-gradient evaluation. Before delegating, pushes a freshly
    /// computed inner-iteration cap through the feedback channel (based on
    /// outer progress and the last inner-solve snapshot); afterwards,
    /// updates the gradient-norm bookkeeping used by that schedule.
    fn eval_grad(&mut self, x: &Array1<f64>) -> Result<FirstOrderSample, ObjectiveEvalError> {
        self.layout.validate_point_len(x, "outer eval failed")?;
        if let Some(feedback) = self.outer_inner_cap.as_ref() {
            // Relative gradient progress: ratio of last norm to the first
            // recorded norm, when both are available.
            let g_ratio = match (self.last_g_norm, self.g_norm_initial) {
                (Some(g), Some(g0)) if g0 > 0.0 => Some(g / g0),
                _ => None,
            };
            let snapshot = feedback.snapshot();
            let cap = first_order_inner_cap_schedule(self.iter_count, g_ratio, snapshot);
            let prev = feedback.cap.swap(cap, Ordering::Relaxed);
            // Only log when the cap actually changes, to keep logs readable.
            if prev != cap {
                let ratio_str = match g_ratio {
                    Some(r) => format!("{:.3e}", r),
                    None => "n/a".to_string(),
                };
                let snap_str = match snapshot {
                    Some(s) => format!(
                        "last_iters={} converged={} ift_residual={} accept_rho={}",
                        s.last_iters,
                        s.last_converged,
                        match s.last_ift_residual {
                            Some(r) => format!("{:.3e}", r),
                            None => "n/a".to_string(),
                        },
                        match s.last_accept_rho {
                            Some(r) => format!("{:.3}", r),
                            None => "n/a".to_string(),
                        },
                    ),
                    None => "no-history".to_string(),
                };
                log::info!(
                    "[OUTER schedule] inner-PIRLS cap transition iter={} g_ratio={} {} prev={} new={} ({})",
                    self.iter_count,
                    ratio_str,
                    snap_str,
                    prev,
                    cap,
                    if cap == 0 { "uncapped" } else { "capped" }
                );
            }
        }
        let stage_start = std::time::Instant::now();
        log::info!(
            "[STAGE] outer eval start order=ValueAndGradient dim={} (first-order bridge, iter={})",
            x.len(),
            self.iter_count
        );
        let eval = self
            .obj
            .eval_with_order(x, OuterEvalOrder::ValueAndGradient)
            .map_err(|err| into_objective_error("outer eval failed", err))?;
        let eval = finite_outer_first_order_eval_or_error("outer eval failed", self.layout, eval)?;
        let g_norm = eval.gradient.iter().map(|v| v * v).sum::<f64>().sqrt();
        // The first positive finite norm becomes the fixed reference point.
        if self.g_norm_initial.is_none() && g_norm.is_finite() && g_norm > 0.0 {
            self.g_norm_initial = Some(g_norm);
        }
        if g_norm.is_finite() {
            self.last_g_norm = Some(g_norm);
        }
        log::info!(
            "[STAGE] outer eval end order=ValueAndGradient elapsed={:.3}s cost={:.6e} |g|={:.3e} (first-order bridge, iter={})",
            stage_start.elapsed().as_secs_f64(),
            eval.cost,
            g_norm,
            self.iter_count,
        );
        self.iter_count = self.iter_count.saturating_add(1);
        Ok(FirstOrderSample {
            value: eval.cost,
            gradient: eval.gradient,
        })
    }
}
/// Adaptive inner-PIRLS iteration cap for the outer first-order loop.
///
/// Returns `0` (uncapped) once the outer gradient has decayed by more than
/// two orders of magnitude. With a history snapshot, reuses the observed
/// inner iteration count plus a margin (when the last solve converged) or
/// backs off geometrically (when it hit the cap), clamped to `[3, 64]`.
/// Without history, a conservative tier keyed on the outer iteration applies.
fn first_order_inner_cap_schedule(
    iter_count: usize,
    g_ratio: Option<f64>,
    last: Option<InnerProgressSnapshot>,
) -> usize {
    // Outer gradient collapsed by >= 100x: lift the cap entirely.
    if g_ratio.is_some_and(|r| r < 0.01) {
        return 0;
    }
    let Some(snap) = last else {
        // No inner history yet: conservative iteration-keyed tiers.
        return if iter_count == 0 {
            3
        } else if iter_count == 1 {
            5
        } else {
            10
        };
    };
    let proposed = if snap.last_converged {
        // Converged last time: reuse the observed count plus a residual-driven
        // margin, widened when the LM acceptance ratio was poor.
        let residual_margin = match snap.last_ift_residual {
            Some(r) if r < 0.01 => 1usize,
            Some(r) if r >= 0.10 => 4usize,
            _ => 2usize,
        };
        let rho_bump = if snap.last_accept_rho.is_some_and(|r| r < 0.5) {
            2usize
        } else {
            0usize
        };
        snap.last_iters
            .saturating_add(residual_margin)
            .saturating_add(rho_bump)
    } else {
        // Hit the previous cap: geometric backoff, steeper on very poor rho,
        // with an additive floor so tiny counts still grow meaningfully.
        let factor = if snap.last_accept_rho.is_some_and(|r| r < 0.3) {
            3
        } else {
            2
        };
        snap.last_iters
            .saturating_mul(factor)
            .max(snap.last_iters.saturating_add(4))
    };
    proposed.clamp(3, 64)
}
// Unit tests for the adaptive inner-PIRLS cap schedule and for the atomic
// snapshot decode (sentinel vs. real zero residual).
#[cfg(test)]
mod outer_inner_cap_schedule_tests {
    use super::{InnerProgressSnapshot, first_order_inner_cap_schedule};
    // Snapshot with neither residual nor accept-rho signal.
    fn snap(last_iters: usize, last_converged: bool) -> Option<InnerProgressSnapshot> {
        Some(InnerProgressSnapshot {
            last_iters,
            last_converged,
            last_ift_residual: None,
            last_accept_rho: None,
        })
    }
    // Snapshot carrying only the LM acceptance-ratio signal.
    fn snap_with_accept_rho(
        last_iters: usize,
        last_converged: bool,
        accept_rho: f64,
    ) -> Option<InnerProgressSnapshot> {
        Some(InnerProgressSnapshot {
            last_iters,
            last_converged,
            last_ift_residual: None,
            last_accept_rho: Some(accept_rho),
        })
    }
    // Snapshot carrying only the IFT residual signal.
    fn snap_with_residual(
        last_iters: usize,
        last_converged: bool,
        residual: f64,
    ) -> Option<InnerProgressSnapshot> {
        Some(InnerProgressSnapshot {
            last_iters,
            last_converged,
            last_ift_residual: Some(residual),
            last_accept_rho: None,
        })
    }
    #[test]
    fn snapshot_distinguishes_zero_residual_from_no_signal() {
        use super::InnerProgressFeedback;
        use crate::solver::estimate::reml::runtime::IFT_RESIDUAL_NO_SIGNAL_BITS;
        use std::sync::Arc;
        use std::sync::atomic::{AtomicBool, AtomicU64, AtomicUsize};
        let make_feedback =
            |iters: usize, converged: bool, residual_bits: u64| InnerProgressFeedback {
                cap: Arc::new(AtomicUsize::new(0)),
                last_iters: Arc::new(AtomicUsize::new(iters)),
                last_converged: Arc::new(AtomicBool::new(converged)),
                ift_residual: Arc::new(AtomicU64::new(residual_bits)),
                accept_rho: Arc::new(AtomicU64::new(
                    crate::solver::estimate::reml::runtime::IFT_RESIDUAL_NO_SIGNAL_BITS,
                )),
            };
        // Sentinel bit pattern must decode to "no signal", not a number.
        let fb = make_feedback(5, true, IFT_RESIDUAL_NO_SIGNAL_BITS);
        let snap = fb.snapshot().expect("iters > 0, snapshot present");
        assert!(
            snap.last_ift_residual.is_none(),
            "sentinel must decode to None"
        );
        // A genuine 0.0 residual must survive the bit round-trip.
        let fb = make_feedback(5, true, 0.0_f64.to_bits());
        let snap = fb.snapshot().expect("iters > 0, snapshot present");
        assert_eq!(
            snap.last_ift_residual,
            Some(0.0),
            "residual of exactly 0.0 must round-trip as a real signal, \
             not be confused with the no-signal sentinel",
        );
        let fb = make_feedback(5, true, 0.05_f64.to_bits());
        let snap = fb.snapshot().expect("snapshot present");
        assert_eq!(snap.last_ift_residual, Some(0.05));
        // Zero iterations means no snapshot at all.
        let fb = make_feedback(0, false, IFT_RESIDUAL_NO_SIGNAL_BITS);
        assert!(fb.snapshot().is_none());
    }
    #[test]
    fn schedule_falls_back_to_iter_tier_without_feedback() {
        assert_eq!(first_order_inner_cap_schedule(0, None, None), 3);
        assert_eq!(first_order_inner_cap_schedule(1, None, None), 5);
        assert_eq!(first_order_inner_cap_schedule(2, None, None), 10);
        assert_eq!(first_order_inner_cap_schedule(20, None, None), 10);
    }
    #[test]
    fn schedule_uses_last_iters_plus_margin_when_converged() {
        assert_eq!(first_order_inner_cap_schedule(2, None, snap(4, true)), 6);
        assert_eq!(first_order_inner_cap_schedule(5, None, snap(12, true)), 14);
    }
    #[test]
    fn schedule_geometric_backoff_when_last_hit_cap() {
        assert_eq!(first_order_inner_cap_schedule(2, None, snap(5, false)), 10);
        assert_eq!(first_order_inner_cap_schedule(2, None, snap(1, false)), 5);
        assert_eq!(first_order_inner_cap_schedule(2, None, snap(30, false)), 60);
    }
    #[test]
    fn schedule_clamps_floor_and_ceiling() {
        assert_eq!(first_order_inner_cap_schedule(2, None, snap(0, true)), 3);
        assert_eq!(first_order_inner_cap_schedule(2, None, snap(100, true)), 64);
    }
    #[test]
    fn schedule_uncaps_when_outer_converged() {
        // g_ratio < 0.01 overrides every other signal.
        assert_eq!(first_order_inner_cap_schedule(0, Some(0.0001), None), 0);
        assert_eq!(
            first_order_inner_cap_schedule(0, Some(0.005), snap(4, true)),
            0
        );
        assert_eq!(
            first_order_inner_cap_schedule(20, Some(0.001), snap(50, false)),
            0
        );
    }
    #[test]
    fn schedule_ignores_modest_g_ratio_decay() {
        assert_eq!(
            first_order_inner_cap_schedule(2, Some(0.30), snap(4, true)),
            6
        );
        assert_eq!(
            first_order_inner_cap_schedule(2, Some(0.05), snap(4, true)),
            6
        );
    }
    #[test]
    fn schedule_uses_ift_residual_to_pick_margin() {
        // Margin tiers: <0.01 → +1, [0.01, 0.10) → +2, >=0.10 → +4.
        assert_eq!(
            first_order_inner_cap_schedule(2, None, snap_with_residual(4, true, 0.005)),
            5
        );
        assert_eq!(
            first_order_inner_cap_schedule(2, None, snap_with_residual(4, true, 0.0001)),
            5
        );
        assert_eq!(
            first_order_inner_cap_schedule(2, None, snap_with_residual(4, true, 0.05)),
            6
        );
        assert_eq!(
            first_order_inner_cap_schedule(2, None, snap_with_residual(4, true, 0.20)),
            8
        );
        assert_eq!(
            first_order_inner_cap_schedule(2, None, snap_with_residual(4, true, 0.80)),
            8
        );
        // Larger residual must never yield a smaller cap.
        let residuals = [0.001, 0.05, 0.30];
        let caps: Vec<usize> = residuals
            .iter()
            .map(|&r| first_order_inner_cap_schedule(2, None, snap_with_residual(4, true, r)))
            .collect();
        for w in caps.windows(2) {
            assert!(
                w[0] <= w[1],
                "ift-residual margin policy regressed monotonicity: {caps:?}"
            );
        }
    }
    #[test]
    fn schedule_bumps_margin_on_poor_lm_accept_rho() {
        // accept_rho < 0.5 adds +2 to the converged-path margin.
        assert_eq!(
            first_order_inner_cap_schedule(2, None, snap_with_accept_rho(4, true, 0.95)),
            6
        );
        assert_eq!(
            first_order_inner_cap_schedule(2, None, snap_with_accept_rho(4, true, 0.5)),
            6
        );
        assert_eq!(
            first_order_inner_cap_schedule(2, None, snap_with_accept_rho(4, true, 0.4)),
            8
        );
        assert_eq!(
            first_order_inner_cap_schedule(2, None, snap_with_accept_rho(4, true, 0.1)),
            8
        );
        assert_eq!(
            first_order_inner_cap_schedule(2, None, snap_with_accept_rho(4, true, 0.49)),
            8
        );
    }
    #[test]
    fn schedule_escalates_geometric_backoff_on_very_poor_accept_rho() {
        // accept_rho < 0.3 escalates the backoff multiplier from 2x to 3x.
        let snap = Some(InnerProgressSnapshot {
            last_iters: 4,
            last_converged: false,
            last_ift_residual: None,
            last_accept_rho: Some(0.15),
        });
        assert_eq!(first_order_inner_cap_schedule(2, None, snap), 12);
        let snap = Some(InnerProgressSnapshot {
            last_iters: 4,
            last_converged: false,
            last_ift_residual: None,
            last_accept_rho: Some(0.4),
        });
        assert_eq!(first_order_inner_cap_schedule(2, None, snap), 8);
        let snap = Some(InnerProgressSnapshot {
            last_iters: 4,
            last_converged: false,
            last_ift_residual: None,
            last_accept_rho: Some(0.9),
        });
        assert_eq!(first_order_inner_cap_schedule(2, None, snap), 8);
        let snap = Some(InnerProgressSnapshot {
            last_iters: 4,
            last_converged: false,
            last_ift_residual: None,
            last_accept_rho: None,
        });
        assert_eq!(first_order_inner_cap_schedule(2, None, snap), 8);
        // Exactly 0.3 does not trigger the escalation (strict <).
        let snap = Some(InnerProgressSnapshot {
            last_iters: 4,
            last_converged: false,
            last_ift_residual: None,
            last_accept_rho: Some(0.3),
        });
        assert_eq!(first_order_inner_cap_schedule(2, None, snap), 8);
    }
    #[test]
    fn schedule_skips_lm_accept_rho_bump_when_signal_absent() {
        let snap = Some(InnerProgressSnapshot {
            last_iters: 4,
            last_converged: true,
            last_ift_residual: None,
            last_accept_rho: None,
        });
        assert_eq!(first_order_inner_cap_schedule(2, None, snap), 6);
        let snap = Some(InnerProgressSnapshot {
            last_iters: 4,
            last_converged: true,
            last_ift_residual: None,
            last_accept_rho: Some(0.5),
        });
        assert_eq!(first_order_inner_cap_schedule(2, None, snap), 6);
        let snap = Some(InnerProgressSnapshot {
            last_iters: 4,
            last_converged: true,
            last_ift_residual: None,
            last_accept_rho: Some(1.0),
        });
        assert_eq!(first_order_inner_cap_schedule(2, None, snap), 6);
    }
    #[test]
    fn schedule_combines_ift_residual_and_lm_accept_rho() {
        // Margins stack: residual tier + rho bump.
        let snap = Some(InnerProgressSnapshot {
            last_iters: 4,
            last_converged: true,
            last_ift_residual: Some(0.30),
            last_accept_rho: Some(0.20),
        });
        assert_eq!(first_order_inner_cap_schedule(2, None, snap), 10);
        let snap = Some(InnerProgressSnapshot {
            last_iters: 4,
            last_converged: true,
            last_ift_residual: Some(0.005),
            last_accept_rho: Some(0.30),
        });
        assert_eq!(first_order_inner_cap_schedule(2, None, snap), 7);
    }
}
// Adapter exposing an `OuterObjective` to the second-order (ARC) solver.
struct OuterSecondOrderBridge<'a> {
    // Wrapped user objective evaluated at each outer point.
    obj: &'a mut dyn OuterObjective,
    // Dimension bookkeeping used to validate incoming points and evals.
    layout: OuterThetaLayout,
    // Selects whether an explicit Hessian is produced (analytic) or not.
    hessian_source: HessianSource,
    // Largest operator dimension we are willing to materialize densely.
    materialize_operator_max_dim: usize,
    // Total gradient/Hessian evaluations; arc_iter is derived as eval_count / 2.
    eval_count: usize,
    // Shared atomics publishing the adaptive inner-PIRLS iteration cap.
    outer_inner_cap: Option<InnerProgressFeedback>,
    // First positive finite gradient norm seen (baseline for g_ratio).
    g_norm_initial: Option<f64>,
    // Most recent finite gradient norm.
    last_g_norm: Option<f64>,
}
impl ZerothOrderObjective for OuterSecondOrderBridge<'_> {
    /// Cost-only evaluation: validate, delegate, and enforce finiteness.
    fn eval_cost(&mut self, x: &Array1<f64>) -> Result<f64, ObjectiveEvalError> {
        self.layout.validate_point_len(x, "outer eval_cost failed")?;
        self.obj
            .eval_cost(x)
            .map_err(|err| into_objective_error("outer eval_cost failed", err))
            .and_then(|cost| finite_cost_or_error("outer eval_cost failed", cost))
    }
}
impl FirstOrderObjective for OuterSecondOrderBridge<'_> {
    /// Gradient evaluation on the ARC bridge.
    ///
    /// Mirrors the first-order bridge: refreshes the shared inner-PIRLS cap
    /// before evaluating, then records gradient-norm history. `arc_iter` is
    /// derived as eval_count / 2 — presumably because ARC issues gradient and
    /// Hessian evaluations in pairs per outer step; TODO confirm.
    fn eval_grad(&mut self, x: &Array1<f64>) -> Result<FirstOrderSample, ObjectiveEvalError> {
        self.layout.validate_point_len(x, "outer eval failed")?;
        if let Some(feedback) = self.outer_inner_cap.as_ref() {
            let arc_iter = self.eval_count / 2;
            // Relative gradient decay |g|/|g0|; None until a baseline exists.
            let g_ratio = match (self.last_g_norm, self.g_norm_initial) {
                (Some(g), Some(g0)) if g0 > 0.0 => Some(g / g0),
                _ => None,
            };
            let snapshot = feedback.snapshot();
            let cap = first_order_inner_cap_schedule(arc_iter, g_ratio, snapshot);
            // Publish the new cap; log only on transitions.
            let prev = feedback.cap.swap(cap, Ordering::Relaxed);
            if prev != cap {
                let ratio_str = match g_ratio {
                    Some(r) => format!("{:.3e}", r),
                    None => "n/a".to_string(),
                };
                let snap_str = match snapshot {
                    Some(s) => format!(
                        "last_iters={} converged={} ift_residual={} accept_rho={}",
                        s.last_iters,
                        s.last_converged,
                        match s.last_ift_residual {
                            Some(r) => format!("{:.3e}", r),
                            None => "n/a".to_string(),
                        },
                        match s.last_accept_rho {
                            Some(r) => format!("{:.3}", r),
                            None => "n/a".to_string(),
                        },
                    ),
                    None => "no-history".to_string(),
                };
                log::info!(
                    "[OUTER schedule] inner-PIRLS cap transition (ARC bridge) arc_iter={} g_ratio={} {} prev={} new={} ({})",
                    arc_iter,
                    ratio_str,
                    snap_str,
                    prev,
                    cap,
                    if cap == 0 { "uncapped" } else { "capped" }
                );
            }
        }
        let stage_start = std::time::Instant::now();
        log::info!(
            "[STAGE] outer eval start order=ValueAndGradient dim={}",
            x.len()
        );
        let eval = self
            .obj
            .eval_with_order(x, OuterEvalOrder::ValueAndGradient)
            .map_err(|err| into_objective_error("outer eval failed", err))?;
        let eval = finite_outer_first_order_eval_or_error("outer eval failed", self.layout, eval)?;
        self.eval_count += 1;
        let g_norm = eval.gradient.iter().map(|v| v * v).sum::<f64>().sqrt();
        // First positive finite norm becomes the fixed baseline for g_ratio.
        if self.g_norm_initial.is_none() && g_norm.is_finite() && g_norm > 0.0 {
            self.g_norm_initial = Some(g_norm);
        }
        if g_norm.is_finite() {
            self.last_g_norm = Some(g_norm);
        }
        log::info!(
            "[STAGE] outer eval end order=ValueAndGradient elapsed={:.3}s cost={:.6e} |g|={:.3e}",
            stage_start.elapsed().as_secs_f64(),
            eval.cost,
            g_norm,
        );
        log::info!(
            "[OUTER] eval#{n} (grad) cost={cost:.6e} |g|={gnorm:.3e}",
            n = self.eval_count,
            cost = eval.cost,
            gnorm = g_norm,
        );
        Ok(FirstOrderSample {
            value: eval.cost,
            gradient: eval.gradient,
        })
    }
}
impl SecondOrderObjective for OuterSecondOrderBridge<'_> {
    /// Value, gradient, and (optionally) dense Hessian for the ARC solver.
    ///
    /// Repeats the same cap-schedule refresh as `eval_grad` so the inner cap
    /// stays current regardless of which evaluation ARC requests next. A
    /// dense Hessian is returned only for `HessianSource::Analytic`: either
    /// directly, or by materializing an operator-backed Hessian when its
    /// capability allows and its dimension fits the configured limit.
    fn eval_hessian(&mut self, x: &Array1<f64>) -> Result<SecondOrderSample, ObjectiveEvalError> {
        self.layout.validate_point_len(x, "outer eval failed")?;
        if let Some(feedback) = self.outer_inner_cap.as_ref() {
            let arc_iter = self.eval_count / 2;
            // Relative gradient decay |g|/|g0|; None until a baseline exists.
            let g_ratio = match (self.last_g_norm, self.g_norm_initial) {
                (Some(g), Some(g0)) if g0 > 0.0 => Some(g / g0),
                _ => None,
            };
            let snapshot = feedback.snapshot();
            let cap = first_order_inner_cap_schedule(arc_iter, g_ratio, snapshot);
            // Publish the new cap; log only on transitions.
            let prev = feedback.cap.swap(cap, Ordering::Relaxed);
            if prev != cap {
                let ratio_str = match g_ratio {
                    Some(r) => format!("{:.3e}", r),
                    None => "n/a".to_string(),
                };
                let snap_str = match snapshot {
                    Some(s) => format!(
                        "last_iters={} converged={} ift_residual={} accept_rho={}",
                        s.last_iters,
                        s.last_converged,
                        match s.last_ift_residual {
                            Some(r) => format!("{:.3e}", r),
                            None => "n/a".to_string(),
                        },
                        match s.last_accept_rho {
                            Some(r) => format!("{:.3}", r),
                            None => "n/a".to_string(),
                        },
                    ),
                    None => "no-history".to_string(),
                };
                log::info!(
                    "[OUTER schedule] inner-PIRLS cap transition (ARC bridge) arc_iter={} g_ratio={} {} prev={} new={} ({})",
                    arc_iter,
                    ratio_str,
                    snap_str,
                    prev,
                    cap,
                    if cap == 0 { "uncapped" } else { "capped" }
                );
            }
        }
        let stage_start = std::time::Instant::now();
        log::info!(
            "[STAGE] outer eval start order=ValueGradientHessian dim={}",
            x.len()
        );
        let eval = self
            .obj
            .eval_with_order(x, OuterEvalOrder::ValueGradientHessian)
            .map_err(|err| into_objective_error("outer eval failed", err))?;
        let eval = finite_outer_eval_or_error("outer eval failed", self.layout, eval)?;
        self.eval_count += 1;
        let g_norm = eval.gradient.iter().map(|v| v * v).sum::<f64>().sqrt();
        if self.g_norm_initial.is_none() && g_norm.is_finite() && g_norm > 0.0 {
            self.g_norm_initial = Some(g_norm);
        }
        if g_norm.is_finite() {
            self.last_g_norm = Some(g_norm);
        }
        log::info!(
            "[STAGE] outer eval end order=ValueGradientHessian elapsed={:.3}s cost={:.6e} |g|={:.3e}",
            stage_start.elapsed().as_secs_f64(),
            eval.cost,
            g_norm,
        );
        log::info!(
            "[OUTER] eval#{n} (hess) cost={cost:.6e} |g|={gnorm:.3e}",
            n = self.eval_count,
            cost = eval.cost,
            gnorm = g_norm,
        );
        // Dense Hessian extraction: only the Analytic source yields one.
        let hessian = match self.hessian_source {
            HessianSource::Analytic => match eval.hessian {
                HessianResult::Analytic(h) => Some(h),
                // Operator-backed: densify only when the operator can be
                // materialized and is small enough.
                HessianResult::Operator(ref op)
                    if op.materialization_capability().is_available()
                        && op.dim() <= self.materialize_operator_max_dim =>
                {
                    Some(
                        op.materialize_dense()
                            .map_err(|message| ObjectiveEvalError::Fatal {
                                message: format!(
                                    "outer Hessian operator materialization failed: {message}"
                                ),
                            })?,
                    )
                }
                HessianResult::Operator(_) | HessianResult::Unavailable => None,
            },
            // Approximation-based sources never surface a dense Hessian here.
            HessianSource::BfgsApprox
            | HessianSource::EfsFixedPoint
            | HessianSource::HybridEfsFixedPoint => None,
        };
        Ok(SecondOrderSample {
            value: eval.cost,
            gradient: eval.gradient,
            hessian,
        })
    }
}
// Adapter exposing an `OuterObjective` to the EFS fixed-point solver.
struct OuterFixedPointBridge<'a> {
    // Wrapped user objective providing `eval_efs` fixed-point steps.
    obj: &'a mut dyn OuterObjective,
    // Dimension bookkeeping used to validate points and EFS evals.
    layout: OuterThetaLayout,
    // When present, enables the barrier-curvature stop test on β.
    barrier_config: Option<BarrierConfig>,
    // Consecutive iterations where the ψ block had to be zeroed after
    // backtracking; reaching MAX_CONSECUTIVE_PSI_STAGNATION triggers the
    // joint-gradient fallback.
    consecutive_psi_zero_iters: usize,
}
// Maximum number of step-halvings attempted by the EFS backtracking search.
const MAX_EFS_BACKTRACK: usize = 8;
// Steps with max |component| below this are treated as effectively zero.
const EFS_NEGLIGIBLE_STEP: f64 = 1e-12;
// Steps smaller than this (max-abs) are accepted without any line search.
const EFS_LINESEARCH_THRESHOLD: f64 = 0.5;
// Relative descent tolerance used to accept a backtracked trial cost.
const EFS_COST_DESCENT_TOL: f64 = 1e-12;
// ψ-stagnation iterations tolerated before falling back to joint gradient.
const MAX_CONSECUTIVE_PSI_STAGNATION: usize = 1;
impl FixedPointObjective for OuterFixedPointBridge<'_> {
    /// One safeguarded EFS fixed-point update.
    ///
    /// Pipeline: validate the point → evaluate the EFS step → reject
    /// non-finite costs/steps → optional barrier-curvature early stop →
    /// accept negligible or modest (< EFS_LINESEARCH_THRESHOLD) steps as-is →
    /// otherwise backtrack on the full step; for hybrid ψ problems, retry
    /// with a ρ/τ-only step before signalling the first-order fallback via a
    /// recoverable error tagged with EFS_FIRST_ORDER_FALLBACK_MARKER.
    fn eval_step(&mut self, x: &Array1<f64>) -> Result<FixedPointSample, ObjectiveEvalError> {
        self.layout.validate_point_len(x, "outer EFS eval failed")?;
        let eval = self
            .obj
            .eval_efs(x)
            .map_err(|err| into_objective_error("outer EFS eval failed", err))?;
        self.layout
            .validate_efs_eval(&eval, "outer EFS eval failed")?;
        if !eval.cost.is_finite() {
            return Err(ObjectiveEvalError::recoverable(
                "outer EFS eval failed: objective returned a non-finite cost".to_string(),
            ));
        }
        // Reject any non-finite step coordinate, reporting which block (ψ vs
        // ρ/τ) it belongs to for diagnosis.
        if let Some((idx, value)) = eval.steps.iter().enumerate().find(|(_, v)| !v.is_finite()) {
            let psi_indices = eval.psi_indices.as_deref();
            let coord_kind = match psi_indices {
                Some(indices) if indices.contains(&idx) => "ψ",
                Some(_) => "ρ/τ",
                None => "ρ",
            };
            return Err(ObjectiveEvalError::recoverable(format!(
                "outer EFS eval failed: non-finite {coord_kind} step at coord {idx} \
                 (step[{idx}]={value}, rho_dim={}, psi_dim={}, n_params={}, cost={:.6e})",
                self.layout.rho_dim(),
                self.layout.psi_dim,
                self.layout.n_params,
                eval.cost,
            )));
        }
        // Barrier curvature check: when significant, request a clean stop.
        let status = if let Some(ref barrier_cfg) = self.barrier_config {
            if let Some(ref beta) = eval.beta {
                let ref_diag = 1.0;
                let threshold = 0.01;
                if barrier_cfg.barrier_curvature_is_significant(beta, ref_diag, threshold) {
                    FixedPointStatus::Stop
                } else {
                    FixedPointStatus::Continue
                }
            } else {
                FixedPointStatus::Continue
            }
        } else {
            FixedPointStatus::Continue
        };
        if matches!(status, FixedPointStatus::Stop) {
            // Zero step so the solver halts in place.
            return Ok(FixedPointSample {
                value: eval.cost,
                step: Array1::zeros(x.len()),
                status,
            });
        }
        let raw_step = Array1::from_vec(eval.steps);
        let psi_indices = eval.psi_indices.clone();
        let max_step_abs = raw_step.iter().map(|s| s.abs()).fold(0.0_f64, f64::max);
        let current_cost = eval.cost;
        // Negligible step: pass through unchanged (counts as ψ progress).
        if max_step_abs < EFS_NEGLIGIBLE_STEP {
            if psi_indices.is_some() {
                self.consecutive_psi_zero_iters = 0;
            }
            return Ok(FixedPointSample {
                value: current_cost,
                step: raw_step,
                status,
            });
        }
        // Modest step: accept without a line search.
        if max_step_abs < EFS_LINESEARCH_THRESHOLD {
            if psi_indices.is_some() {
                self.consecutive_psi_zero_iters = 0;
            }
            return Ok(FixedPointSample {
                value: current_cost,
                step: raw_step,
                status,
            });
        }
        // Large step: backtracking line search on the full vector.
        if let Some(scaled) = self.efs_backtrack(x, &raw_step, current_cost, MAX_EFS_BACKTRACK)? {
            if psi_indices.is_some() {
                self.consecutive_psi_zero_iters = 0;
            }
            return Ok(FixedPointSample {
                value: current_cost,
                step: scaled,
                status,
            });
        }
        // Hybrid case: full-vector backtracking failed; try again with the ψ
        // components zeroed (ρ/τ-only step).
        if let Some(psi_idx) = psi_indices.as_ref() {
            let mut rho_only = raw_step.clone();
            for &i in psi_idx {
                rho_only[i] = 0.0;
            }
            let max_rho_abs = rho_only.iter().map(|s| s.abs()).fold(0.0_f64, f64::max);
            if max_rho_abs >= EFS_NEGLIGIBLE_STEP {
                if let Some(scaled) =
                    self.efs_backtrack(x, &rho_only, current_cost, MAX_EFS_BACKTRACK)?
                {
                    self.consecutive_psi_zero_iters =
                        self.consecutive_psi_zero_iters.saturating_add(1);
                    log::info!(
                        "[HYBRID-EFS] full-vector backtrack exhausted; ρ/τ-only step \
                         accepted. Consecutive ψ-zero iters = {}",
                        self.consecutive_psi_zero_iters,
                    );
                    // Too many ψ-zero iterations in a row: hand over to the
                    // joint gradient solver via a recoverable marker error.
                    if self.consecutive_psi_zero_iters >= MAX_CONSECUTIVE_PSI_STAGNATION {
                        log::info!(
                            "[STAGE] HybridEFS -> joint gradient (BFGS/L-BFGS) fallback: \
                             {} consecutive ψ-zero iterations after exhausted backtracking \
                             (rho_dim={}, psi_dim={}, n_params={}, cost={:.6e})",
                            self.consecutive_psi_zero_iters,
                            self.layout.rho_dim(),
                            self.layout.psi_dim,
                            self.layout.n_params,
                            current_cost,
                        );
                        return Err(ObjectiveEvalError::recoverable(format!(
                            "{} HybridEFS ψ stagnation: {} consecutive iterations \
                             exhausted backtracking and zeroed ψ step \
                             (rho_dim={}, psi_dim={}, n_params={}, cost={:.6e})",
                            EFS_FIRST_ORDER_FALLBACK_MARKER,
                            self.consecutive_psi_zero_iters,
                            self.layout.rho_dim(),
                            self.layout.psi_dim,
                            self.layout.n_params,
                            current_cost,
                        )));
                    }
                    return Ok(FixedPointSample {
                        value: current_cost,
                        step: scaled,
                        status,
                    });
                }
            }
            // Even the ρ/τ-only step failed every halving: fall back.
            log::info!(
                "[STAGE] HybridEFS -> joint gradient fallback: ρ/τ-only step also \
                 failed all {} halvings (rho_dim={}, psi_dim={}, n_params={}, \
                 cost={:.6e})",
                MAX_EFS_BACKTRACK,
                self.layout.rho_dim(),
                self.layout.psi_dim,
                self.layout.n_params,
                current_cost,
            );
            return Err(ObjectiveEvalError::recoverable(format!(
                "{} HybridEFS step rejected after {} halvings on full vector \
                 and {} halvings on ρ/τ-only fallback \
                 (rho_dim={}, psi_dim={}, n_params={}, cost={:.6e})",
                EFS_FIRST_ORDER_FALLBACK_MARKER,
                MAX_EFS_BACKTRACK,
                MAX_EFS_BACKTRACK,
                self.layout.rho_dim(),
                self.layout.psi_dim,
                self.layout.n_params,
                current_cost,
            )));
        }
        // Pure-ρ case: no halving produced a descent; fall back to gradient.
        log::info!(
            "[STAGE] EFS -> gradient fallback: no α ∈ {{1, …, 2^-{}}} decreased the \
             cost (rho_dim={}, n_params={}, cost={:.6e})",
            MAX_EFS_BACKTRACK,
            self.layout.rho_dim(),
            self.layout.n_params,
            current_cost,
        );
        Err(ObjectiveEvalError::recoverable(format!(
            "{} EFS step rejected after {} halvings on pure-ρ vector \
             (rho_dim={}, n_params={}, cost={:.6e})",
            EFS_FIRST_ORDER_FALLBACK_MARKER,
            MAX_EFS_BACKTRACK,
            self.layout.rho_dim(),
            self.layout.n_params,
            current_cost,
        )))
    }
}
impl OuterFixedPointBridge<'_> {
    /// Backtracking line search along `raw_step`.
    ///
    /// Tries α = 1, 1/2, …, 2^-max_halvings and returns the first scaled step
    /// whose trial cost is finite and at or below the relative-tolerance
    /// floor. `Ok(None)` means every α was rejected; trial-eval errors are
    /// treated the same as a non-improving cost (logged, then halved).
    fn efs_backtrack(
        &mut self,
        x: &Array1<f64>,
        raw_step: &Array1<f64>,
        current_cost: f64,
        max_halvings: usize,
    ) -> Result<Option<Array1<f64>>, ObjectiveEvalError> {
        // Relative descent floor: tiny improvements still count as descent.
        let cost_floor = current_cost + EFS_COST_DESCENT_TOL * current_cost.abs().max(1.0);
        for bt in 0..=max_halvings {
            // Powers of two are exact in binary floating point.
            let alpha = 0.5_f64.powi(bt as i32);
            let trial_step = raw_step * alpha;
            let trial = x + &trial_step;
            match self.obj.eval_cost(&trial) {
                Ok(c) if c.is_finite() && c <= cost_floor => {
                    if bt > 0 {
                        log::debug!(
                            "[EFS] backtrack accepted at α=2^-{bt}={alpha:.4e} \
                             after {bt} halvings (cost: {current_cost:.6e} → {c:.6e})"
                        );
                    }
                    return Ok(Some(trial_step));
                }
                Ok(c) => log::trace!(
                    "[EFS] backtrack α=2^-{bt}={alpha:.4e}: trial cost {c:.6e} \
                     not below current {current_cost:.6e}, halving"
                ),
                Err(err) => log::trace!(
                    "[EFS] backtrack α=2^-{bt}={alpha:.4e}: trial eval failed \
                     ({err}), halving"
                ),
            }
        }
        Ok(None)
    }
}
/// Result of the derivative-free compass (coordinate pattern) search.
enum CompassSearchOutcome {
    /// Step size shrank to or below tolerance: pattern-search convergence.
    Converged {
        point: Array1<f64>,
        cost: f64,
        polls: usize,
    },
    /// Poll budget ran out before the step size converged.
    BudgetExhausted {
        point: Array1<f64>,
        cost: f64,
        polls: usize,
    },
}
/// Bound-constrained compass search over the outer objective.
///
/// Polls ±step along each coordinate (first-improvement: the sweep restarts
/// from coordinate 0 as soon as any probe improves the best cost); the step
/// is halved after a full sweep without improvement. Stops when
/// `step <= step_tol` or after `max_polls` probes. Probe failures and
/// non-finite costs are silently skipped (best-effort search).
/// NOTE(review): a poll is charged even when the clamped candidate coincides
/// with the current point and no objective evaluation runs — confirm this
/// budget accounting is intended.
fn compass_search_outer(
    obj: &mut dyn OuterObjective,
    mut x: Array1<f64>,
    mut best_cost: f64,
    lower: ndarray::ArrayView1<'_, f64>,
    upper: ndarray::ArrayView1<'_, f64>,
    init_step: f64,
    step_tol: f64,
    max_polls: usize,
) -> CompassSearchOutcome {
    // Start from a feasible point.
    for i in 0..x.len() {
        x[i] = x[i].clamp(lower[i], upper[i]);
    }
    let mut step = init_step;
    let mut polls: usize = 0;
    while step > step_tol && polls < max_polls {
        let mut improved = false;
        'sweep: for i in 0..x.len() {
            for &sign in &[1.0, -1.0] {
                if polls >= max_polls {
                    break 'sweep;
                }
                polls += 1;
                let candidate_i = (x[i] + sign * step).clamp(lower[i], upper[i]);
                // Clamping collapsed the move: nothing to evaluate.
                if (candidate_i - x[i]).abs() < step_tol {
                    continue;
                }
                let mut candidate = x.clone();
                candidate[i] = candidate_i;
                let probe = obj.eval_cost(&candidate).ok().filter(|v| v.is_finite());
                if let Some(c) = probe
                    && c < best_cost
                {
                    x = candidate;
                    best_cost = c;
                    improved = true;
                    break 'sweep;
                }
            }
        }
        if !improved {
            step *= 0.5;
        }
    }
    // Distinguish convergence (step shrank) from budget exhaustion.
    if step <= step_tol {
        CompassSearchOutcome::Converged {
            point: x,
            cost: best_cost,
            polls,
        }
    } else {
        CompassSearchOutcome::BudgetExhausted {
            point: x,
            cost: best_cost,
            polls,
        }
    }
}
/// Converts a solver `Solution` into the outer-loop result record.
///
/// The reported gradient norm prefers the true final gradient norm, falls
/// back to the final step norm, and defaults to 0.0 when neither is present.
fn solution_into_outer_result(
    solution: Solution,
    converged: bool,
    plan_used: OuterPlan,
) -> OuterResult {
    let final_grad_norm = match (solution.final_gradient_norm, solution.final_step_norm) {
        (Some(g_norm), _) => g_norm,
        (None, Some(step_norm)) => step_norm,
        (None, None) => 0.0,
    };
    OuterResult {
        rho: solution.final_point,
        final_value: solution.final_value,
        iterations: solution.iterations,
        final_grad_norm,
        final_gradient: solution.final_gradient,
        final_hessian: solution.final_hessian,
        converged,
        plan_used,
        // Operator trust-region diagnostics are populated elsewhere.
        operator_trust_radius: None,
        operator_stop_reason: None,
    }
}
// Snapshot of all outer-solver settings consumed by `run_outer`.
#[derive(Clone, Debug)]
struct OuterConfig {
    // Outer convergence tolerance.
    tolerance: f64,
    // Maximum outer iterations.
    max_iter: usize,
    // Optional per-coordinate (lower, upper) box constraints.
    bounds: Option<(Array1<f64>, Array1<f64>)>,
    // Seeding strategy for the initial point(s).
    seed_config: crate::seeding::SeedConfig,
    // Symmetric bound applied to ρ coordinates.
    rho_bound: f64,
    // Optional heuristic λ values used for seeding.
    heuristic_lambdas: Option<Vec<f64>>,
    // Optional caller-supplied starting point.
    initial_rho: Option<Array1<f64>>,
    // Policy governing solver fallbacks on failure.
    fallback_policy: FallbackPolicy,
    // Shared cap applied during seed screening, if any.
    screening_cap: Option<Arc<AtomicUsize>>,
    // Shared atomics for the adaptive inner-PIRLS cap feedback loop.
    outer_inner_cap: Option<InnerProgressFeedback>,
    // Which solver family to run.
    solver_class: SolverClass,
    // Optional initial trust radius for the operator trust-region path.
    operator_initial_trust_radius: Option<f64>,
}
impl Default for OuterConfig {
    /// Defaults mirror `OuterProblem::new`: tol 1e-5, 200 iterations,
    /// ρ bound 30, automatic fallback, primary solver class, no bounds,
    /// seeds, caps, or trust-radius override.
    fn default() -> Self {
        Self {
            tolerance: 1e-5,
            max_iter: 200,
            bounds: None,
            seed_config: crate::seeding::SeedConfig::default(),
            rho_bound: 30.0,
            heuristic_lambdas: None,
            initial_rho: None,
            fallback_policy: FallbackPolicy::Automatic,
            screening_cap: None,
            outer_inner_cap: None,
            solver_class: SolverClass::Primary,
            operator_initial_trust_radius: None,
        }
    }
}
// Builder for an outer optimization problem; mirrors `OuterConfig` plus the
// capability flags that determine which solver plan is eligible.
pub struct OuterProblem {
    // Dimension of the outer parameter vector.
    n_params: usize,
    // Availability of an analytic/numeric gradient.
    gradient: Derivative,
    // Availability of an analytic/numeric Hessian.
    hessian: Derivative,
    // Prefer the first-order path even when a Hessian is available.
    prefer_gradient_only: bool,
    // Disable the EFS fixed-point path entirely.
    disable_fixed_point: bool,
    // Number of ψ coordinates appended to the ρ block.
    psi_dim: usize,
    // Optional barrier configuration forwarded to the fixed-point bridge.
    barrier_config: Option<BarrierConfig>,
    // Remaining fields are the solver settings copied into `OuterConfig`.
    tolerance: f64,
    max_iter: usize,
    bounds: Option<(Array1<f64>, Array1<f64>)>,
    rho_bound: f64,
    seed_config: crate::seeding::SeedConfig,
    heuristic_lambdas: Option<Vec<f64>>,
    initial_rho: Option<Array1<f64>>,
    fallback_policy: FallbackPolicy,
    screening_cap: Option<Arc<AtomicUsize>>,
    outer_inner_cap: Option<InnerProgressFeedback>,
    solver_class: SolverClass,
    operator_initial_trust_radius: Option<f64>,
}
impl OuterProblem {
    /// New problem with the defaults of `OuterConfig::default()` and no
    /// derivative capabilities declared.
    pub fn new(n_params: usize) -> Self {
        Self {
            n_params,
            gradient: Derivative::Unavailable,
            hessian: Derivative::Unavailable,
            prefer_gradient_only: false,
            disable_fixed_point: false,
            psi_dim: 0,
            barrier_config: None,
            tolerance: 1e-5,
            max_iter: 200,
            bounds: None,
            rho_bound: 30.0,
            seed_config: crate::seeding::SeedConfig::default(),
            heuristic_lambdas: None,
            initial_rho: None,
            fallback_policy: FallbackPolicy::Automatic,
            screening_cap: None,
            outer_inner_cap: None,
            solver_class: SolverClass::Primary,
            operator_initial_trust_radius: None,
        }
    }
    /// Declares gradient availability.
    pub fn with_gradient(mut self, d: Derivative) -> Self {
        self.gradient = d;
        self
    }
    /// Declares Hessian availability.
    pub fn with_hessian(mut self, d: Derivative) -> Self {
        self.hessian = d;
        self
    }
    /// Forces the first-order path even when a Hessian is declared.
    pub fn with_prefer_gradient_only(mut self, prefer_gradient_only: bool) -> Self {
        self.prefer_gradient_only = prefer_gradient_only;
        self
    }
    /// Disables the EFS fixed-point path.
    pub fn with_disable_fixed_point(mut self, disable: bool) -> Self {
        self.disable_fixed_point = disable;
        self
    }
    /// Sets the number of ψ coordinates.
    pub fn with_psi_dim(mut self, dim: usize) -> Self {
        self.psi_dim = dim;
        self
    }
    /// Sets (or clears) the barrier configuration.
    pub fn with_barrier(mut self, cfg: Option<BarrierConfig>) -> Self {
        self.barrier_config = cfg;
        self
    }
    /// Sets the outer convergence tolerance.
    pub fn with_tolerance(mut self, tol: f64) -> Self {
        self.tolerance = tol;
        self
    }
    /// Sets the outer iteration budget.
    pub fn with_max_iter(mut self, n: usize) -> Self {
        self.max_iter = n;
        self
    }
    /// Sets per-coordinate box constraints.
    pub fn with_bounds(mut self, lo: Array1<f64>, hi: Array1<f64>) -> Self {
        self.bounds = Some((lo, hi));
        self
    }
    /// Sets the symmetric ρ bound.
    pub fn with_rho_bound(mut self, b: f64) -> Self {
        self.rho_bound = b;
        self
    }
    /// Sets the seeding strategy.
    pub fn with_seed_config(mut self, sc: crate::seeding::SeedConfig) -> Self {
        self.seed_config = sc;
        self
    }
    /// Supplies heuristic λ values for seeding.
    pub fn with_heuristic_lambdas(mut self, h: Vec<f64>) -> Self {
        self.heuristic_lambdas = Some(h);
        self
    }
    /// Supplies an explicit starting point.
    pub fn with_initial_rho(mut self, rho: Array1<f64>) -> Self {
        self.initial_rho = Some(rho);
        self
    }
    /// Wires the shared screening cap.
    pub fn with_screening_cap(mut self, screening_cap: Arc<AtomicUsize>) -> Self {
        self.screening_cap = Some(screening_cap);
        self
    }
    /// Wires the inner-PIRLS cap feedback atomics.
    pub fn with_outer_inner_cap(mut self, feedback: InnerProgressFeedback) -> Self {
        self.outer_inner_cap = Some(feedback);
        self
    }
    /// Selects the solver class.
    pub fn with_solver_class(mut self, class: SolverClass) -> Self {
        self.solver_class = class;
        self
    }
    /// Overrides the operator trust-region initial radius.
    pub fn with_operator_initial_trust_radius(mut self, radius: Option<f64>) -> Self {
        self.operator_initial_trust_radius = radius;
        self
    }
    /// Sets the fallback policy.
    pub fn with_fallback_policy(mut self, policy: FallbackPolicy) -> Self {
        self.fallback_policy = policy;
        self
    }
    // Capability snapshot; `fixed_point_available` is filled in by the
    // build_objective* methods based on whether an EFS closure was supplied.
    fn capability(&self) -> OuterCapability {
        OuterCapability {
            gradient: self.gradient,
            hessian: self.hessian,
            prefer_gradient_only: self.prefer_gradient_only,
            disable_fixed_point: self.disable_fixed_point,
            n_params: self.n_params,
            psi_dim: self.psi_dim,
            fixed_point_available: false,
            barrier_config: self.barrier_config.clone(),
        }
    }
    // Settings snapshot consumed by `run_outer`.
    fn config(&self) -> OuterConfig {
        OuterConfig {
            tolerance: self.tolerance,
            max_iter: self.max_iter,
            bounds: self.bounds.clone(),
            seed_config: self.seed_config.clone(),
            rho_bound: self.rho_bound,
            heuristic_lambdas: self.heuristic_lambdas.clone(),
            initial_rho: self.initial_rho.clone(),
            fallback_policy: self.fallback_policy,
            screening_cap: self.screening_cap.clone(),
            outer_inner_cap: self.outer_inner_cap.clone(),
            solver_class: self.solver_class,
            operator_initial_trust_radius: self.operator_initial_trust_radius,
        }
    }
    /// Builds a closure-backed objective without an eval-order hook.
    pub fn build_objective<S, Fc, Fe, Fr, Fefs>(
        &self,
        state: S,
        cost_fn: Fc,
        eval_fn: Fe,
        reset_fn: Option<Fr>,
        efs_fn: Option<Fefs>,
    ) -> ClosureObjective<S, Fc, Fe, Fr, Fefs>
    where
        Fc: FnMut(&mut S, &Array1<f64>) -> Result<f64, EstimationError>,
        Fe: FnMut(&mut S, &Array1<f64>) -> Result<OuterEval, EstimationError>,
        Fr: FnMut(&mut S),
        Fefs: FnMut(&mut S, &Array1<f64>) -> Result<EfsEval, EstimationError>,
    {
        let mut cap = self.capability();
        cap.fixed_point_available = efs_fn.is_some();
        ClosureObjective {
            state,
            cap,
            cost_fn,
            eval_fn,
            eval_order_fn: None,
            reset_fn,
            efs_fn,
            screening_proxy_fn: None::<fn(&mut S, &Array1<f64>) -> Result<f64, EstimationError>>,
        }
    }
    /// Builds a closure-backed objective with an order-aware eval hook.
    pub fn build_objective_with_eval_order<S, Fc, Fe, Feo, Fr, Fefs>(
        &self,
        state: S,
        cost_fn: Fc,
        eval_fn: Fe,
        eval_order_fn: Feo,
        reset_fn: Option<Fr>,
        efs_fn: Option<Fefs>,
    ) -> ClosureObjective<S, Fc, Fe, Fr, Fefs, Feo>
    where
        Fc: FnMut(&mut S, &Array1<f64>) -> Result<f64, EstimationError>,
        Fe: FnMut(&mut S, &Array1<f64>) -> Result<OuterEval, EstimationError>,
        Feo: FnMut(&mut S, &Array1<f64>, OuterEvalOrder) -> Result<OuterEval, EstimationError>,
        Fr: FnMut(&mut S),
        Fefs: FnMut(&mut S, &Array1<f64>) -> Result<EfsEval, EstimationError>,
    {
        let mut cap = self.capability();
        cap.fixed_point_available = efs_fn.is_some();
        ClosureObjective {
            state,
            cap,
            cost_fn,
            eval_fn,
            eval_order_fn: Some(eval_order_fn),
            reset_fn,
            efs_fn,
            screening_proxy_fn: None::<fn(&mut S, &Array1<f64>) -> Result<f64, EstimationError>>,
        }
    }
    /// Builds a closure-backed objective with eval-order hook and a cheap
    /// screening proxy for seed selection.
    pub fn build_objective_with_screening_proxy<S, Fc, Fe, Feo, Fr, Fefs, Fsp>(
        &self,
        state: S,
        cost_fn: Fc,
        eval_fn: Fe,
        eval_order_fn: Feo,
        reset_fn: Option<Fr>,
        efs_fn: Option<Fefs>,
        screening_proxy_fn: Fsp,
    ) -> ClosureObjective<S, Fc, Fe, Fr, Fefs, Feo, Fsp>
    where
        Fc: FnMut(&mut S, &Array1<f64>) -> Result<f64, EstimationError>,
        Fe: FnMut(&mut S, &Array1<f64>) -> Result<OuterEval, EstimationError>,
        Feo: FnMut(&mut S, &Array1<f64>, OuterEvalOrder) -> Result<OuterEval, EstimationError>,
        Fr: FnMut(&mut S),
        Fefs: FnMut(&mut S, &Array1<f64>) -> Result<EfsEval, EstimationError>,
        Fsp: FnMut(&mut S, &Array1<f64>) -> Result<f64, EstimationError>,
    {
        let mut cap = self.capability();
        cap.fixed_point_available = efs_fn.is_some();
        ClosureObjective {
            state,
            cap,
            cost_fn,
            eval_fn,
            eval_order_fn: Some(eval_order_fn),
            reset_fn,
            efs_fn,
            screening_proxy_fn: Some(screening_proxy_fn),
        }
    }
    /// Runs the configured outer solve against `obj`.
    pub fn run(
        &self,
        obj: &mut dyn OuterObjective,
        context: &str,
    ) -> Result<OuterResult, EstimationError> {
        run_outer(obj, &self.config(), context)
    }
}
// Outcome of an outer solve, including optional final derivatives and
// operator trust-region diagnostics.
#[derive(Clone, Debug)]
pub struct OuterResult {
    // Final outer parameter vector.
    pub rho: Array1<f64>,
    // Objective value at `rho`.
    pub final_value: f64,
    // Outer iterations performed.
    pub iterations: usize,
    // Gradient norm (or step-norm fallback) at the final point.
    pub final_grad_norm: f64,
    // Final gradient, when the solver produced one.
    pub final_gradient: Option<Array1<f64>>,
    // Final dense Hessian, when the solver produced one.
    pub final_hessian: Option<Array2<f64>>,
    // Whether the solver reported convergence.
    pub converged: bool,
    // Which solver plan ultimately produced this result.
    pub plan_used: OuterPlan,
    // Trust-region diagnostics (operator path only).
    pub operator_trust_radius: Option<f64>,
    pub operator_stop_reason: Option<OperatorTrustRegionStopReason>,
}
// Default initial trust radius for the operator trust-region solver.
const OPERATOR_TRUST_RADIUS_INIT: f64 = 1.0;
// Upper bound on trust-radius growth.
const OPERATOR_TRUST_RADIUS_MAX: f64 = 1.0e6;
// Minimum actual/predicted decrease ratio for accepting a step.
const OPERATOR_ETA_ACCEPT: f64 = 1.0e-4;
// Radius below which repeated rejection terminates the solve.
const OPERATOR_TRUST_RADIUS_REJECT_FLOOR: f64 = 1.0e-9;
// Largest dimension at which a dense Hessian is reused/materialized.
const OPERATOR_DENSE_REUSE_MAX_DIM: usize = 64;
// Why the operator trust-region loop terminated.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum OperatorTrustRegionStopReason {
    // Gradient/step criteria satisfied.
    Converged,
    // Trust radius shrank below the rejection floor.
    RejectFloor,
    // Iteration budget exhausted.
    IterationBudget,
    // Solver routing disagreed with the requested plan.
    RoutingMismatch,
}
// Zero-copy `OuterHessianOperator` view over an existing dense Hessian.
struct BorrowedDenseOuterHessian<'a> {
    matrix: &'a Array2<f64>,
}
impl OuterHessianOperator for BorrowedDenseOuterHessian<'_> {
    /// Operator dimension = row count of the borrowed matrix.
    fn dim(&self) -> usize {
        self.matrix.nrows()
    }
    /// Dense matrix-vector product with an explicit length check.
    fn matvec(&self, v: &Array1<f64>) -> Result<Array1<f64>, String> {
        let expected = self.matrix.ncols();
        if v.len() == expected {
            Ok(self.matrix.dot(v))
        } else {
            Err(format!(
                "dense outer Hessian matvec length mismatch: got {}, expected {}",
                v.len(),
                expected
            ))
        }
    }
    /// Dense matrix-matrix product with a row-count check on the factor.
    fn mul_mat(&self, factor: ArrayView2<'_, f64>) -> Result<Array2<f64>, String> {
        let expected = self.matrix.ncols();
        if factor.nrows() == expected {
            Ok(self.matrix.dot(&factor))
        } else {
            Err(format!(
                "dense outer Hessian factor row count mismatch: got {}, expected {}",
                factor.nrows(),
                expected
            ))
        }
    }
    /// A borrowed dense matrix is already materialized.
    fn is_cheap_to_materialize(&self) -> bool {
        true
    }
}
#[inline]
/// Chooses the recovery trust radius after a rejected step.
///
/// Keeps `default_radius` unless the predicted decrease is wildly out of
/// scale with the cost (ratio > 1e6) AND the derived radius is meaningfully
/// tighter (< half the default), in which case the derived radius is used.
fn snap_recovery_radius_on_rejection(
    cost: f64,
    pred_dec: f64,
    derived_radius: f64,
    default_radius: f64,
) -> f64 {
    // Normalize the predicted decrease by the cost magnitude, floored at 1.
    let mismatch_ratio = match pred_dec {
        p if p.is_finite() && p > 0.0 => p / cost.abs().max(1.0),
        _ => 0.0,
    };
    let prefer_derived = mismatch_ratio > 1e6 && derived_radius < default_radius * 0.5;
    if prefer_derived {
        derived_radius
    } else {
        default_radius
    }
}
#[inline]
/// Derives an initial trust radius from the seed cost and gradient norm.
///
/// Uses cost-scale / |g| clamped between ten times the rejection floor and
/// the default initial radius; falls back to the default when the gradient
/// norm is zero, negative, or non-finite.
fn derive_initial_trust_radius_from_seed(cost: f64, g_norm: f64) -> f64 {
    if !(g_norm.is_finite() && g_norm > 0.0) {
        return OPERATOR_TRUST_RADIUS_INIT;
    }
    let cost_scale = cost.abs().max(1.0);
    (cost_scale / g_norm).clamp(
        OPERATOR_TRUST_RADIUS_REJECT_FLOOR * 10.0,
        OPERATOR_TRUST_RADIUS_INIT,
    )
}
/// Returns a copy of `x` clamped coordinate-wise into `bounds` (identity
/// clone when no bounds are supplied).
fn project_to_bounds(x: &Array1<f64>, bounds: Option<&(Array1<f64>, Array1<f64>)>) -> Array1<f64> {
    let Some((lower, upper)) = bounds else {
        return x.clone();
    };
    x.iter()
        .zip(lower.iter().zip(upper.iter()))
        .map(|(&value, (&lo, &hi))| value.clamp(lo, hi))
        .collect()
}
/// Gradient with components zeroed where they push outward across an active
/// (ε-tolerant, ε = 1e-10) bound; the plain gradient when unbounded.
fn projected_gradient(
    x: &Array1<f64>,
    gradient: &Array1<f64>,
    bounds: Option<&(Array1<f64>, Array1<f64>)>,
) -> Array1<f64> {
    let mut projected = gradient.clone();
    let Some((lower, upper)) = bounds else {
        return projected;
    };
    for idx in 0..projected.len() {
        let pushes_below = x[idx] <= lower[idx] + 1e-10 && gradient[idx] > 0.0;
        let pushes_above = x[idx] >= upper[idx] - 1e-10 && gradient[idx] < 0.0;
        if pushes_below || pushes_above {
            projected[idx] = 0.0;
        }
    }
    projected
}
/// Boolean mask of coordinates whose bound is active: the point sits at
/// (within 1e-10 of) a bound and the gradient pushes it outward. All-false
/// when there are no bounds.
fn active_mask(
    x: &Array1<f64>,
    gradient: &Array1<f64>,
    bounds: Option<&(Array1<f64>, Array1<f64>)>,
) -> Vec<bool> {
    let Some((lower, upper)) = bounds else {
        return vec![false; x.len()];
    };
    (0..x.len())
        .map(|idx| {
            let pushes_below = x[idx] <= lower[idx] + 1e-10 && gradient[idx] > 0.0;
            let pushes_above = x[idx] >= upper[idx] - 1e-10 && gradient[idx] < 0.0;
            pushes_below || pushes_above
        })
        .collect()
}
/// Apply the operator restricted to the free (non-active) subspace:
/// zero the active coordinates of the input, apply the operator, then zero
/// the active coordinates of the result. Errors if the operator returns a
/// vector of the wrong length.
fn masked_matvec(
    op: &dyn OuterHessianOperator,
    vector: &Array1<f64>,
    active: &[bool],
) -> Result<Array1<f64>, String> {
    let mut masked = vector.clone();
    for (value, &is_active) in masked.iter_mut().zip(active) {
        if is_active {
            *value = 0.0;
        }
    }
    let mut product = op.matvec(&masked)?;
    if product.len() != masked.len() {
        return Err(format!(
            "outer Hessian operator matvec length mismatch: got {}, expected {}",
            product.len(),
            masked.len()
        ));
    }
    for (value, &is_active) in product.iter_mut().zip(active) {
        if is_active {
            *value = 0.0;
        }
    }
    Ok(product)
}
/// Quadratic-model decrease for a step: -(g·s + ½ s·Hs), with H restricted
/// to the free subspace when any axis is marked active.
fn predicted_decrease_from_operator(
    op: &dyn OuterHessianOperator,
    gradient: &Array1<f64>,
    step: &Array1<f64>,
    active: &[bool],
) -> Result<f64, String> {
    let hs = if active.contains(&true) {
        masked_matvec(op, step, active)?
    } else {
        op.matvec(step)?
    };
    let linear = gradient.dot(step);
    let quadratic = 0.5 * step.dot(&hs);
    Ok(-(linear + quadratic))
}
/// Steihaug–Toint truncated CG for the trust-region subproblem
/// min g·p + ½ p·Bp subject to ‖p‖ ≤ `trust_radius`, restricted to the
/// free (non-`active`) coordinates; B is applied only through `op`.
///
/// Returns `Ok(Some((step, predicted_decrease)))` when a step with a finite,
/// strictly positive model decrease is found; `Ok(None)` when no useful step
/// exists (zero/non-finite gradient, degenerate geometry, non-positive model
/// decrease); `Err` if an operator application fails.
///
/// Fix vs. previous revision: the boundary-intersection computation was
/// duplicated verbatim at the negative-curvature and radius-exceeded exits;
/// it is now a single helper with identical arithmetic.
fn steihaug_toint_step_operator(
    op: &dyn OuterHessianOperator,
    gradient: &Array1<f64>,
    trust_radius: f64,
    active: &[bool],
) -> Result<Option<(Array1<f64>, f64)>, String> {
    // Move from `p` along `d` to the trust-region boundary: the positive root
    // tau of ‖p + tau·d‖ = trust_radius. `None` when `d` is degenerate or the
    // quadratic has no real root.
    fn boundary_point(
        p: &Array1<f64>,
        d: &Array1<f64>,
        trust_radius: f64,
    ) -> Option<Array1<f64>> {
        let d_norm_sq = d.dot(d);
        if d_norm_sq <= 0.0 {
            return None;
        }
        let p_dot_d = p.dot(d);
        let p_norm_sq = p.dot(p);
        let disc = p_dot_d * p_dot_d - d_norm_sq * (p_norm_sq - trust_radius * trust_radius);
        if disc < 0.0 {
            return None;
        }
        let tau = (-p_dot_d + disc.sqrt()) / d_norm_sq;
        let mut boundary = p.clone();
        boundary.scaled_add(tau, d);
        Some(boundary)
    }
    let n = gradient.len();
    let g_norm = gradient.dot(gradient).sqrt();
    if !g_norm.is_finite() || g_norm <= 0.0 {
        return Ok(None);
    }
    // CG state: iterate p, residual r (masked to the free subspace), search
    // direction d = -r initially.
    let mut p = Array1::<f64>::zeros(n);
    let mut r = gradient.clone();
    for idx in 0..n {
        if active[idx] {
            r[idx] = 0.0;
        }
    }
    let mut d = r.mapv(|value| -value);
    let mut rtr = r.dot(&r);
    if !rtr.is_finite() || rtr <= 0.0 {
        return Ok(None);
    }
    let cg_tol = (1e-6 * g_norm).max(1e-12);
    let max_iter = (2 * n).max(10);
    for _ in 0..max_iter {
        let bd = if active.iter().copied().any(|flag| flag) {
            masked_matvec(op, &d, active)?
        } else {
            op.matvec(&d)?
        };
        let d_bd = d.dot(&bd);
        // Non-positive (or non-finite) curvature along d: the model is
        // unbounded in this direction, so jump to the trust-region boundary.
        if !d_bd.is_finite() || d_bd <= 1e-14 * d.dot(&d).max(1.0) {
            let Some(boundary) = boundary_point(&p, &d, trust_radius) else {
                return Ok(None);
            };
            let pred = predicted_decrease_from_operator(op, gradient, &boundary, active)?;
            return Ok((pred.is_finite() && pred > 0.0).then_some((boundary, pred)));
        }
        let alpha = rtr / d_bd;
        if !alpha.is_finite() || alpha <= 0.0 {
            return Ok(None);
        }
        let mut p_next = p.clone();
        p_next.scaled_add(alpha, &d);
        let p_next_norm = p_next.dot(&p_next).sqrt();
        // Full CG step would leave the region: clip to the boundary instead.
        if p_next_norm >= trust_radius {
            let Some(boundary) = boundary_point(&p, &d, trust_radius) else {
                return Ok(None);
            };
            let pred = predicted_decrease_from_operator(op, gradient, &boundary, active)?;
            return Ok((pred.is_finite() && pred > 0.0).then_some((boundary, pred)));
        }
        // Residual update, re-masked so active axes stay frozen.
        r.scaled_add(alpha, &bd);
        for idx in 0..n {
            if active[idx] {
                r[idx] = 0.0;
            }
        }
        let rtr_next = r.dot(&r);
        if !rtr_next.is_finite() {
            return Ok(None);
        }
        p = p_next;
        // Converged interior solution.
        if rtr_next.sqrt() <= cg_tol {
            let pred = predicted_decrease_from_operator(op, gradient, &p, active)?;
            return Ok((pred.is_finite() && pred > 0.0).then_some((p, pred)));
        }
        let beta = rtr_next / rtr.max(1e-32);
        if !beta.is_finite() || beta < 0.0 {
            return Ok(None);
        }
        d *= beta;
        d -= &r;
        for idx in 0..n {
            if active[idx] {
                d[idx] = 0.0;
            }
        }
        rtr = rtr_next;
    }
    // Iteration cap reached: return the best interior iterate if it still
    // yields a positive model decrease.
    let pred = predicted_decrease_from_operator(op, gradient, &p, active)?;
    Ok((pred.is_finite() && pred > 0.0).then_some((p, pred)))
}
/// Mask of coordinates sitting exactly on (within a relative 1e-12 tolerance
/// of) a box bound, independent of the gradient direction. All-false when
/// unbounded.
fn pinned_axes_mask(x: &Array1<f64>, bounds: Option<&(Array1<f64>, Array1<f64>)>) -> Vec<bool> {
    let Some((lower, upper)) = bounds else {
        return vec![false; x.len()];
    };
    x.iter()
        .enumerate()
        .map(|(idx, &value)| {
            // Tolerance scales with the bound span, floored at 1.
            let span = (upper[idx] - lower[idx]).abs().max(1.0);
            let tol = 1e-12 * span;
            (value - lower[idx]).abs() <= tol || (upper[idx] - value).abs() <= tol
        })
        .collect()
}
/// Mask of coordinates that the box projection actually moved.
/// `abs() > 0.0` (rather than `!=`) deliberately makes NaN residuals count
/// as "not clipped".
fn clipped_axes_mask(x_raw: &Array1<f64>, x_projected: &Array1<f64>) -> Vec<bool> {
    debug_assert_eq!(x_raw.len(), x_projected.len());
    x_raw
        .iter()
        .zip(x_projected.iter())
        .map(|(raw, proj)| (raw - proj).abs() > 0.0)
        .collect()
}
/// Elementwise OR of two equal-length boolean masks.
fn union_active_with_pinned(base_active: &[bool], pinned: &[bool]) -> Vec<bool> {
    debug_assert_eq!(base_active.len(), pinned.len());
    (0..base_active.len())
        .map(|idx| base_active[idx] || pinned[idx])
        .collect()
}
/// Try a reduced-subspace trust-region step with the active set extended by
/// the pinned/clipped axes.
///
/// Returns `Ok(None)` when the extension freezes every axis, adds no new
/// frozen axes relative to `base_active`, CG finds no step, or the step is
/// numerically zero; otherwise returns the step, its predicted decrease, and
/// the extended mask it was computed under.
fn compute_active_set_step(
    op: &dyn OuterHessianOperator,
    g_proj: &Array1<f64>,
    base_active: &[bool],
    pinned_or_clipped: &[bool],
    trust_radius: f64,
) -> Result<Option<(Array1<f64>, f64, Vec<bool>)>, String> {
    let extended = union_active_with_pinned(base_active, pinned_or_clipped);
    // Everything frozen: no subspace left to move in.
    if extended.iter().all(|flag| *flag) {
        return Ok(None);
    }
    // Extension changed nothing: the reduced problem equals the one that
    // already failed, so trying again is pointless.
    if extended == base_active {
        return Ok(None);
    }
    let Some((step, pred)) = steihaug_toint_step_operator(op, g_proj, trust_radius, &extended)?
    else {
        return Ok(None);
    };
    let step_norm = step.dot(&step).sqrt();
    if step_norm.is_finite() && step_norm > 1e-16 {
        Ok(Some((step, pred, extended)))
    } else {
        Ok(None)
    }
}
/// Loosen the absolute convergence tolerance in proportion to the seed cost
/// magnitude: `base_tol * (1 + |seed_cost|)`.
#[inline]
fn outer_scaled_tolerance(base_tol: f64, seed_cost: f64) -> f64 {
    let scale = 1.0 + seed_cost.abs();
    base_tol * scale
}
/// Bound-constrained trust-region loop driven entirely through the
/// `OuterHessianOperator` (Hessian-vector products), used when the family
/// exposes its analytic Hessian as an operator rather than a dense matrix.
///
/// Per iteration: project the gradient onto the feasible box, check
/// convergence against a cost-scaled tolerance, solve the trust-region
/// subproblem with Steihaug–Toint CG, evaluate the trial point, and
/// accept/reject via the predicted-vs-actual decrease ratio. Layered on top
/// of the textbook loop:
///   * optional dense materialization of the operator after a rejection, so
///     repeated CG solves reuse a cached matrix (small dims only);
///   * "snap recovery" of the radius when the model decrease is wildly out
///     of scale with the cost;
///   * detection of "corner clamp" stalls (projection pins the trial point
///     to a constant box corner) followed by an active-set escape step.
///
/// Returns a best-effort `OuterResult` in all non-fatal situations; `Err`
/// only on fatal evaluation failures or CG operator errors.
fn run_operator_trust_region(
    obj: &mut dyn OuterObjective,
    seed: &Array1<f64>,
    layout: OuterThetaLayout,
    bounds: Option<&(Array1<f64>, Array1<f64>)>,
    tolerance: f64,
    max_iter: usize,
    initial_eval: OuterEval,
    plan: OuterPlan,
    initial_trust_radius: Option<f64>,
) -> Result<OuterResult, EstimationError> {
    // Start from the projected seed and the caller-provided evaluation there.
    let mut x_k = project_to_bounds(seed, bounds);
    let mut eval_k = initial_eval;
    let g_seed_for_init = projected_gradient(&x_k, &eval_k.gradient, bounds);
    let g_norm_for_init = g_seed_for_init.dot(&g_seed_for_init).sqrt();
    let derived_initial_radius =
        derive_initial_trust_radius_from_seed(eval_k.cost, g_norm_for_init);
    // Prefer an explicit warm-start radius (finite, positive); otherwise use
    // the radius derived from the seed. Either way clamp into the legal band.
    let mut trust_radius = initial_trust_radius
        .filter(|radius| radius.is_finite() && *radius > 0.0)
        .unwrap_or(derived_initial_radius)
        .clamp(
            OPERATOR_TRUST_RADIUS_REJECT_FLOOR,
            OPERATOR_TRUST_RADIUS_MAX,
        );
    if initial_trust_radius.is_none() && derived_initial_radius < OPERATOR_TRUST_RADIUS_INIT {
        log::info!(
            "[ARC-timing] iter=0 derived initial trust_radius={:.3e} from \
             |cost|={:.3e} / ‖g‖={:.3e} (default {:.3e} would have wasted halving iters)",
            trust_radius,
            eval_k.cost.abs(),
            eval_k.gradient.dot(&eval_k.gradient).sqrt(),
            OPERATOR_TRUST_RADIUS_INIT,
        );
    }
    // Dense-reuse and stall-detection bookkeeping, reset on every acceptance.
    let mut dense_model: Option<Array2<f64>> = None;
    let mut materialize_dense_after_rejection = false;
    let mut last_rejection_act_dec: Option<f64> = None;
    let mut consecutive_corner_clamps: usize = 0;
    const CORNER_CLAMP_REPORT_THRESHOLD: usize = 4;
    let mut corner_clamp_reported_for_run: bool = false;
    for iter in 0..max_iter {
        let iter_start = std::time::Instant::now();
        let g_proj = projected_gradient(&x_k, &eval_k.gradient, bounds);
        let g_norm = g_proj.dot(&g_proj).sqrt();
        let scaled_tol = outer_scaled_tolerance(tolerance, eval_k.cost);
        // Converged: projected gradient below the cost-scaled tolerance.
        if g_norm.is_finite() && g_norm <= scaled_tol {
            log::info!(
                "[ARC-timing] iter={iter} converged cost={:.6e} grad_norm={:.3e} \
                 trust_radius={:.3e} tol={:.3e}",
                eval_k.cost,
                g_norm,
                trust_radius,
                scaled_tol,
            );
            return Ok(OuterResult {
                rho: x_k,
                final_value: eval_k.cost,
                iterations: iter,
                final_grad_norm: g_norm,
                final_gradient: Some(eval_k.gradient),
                final_hessian: None,
                converged: true,
                plan_used: plan,
                operator_trust_radius: Some(trust_radius),
                operator_stop_reason: Some(OperatorTrustRegionStopReason::Converged),
            });
        }
        // Radius collapsed to the floor: stop without convergence.
        if trust_radius <= OPERATOR_TRUST_RADIUS_REJECT_FLOOR {
            log::info!(
                "[ARC-timing] iter={iter} status=reject_floor cost={:.6e} grad_norm={:.3e} \
                 trust_radius={:.3e}",
                eval_k.cost,
                g_norm,
                trust_radius,
            );
            return Ok(OuterResult {
                rho: x_k,
                final_value: eval_k.cost,
                iterations: iter,
                final_grad_norm: g_norm,
                final_gradient: Some(eval_k.gradient),
                final_hessian: None,
                converged: false,
                plan_used: plan,
                operator_trust_radius: Some(trust_radius),
                operator_stop_reason: Some(OperatorTrustRegionStopReason::RejectFloor),
            });
        }
        // This loop requires the Operator Hessian shape throughout; if the
        // family switched shapes mid-run, return the current point best-effort.
        let HessianResult::Operator(op_arc) = &eval_k.hessian else {
            let observed_shape = match &eval_k.hessian {
                HessianResult::Operator(_) => "Operator",
                HessianResult::Analytic(_) => "Analytic(dense)",
                HessianResult::Unavailable => "Unavailable",
            };
            log::warn!(
                "[ARC] iter={iter}: family returned non-operator Hessian mid-flight \
                 (planner expected Operator throughout, got {observed_shape}); \
                 returning best-effort x_k cost={:.6e} grad_norm={:.3e}. \
                 Routing decision was made on seed_eval.hessian shape; family \
                 should declare a consistent shape via its capability surface.",
                eval_k.cost,
                g_norm
            );
            return Ok(OuterResult {
                rho: x_k,
                final_value: eval_k.cost,
                iterations: iter,
                final_grad_norm: g_norm,
                final_gradient: Some(eval_k.gradient),
                final_hessian: None,
                converged: false,
                plan_used: plan,
                operator_trust_radius: Some(trust_radius),
                operator_stop_reason: Some(OperatorTrustRegionStopReason::RoutingMismatch),
            });
        };
        // After a rejection, optionally cache a dense copy of the operator so
        // subsequent CG solves avoid repeated Hv applications (small dims,
        // finite entries only; failures merely skip the cache).
        if materialize_dense_after_rejection
            && dense_model.is_none()
            && op_arc.dim() <= OPERATOR_DENSE_REUSE_MAX_DIM
            && op_arc.materialization_capability().is_available()
        {
            let materialize_start = std::time::Instant::now();
            match op_arc.materialize_dense() {
                Ok(dense) => {
                    if !dense.iter().all(|value| value.is_finite()) {
                        log::debug!(
                            "[ARC-timing] iter={iter} dense outer Hessian reuse skipped: \
                             materialization produced non-finite entries"
                        );
                    } else {
                        log::info!(
                            "[ARC-timing] iter={iter} status=dense_model_ready dim={} bytes={} \
                             elapsed={:.3}s",
                            dense.nrows(),
                            dense.len() * std::mem::size_of::<f64>(),
                            materialize_start.elapsed().as_secs_f64(),
                        );
                        dense_model = Some(dense);
                    }
                }
                Err(message) => {
                    log::debug!(
                        "[ARC-timing] iter={iter} dense outer Hessian reuse skipped: {message}"
                    );
                }
            }
            materialize_dense_after_rejection = false;
        }
        // Route CG through the cached dense matrix when available; otherwise
        // wrap the live operator in a counter so Hv applications can be logged.
        let counter = HvApplyCounter::new(op_arc.as_ref());
        let dense_borrowed;
        let step_op: &dyn OuterHessianOperator = if let Some(dense) = dense_model.as_ref() {
            dense_borrowed = BorrowedDenseOuterHessian { matrix: dense };
            &dense_borrowed
        } else {
            &counter
        };
        let active = active_mask(&x_k, &eval_k.gradient, bounds);
        let cg_result = steihaug_toint_step_operator(step_op, &g_proj, trust_radius, &active)
            .map_err(EstimationError::RemlOptimizationFailed)?;
        // CG produced no usable step: halve the radius and retry.
        let Some((trial_step, pred_dec_free)) = cg_result else {
            let elapsed = iter_start.elapsed().as_secs_f64();
            log::info!(
                "[ARC-timing] iter={iter} status=cg_no_step cost={:.6e} grad_norm={:.3e} \
                 trust_radius={:.3e} hv_applies={} elapsed={:.3}s",
                eval_k.cost,
                g_norm,
                trust_radius,
                counter.count(),
                elapsed,
            );
            trust_radius = (trust_radius * 0.5).max(1e-12);
            continue;
        };
        let x_trial_raw = &x_k + &trial_step;
        let x_trial = project_to_bounds(&x_trial_raw, bounds);
        let s_trial = &x_trial - &x_k;
        let s_norm = s_trial.dot(&s_trial).sqrt();
        // Projection collapsed the step to (numerically) nothing.
        if !s_norm.is_finite() || s_norm <= 1e-16 {
            let elapsed = iter_start.elapsed().as_secs_f64();
            log::info!(
                "[ARC-timing] iter={iter} status=zero_step cost={:.6e} grad_norm={:.3e} \
                 trust_radius={:.3e} hv_applies={} elapsed={:.3}s",
                eval_k.cost,
                g_norm,
                trust_radius,
                counter.count(),
                elapsed,
            );
            trust_radius = (trust_radius * 0.5).max(1e-12);
            continue;
        }
        // If projection changed the step materially, re-evaluate the model
        // decrease along the projected step; otherwise reuse the free-space one.
        let pred_dec = if (&s_trial - &trial_step)
            .dot(&(&s_trial - &trial_step))
            .sqrt()
            > 1e-8 * (1.0 + trial_step.dot(&trial_step).sqrt())
        {
            predicted_decrease_from_operator(step_op, &g_proj, &s_trial, &active)
                .map_err(EstimationError::RemlOptimizationFailed)?
        } else {
            pred_dec_free
        };
        if !pred_dec.is_finite() || pred_dec <= 0.0 {
            let elapsed = iter_start.elapsed().as_secs_f64();
            log::info!(
                "[ARC-timing] iter={iter} status=bad_pred_dec cost={:.6e} grad_norm={:.3e} \
                 trust_radius={:.3e} hv_applies={} elapsed={:.3}s",
                eval_k.cost,
                g_norm,
                trust_radius,
                counter.count(),
                elapsed,
            );
            trust_radius = (trust_radius * 0.5).max(1e-12);
            continue;
        }
        // Value-only evaluation at the trial point; recoverable failures
        // quarter the radius and retry, fatal ones abort the run.
        let trial_cost = match obj.eval_cost(&x_trial) {
            Ok(cost) => match finite_cost_or_error("outer operator trial cost failed", cost) {
                Ok(cost) => cost,
                Err(ObjectiveEvalError::Recoverable { message }) => {
                    let elapsed = iter_start.elapsed().as_secs_f64();
                    log::info!(
                        "[ARC-timing] iter={iter} status=infeasible_trial cost={:.6e} \
                         grad_norm={:.3e} pred_dec={:.3e} trust_radius={:.3e}->{:.3e} \
                         hv_applies={} elapsed={:.3}s reason={}",
                        eval_k.cost,
                        g_norm,
                        pred_dec,
                        trust_radius,
                        (trust_radius * 0.25).max(1e-12),
                        counter.count(),
                        elapsed,
                        message,
                    );
                    trust_radius = (trust_radius * 0.25).max(1e-12);
                    continue;
                }
                Err(ObjectiveEvalError::Fatal { message }) => {
                    return Err(EstimationError::RemlOptimizationFailed(message));
                }
            },
            Err(err) => {
                let elapsed = iter_start.elapsed().as_secs_f64();
                log::info!(
                    "[ARC-timing] iter={iter} status=eval_error cost={:.6e} grad_norm={:.3e} \
                     pred_dec={:.3e} trust_radius={:.3e}->{:.3e} hv_applies={} \
                     elapsed={:.3}s reason={}",
                    eval_k.cost,
                    g_norm,
                    pred_dec,
                    trust_radius,
                    (trust_radius * 0.25).max(1e-12),
                    counter.count(),
                    elapsed,
                    err,
                );
                trust_radius = (trust_radius * 0.25).max(1e-12);
                continue;
            }
        };
        // Standard trust-region acceptance ratio and radius update: grow when
        // the model fit well and the step hit the boundary, shrink on poor fit.
        let act_dec = eval_k.cost - trial_cost;
        let rho = act_dec / pred_dec;
        let accepted_by_ratio = rho > OPERATOR_ETA_ACCEPT;
        let accepted = accepted_by_ratio;
        let new_trust_radius = if rho > 0.75 && s_norm > 0.99 * trust_radius {
            (trust_radius * 2.0).min(OPERATOR_TRUST_RADIUS_MAX)
        } else if rho < 0.25 {
            (trust_radius * 0.5).max(1e-12)
        } else {
            trust_radius
        };
        let hv_applies = counter.count();
        if accepted {
            let trust_radius_before_trial_eval = trust_radius;
            // Full (value+gradient+Hessian) evaluation at the accepted point;
            // if it fails recoverably the step is effectively rejected with a
            // quartered radius.
            let eval_trial = match obj
                .eval_with_order(&x_trial, OuterEvalOrder::ValueGradientHessian)
            {
                Ok(eval) => {
                    match finite_outer_eval_or_error("outer operator eval failed", layout, eval) {
                        Ok(eval) => eval,
                        Err(ObjectiveEvalError::Recoverable { message }) => {
                            let elapsed = iter_start.elapsed().as_secs_f64();
                            let rejected_trust_radius = (new_trust_radius * 0.25).max(1e-12);
                            log::info!(
                                "[ARC-timing] iter={iter} status=accepted_eval_error \
                                 cost={:.6e}->{:.6e} grad_norm={:.3e} rho={:.3} \
                                 pred_dec={:.3e} act_dec={:.3e} \
                                 trust_radius={:.3e}->{:.3e} hv_applies={} elapsed={:.3}s \
                                 reason={}",
                                eval_k.cost,
                                trial_cost,
                                g_norm,
                                rho,
                                pred_dec,
                                act_dec,
                                trust_radius_before_trial_eval,
                                rejected_trust_radius,
                                hv_applies,
                                elapsed,
                                message,
                            );
                            trust_radius = rejected_trust_radius;
                            continue;
                        }
                        Err(ObjectiveEvalError::Fatal { message }) => {
                            return Err(EstimationError::RemlOptimizationFailed(message));
                        }
                    }
                }
                Err(err) => {
                    let elapsed = iter_start.elapsed().as_secs_f64();
                    let rejected_trust_radius = (new_trust_radius * 0.25).max(1e-12);
                    log::info!(
                        "[ARC-timing] iter={iter} status=accepted_eval_error \
                         cost={:.6e}->{:.6e} grad_norm={:.3e} rho={:.3} \
                         pred_dec={:.3e} act_dec={:.3e} \
                         trust_radius={:.3e}->{:.3e} hv_applies={} elapsed={:.3}s \
                         reason={}",
                        eval_k.cost,
                        trial_cost,
                        g_norm,
                        rho,
                        pred_dec,
                        act_dec,
                        trust_radius_before_trial_eval,
                        rejected_trust_radius,
                        hv_applies,
                        elapsed,
                        err,
                    );
                    trust_radius = rejected_trust_radius;
                    continue;
                }
            };
            let elapsed = iter_start.elapsed().as_secs_f64();
            log::info!(
                "[ARC-timing] iter={iter} status=accepted cost={:.6e}->{:.6e} \
                 grad_norm={:.3e} rho={:.3} pred_dec={:.3e} act_dec={:.3e} \
                 trust_radius={:.3e}->{:.3e} hv_applies={} elapsed={:.3}s",
                eval_k.cost,
                trial_cost,
                g_norm,
                rho,
                pred_dec,
                act_dec,
                trust_radius_before_trial_eval,
                new_trust_radius,
                hv_applies,
                elapsed,
            );
            // Commit the step and reset rejection/stall bookkeeping; the cached
            // dense model is invalid at the new point.
            trust_radius = new_trust_radius;
            x_k = x_trial;
            eval_k = eval_trial;
            dense_model = None;
            materialize_dense_after_rejection = false;
            last_rejection_act_dec = None;
            consecutive_corner_clamps = 0;
        } else {
            // Rejected: possibly snap the recovery radius down when the model
            // decrease is grossly out of scale with the cost.
            let derived_recovery_radius =
                derive_initial_trust_radius_from_seed(eval_k.cost, g_norm);
            let snapped_radius = snap_recovery_radius_on_rejection(
                eval_k.cost,
                pred_dec,
                derived_recovery_radius,
                new_trust_radius,
            );
            let elapsed = iter_start.elapsed().as_secs_f64();
            log::info!(
                "[ARC-timing] iter={iter} status=rejected cost={:.6e}->{:.6e} \
                 grad_norm={:.3e} rho={:.3} pred_dec={:.3e} act_dec={:.3e} \
                 trust_radius={:.3e}->{:.3e} hv_applies={} elapsed={:.3}s",
                eval_k.cost,
                eval_k.cost,
                g_norm,
                rho,
                pred_dec,
                act_dec,
                trust_radius,
                snapped_radius,
                hv_applies,
                elapsed,
            );
            if snapped_radius < new_trust_radius {
                let cost_scale_for_log = eval_k.cost.abs().max(1.0);
                let mismatch_ratio_for_log = if pred_dec.is_finite() && pred_dec > 0.0 {
                    pred_dec / cost_scale_for_log
                } else {
                    0.0
                };
                log::info!(
                    "[ARC-timing] iter={iter} snap_recovery: pred_dec/|cost|={:.3e} >> 1; \
                     snapped trust_radius {:.3e} -> {:.3e} (skipping ~{} halvings)",
                    mismatch_ratio_for_log,
                    new_trust_radius,
                    snapped_radius,
                    (new_trust_radius / snapped_radius).log2().ceil() as i64,
                );
            }
            // Corner-clamp detection: an act_dec that stays (relatively)
            // identical across consecutive rejections means projection keeps
            // pinning the trial point to the same box corner.
            let mut corner_clamp_just_detected = false;
            if act_dec.is_finite() {
                let cost_floor = eval_k.cost.abs().max(1.0);
                let near_equal = match last_rejection_act_dec {
                    Some(prev) => {
                        let scale = prev.abs().max(act_dec.abs()).max(cost_floor) * 1e-9;
                        (act_dec - prev).abs() <= scale
                    }
                    None => false,
                };
                if near_equal {
                    consecutive_corner_clamps += 1;
                    if consecutive_corner_clamps >= CORNER_CLAMP_REPORT_THRESHOLD {
                        corner_clamp_just_detected = true;
                    }
                    if !corner_clamp_reported_for_run
                        && consecutive_corner_clamps >= CORNER_CLAMP_REPORT_THRESHOLD
                    {
                        log::warn!(
                            "[ARC-timing] iter={iter} corner_clamp_detected: act_dec={:.6e} \
                             stable across {} consecutive rejected halvings (trust_radius now \
                             {:.3e}); the unconstrained cubic-model step direction is crossing \
                             one or more box bounds, projection clamps the trial point to a \
                             constant corner regardless of step magnitude, so cost(trial) is \
                             constant and act_dec stops carrying useful information. \
                             Attempting active-set escape (reduced-subspace step).",
                            act_dec,
                            consecutive_corner_clamps + 1,
                            snapped_radius,
                        );
                        corner_clamp_reported_for_run = true;
                    }
                } else {
                    consecutive_corner_clamps = 0;
                }
                last_rejection_act_dec = Some(act_dec);
            }
            if corner_clamp_just_detected {
                // Active-set escape: freeze the clipped/pinned axes and try a
                // reduced-subspace trust-region step in the remaining ones.
                let clipped = clipped_axes_mask(&x_trial_raw, &x_trial);
                let pinned = pinned_axes_mask(&x_k, bounds);
                let extra_active: Vec<bool> = clipped
                    .iter()
                    .zip(pinned.iter())
                    .map(|(c, p)| *c || *p)
                    .collect();
                match compute_active_set_step(
                    step_op,
                    &g_proj,
                    &active,
                    &extra_active,
                    snapped_radius.max(trust_radius),
                ) {
                    Ok(Some((reduced_step, reduced_pred, extended_active))) => {
                        let x_trial_red_raw = &x_k + &reduced_step;
                        let x_trial_red = project_to_bounds(&x_trial_red_raw, bounds);
                        let s_red = &x_trial_red - &x_k;
                        let s_red_norm = s_red.dot(&s_red).sqrt();
                        if s_red_norm.is_finite() && s_red_norm > 1e-16 {
                            // Re-evaluate the model decrease if projection
                            // moved the reduced step materially.
                            let red_pred = if (&s_red - &reduced_step)
                                .dot(&(&s_red - &reduced_step))
                                .sqrt()
                                > 1e-8 * (1.0 + reduced_step.dot(&reduced_step).sqrt())
                            {
                                predicted_decrease_from_operator(
                                    step_op,
                                    &g_proj,
                                    &s_red,
                                    &extended_active,
                                )
                                .unwrap_or(reduced_pred)
                            } else {
                                reduced_pred
                            };
                            if red_pred.is_finite() && red_pred > 0.0 {
                                let red_trial_cost = obj.eval_cost(&x_trial_red);
                                if let Ok(red_trial_cost) = red_trial_cost {
                                    if red_trial_cost.is_finite() {
                                        let red_act_dec = eval_k.cost - red_trial_cost;
                                        let red_rho = red_act_dec / red_pred;
                                        if red_rho > OPERATOR_ETA_ACCEPT {
                                            match obj.eval_with_order(
                                                &x_trial_red,
                                                OuterEvalOrder::ValueGradientHessian,
                                            ) {
                                                Ok(eval_red) => {
                                                    if let Ok(eval_red) = finite_outer_eval_or_error(
                                                        "outer operator active-set eval failed",
                                                        layout,
                                                        eval_red,
                                                    ) {
                                                        log::warn!(
                                                            "[ARC-timing] iter={iter} \
                                                             active_set_escape_accepted: \
                                                             cost={:.6e}->{:.6e} red_rho={:.3} \
                                                             red_pred={:.3e} red_act_dec={:.3e} \
                                                             axes_pinned={}/{} \
                                                             trust_radius={:.3e}",
                                                            eval_k.cost,
                                                            red_trial_cost,
                                                            red_rho,
                                                            red_pred,
                                                            red_act_dec,
                                                            extended_active
                                                                .iter()
                                                                .filter(|f| **f)
                                                                .count(),
                                                            extended_active.len(),
                                                            snapped_radius,
                                                        );
                                                        // Accept the escape step and fully reset
                                                        // the stall bookkeeping.
                                                        x_k = x_trial_red;
                                                        eval_k = eval_red;
                                                        last_rejection_act_dec = None;
                                                        consecutive_corner_clamps = 0;
                                                        corner_clamp_reported_for_run = false;
                                                        trust_radius = snapped_radius.max(
                                                            OPERATOR_TRUST_RADIUS_REJECT_FLOOR
                                                                * 10.0,
                                                        );
                                                        dense_model = None;
                                                        materialize_dense_after_rejection = false;
                                                        continue;
                                                    }
                                                }
                                                Err(_) => {
                                                    // Best-effort: fall through to termination.
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                    Ok(None) => {
                        // No distinct reduced subspace to try.
                    }
                    Err(_) => {
                        // Operator failure during escape; fall through.
                    }
                }
                // Escape failed: terminate at the corner (best effort).
                trust_radius = snapped_radius.max(OPERATOR_TRUST_RADIUS_REJECT_FLOOR * 10.0);
                let final_grad = projected_gradient(&x_k, &eval_k.gradient, bounds);
                let final_grad_norm = final_grad.dot(&final_grad).sqrt();
                log::warn!(
                    "[ARC-timing] iter={iter} active_set_escape_unavailable: \
                     terminating at corner with trust_radius={:.3e} \
                     grad_norm={:.3e}",
                    trust_radius,
                    final_grad_norm,
                );
                return Ok(OuterResult {
                    rho: x_k,
                    final_value: eval_k.cost,
                    iterations: iter + 1,
                    final_grad_norm,
                    final_gradient: Some(eval_k.gradient),
                    final_hessian: None,
                    converged: false,
                    plan_used: plan,
                    operator_trust_radius: Some(trust_radius),
                    operator_stop_reason: Some(OperatorTrustRegionStopReason::RejectFloor),
                });
            }
            trust_radius = snapped_radius;
            if trust_radius <= OPERATOR_TRUST_RADIUS_REJECT_FLOOR {
                let final_grad = projected_gradient(&x_k, &eval_k.gradient, bounds);
                let final_grad_norm = final_grad.dot(&final_grad).sqrt();
                return Ok(OuterResult {
                    rho: x_k,
                    final_value: eval_k.cost,
                    iterations: iter + 1,
                    final_grad_norm,
                    final_gradient: Some(eval_k.gradient),
                    final_hessian: None,
                    converged: false,
                    plan_used: plan,
                    operator_trust_radius: Some(trust_radius),
                    operator_stop_reason: Some(OperatorTrustRegionStopReason::RejectFloor),
                });
            } else if dense_model.is_none() {
                // Next iteration may cache the dense Hessian for cheap retries.
                materialize_dense_after_rejection = true;
            }
        }
    }
    // Iteration budget exhausted without convergence.
    let final_grad = projected_gradient(&x_k, &eval_k.gradient, bounds);
    let final_grad_norm = final_grad.dot(&final_grad).sqrt();
    Ok(OuterResult {
        rho: x_k,
        final_value: eval_k.cost,
        iterations: max_iter,
        final_grad_norm,
        final_gradient: Some(eval_k.gradient),
        final_hessian: None,
        converged: false,
        plan_used: plan,
        operator_trust_radius: Some(trust_radius),
        operator_stop_reason: Some(OperatorTrustRegionStopReason::IterationBudget),
    })
}
/// Top-level outer (rho/smoothing-parameter) optimization driver: resolves
/// the capability and plan, runs the primary plan, retries exhausted ARC
/// attempts with a doubled iteration budget, and walks automatic fallback
/// plans when an attempt fails or does not converge.
///
/// `context` is a human-readable label used only in log/error messages.
/// Returns the first converged result, the last attempt's result, or the
/// last error when every attempt failed.
fn run_outer(
    obj: &mut dyn OuterObjective,
    config: &OuterConfig,
    context: &str,
) -> Result<OuterResult, EstimationError> {
    let cap = primary_capability_for_config(obj.capability(), config, context);
    cap.validate_layout(context)?;
    // Validate any caller-supplied warm-start point against the layout.
    if let Some(initial_rho) = config.initial_rho.as_ref() {
        cap.theta_layout()
            .validate_point_len(initial_rho, "initial outer seed")
            .map_err(|err| match err {
                ObjectiveEvalError::Recoverable { message }
                | ObjectiveEvalError::Fatal { message } => {
                    EstimationError::RemlOptimizationFailed(format!("{context}: {message}"))
                }
            })?;
    }
    // Degenerate case: nothing to optimize — evaluate once and return.
    if cap.n_params == 0 {
        let cost = obj.eval_cost(&Array1::zeros(0))?;
        let the_plan = plan_with_class(&cap, config.solver_class);
        return Ok(OuterResult {
            rho: Array1::zeros(0),
            final_value: cost,
            iterations: 0,
            final_grad_norm: 0.0,
            final_gradient: None,
            final_hessian: None,
            converged: true,
            plan_used: the_plan,
            operator_trust_radius: None,
            operator_stop_reason: None,
        });
    }
    // Attempt list: the primary capability first, then (policy permitting)
    // progressively degraded fallback capabilities.
    let fallback_attempts = match (config.fallback_policy, config.solver_class) {
        (FallbackPolicy::Automatic, SolverClass::Primary) => automatic_fallback_attempts(&cap),
        (FallbackPolicy::Automatic, SolverClass::AuxiliaryGradientFree)
        | (FallbackPolicy::Disabled, _) => Vec::new(),
    };
    let mut attempts: Vec<OuterCapability> = Vec::with_capacity(1 + fallback_attempts.len());
    attempts.push(cap.clone());
    for degraded in fallback_attempts {
        attempts.push(degraded);
    }
    let mut last_error: Option<EstimationError> = None;
    for (attempt_idx, attempt_cap) in attempts.iter().enumerate() {
        let the_plan = plan_with_class(attempt_cap, config.solver_class);
        if attempt_idx > 0 {
            log::debug!("[OUTER] {context}: primary plan failed; falling back to {the_plan}");
        }
        log_plan(context, attempt_cap, &the_plan);
        obj.reset();
        // ARC attempts get up to two warm-started, budget-doubling retries;
        // other solvers get none.
        let mut arc_retries_left: u32 = if matches!(the_plan.solver, Solver::Arc) {
            2
        } else {
            0
        };
        let mut retry_config: Option<OuterConfig> = None;
        let outcome = loop {
            let active_config: &OuterConfig = retry_config.as_ref().unwrap_or(config);
            match run_outer_with_plan(obj, active_config, context, attempt_cap, &the_plan) {
                Ok(result) => {
                    // Stop retrying on convergence, exhausted retries, or a
                    // collapsed trust radius (more iterations would not help).
                    if result.converged
                        || arc_retries_left == 0
                        || matches!(
                            result.operator_stop_reason,
                            Some(OperatorTrustRegionStopReason::RejectFloor)
                        )
                    {
                        break Ok(result);
                    }
                    let prev_max_iter = active_config.max_iter;
                    let bumped_max_iter = prev_max_iter.saturating_mul(2);
                    let next_trust_radius = result.operator_trust_radius;
                    log::info!(
                        "[OUTER] {context}: ARC attempt exhausted budget at \
                         iter={} cost={:.6e} |g|={:.6e}; retrying ARC with \
                         max_iter {} -> {} warm-started from last rho \
                         and trust_radius {:?} (analytic-Hessian preservation)",
                        result.iterations,
                        result.final_value,
                        result.final_grad_norm,
                        prev_max_iter,
                        bumped_max_iter,
                        next_trust_radius,
                    );
                    // Warm-start the retry from where the last run stopped:
                    // doubled budget, last rho as seed, last trust radius.
                    let mut next = active_config.clone();
                    next.max_iter = bumped_max_iter;
                    next.initial_rho = Some(result.rho.clone());
                    next.operator_initial_trust_radius = next_trust_radius;
                    retry_config = Some(next);
                    arc_retries_left -= 1;
                    obj.reset();
                }
                Err(e) => break Err(e),
            }
        };
        match outcome {
            Ok(result) => {
                // Return the result on convergence or when no fallback remains;
                // otherwise record a soft failure and try the next plan.
                if result.converged || attempt_idx + 1 == attempts.len() {
                    return Ok(result);
                }
                let message = format!(
                    "{context}: attempt {} (plan={the_plan}) exhausted without convergence",
                    attempt_idx + 1
                );
                log::debug!("[OUTER] {message}; trying degraded fallback plan");
                last_error = Some(EstimationError::RemlOptimizationFailed(message));
            }
            Err(e) => {
                log::debug!(
                    "[OUTER] {context}: attempt {} (plan={the_plan}) failed: {e}",
                    attempt_idx + 1
                );
                last_error = Some(e);
            }
        }
    }
    // All attempts consumed: surface the last recorded error.
    Err(last_error.unwrap_or_else(|| {
        EstimationError::RemlOptimizationFailed(format!("all plan attempts exhausted ({context})"))
    }))
}
fn run_outer_with_plan(
obj: &mut dyn OuterObjective,
config: &OuterConfig,
context: &str,
cap: &OuterCapability,
the_plan: &OuterPlan,
) -> Result<OuterResult, EstimationError> {
let mut seeds = {
let generated = crate::seeding::generate_rho_candidates(
cap.n_params,
config.heuristic_lambdas.as_deref(),
&config.seed_config,
);
if generated.is_empty() {
Vec::new()
} else {
generated
}
};
if let Some(initial_rho) = config.initial_rho.as_ref()
&& !seeds.iter().any(|seed| seed == initial_rho)
{
seeds.insert(0, initial_rho.clone());
}
if seeds.is_empty() {
return Err(EstimationError::RemlOptimizationFailed(format!(
"no seeds generated for outer optimization ({context})"
)));
}
let screening_enabled = config.screening_cap.is_some();
let seed_budget = effective_seed_budget(
config.seed_config.seed_budget,
the_plan.solver,
config.seed_config.risk_profile,
screening_enabled,
)
.min(seeds.len());
if should_screen_seeds(config, the_plan.solver, seeds.len(), seed_budget) {
seeds = rank_seeds_with_screening(obj, config, context, &seeds);
}
log::debug!(
"[OUTER] {context}: trying generated seeds directly (generated={}, budget={})",
seeds.len(),
seed_budget,
);
if seed_budget < config.seed_config.seed_budget.max(1) {
log::debug!(
"[OUTER] {context}: capped requested seed budget {} -> {} for {:?} ({:?})",
config.seed_config.seed_budget.max(1),
seed_budget,
the_plan.solver,
config.seed_config.risk_profile,
);
}
if seeds.len() > seed_budget {
log::debug!(
"[OUTER] {context}: trying up to {seed_budget}/{} generated seeds in heuristic order",
seeds.len(),
);
}
let (lower, upper) = config.bounds.clone().unwrap_or_else(|| {
(
Array1::<f64>::from_elem(cap.n_params, -config.rho_bound),
Array1::<f64>::from_elem(cap.n_params, config.rho_bound),
)
});
let bounds_template = (lower, upper);
let mut best: Option<OuterResult> = None;
let mut rejection_reasons: Vec<(usize, &'static str, String)> = Vec::new();
let layout = cap.theta_layout();
let mut started_seeds = 0usize;
let expensive_seed_limit =
expensive_unsuccessful_seed_limit(the_plan.solver, config.seed_config.risk_profile);
let mut unsuccessful_expensive_seeds = 0usize;
let mut stopped_early_due_to_limit = false;
'seed_attempts: for (seed_idx, seed) in seeds.iter().enumerate() {
if started_seeds == seed_budget {
break;
}
obj.reset();
let t_seed_start = std::time::Instant::now();
let seed_slot;
let result: Result<OuterResult, EstimationError> = match the_plan.solver {
Solver::Arc => {
let seed_eval = obj
.eval_with_order(&seed, OuterEvalOrder::ValueGradientHessian)
.map_err(|err| into_objective_error("outer eval failed", err));
let seed_eval = match seed_eval {
Ok(seed_eval) => seed_eval,
Err(err) => {
let err = match err {
ObjectiveEvalError::Recoverable { message }
| ObjectiveEvalError::Fatal { message } => {
EstimationError::RemlOptimizationFailed(message)
}
};
if requests_immediate_first_order_fallback(&err.to_string()) {
return Err(err);
}
log::warn!(
"[OUTER] {context}: rejecting seed {seed_idx} before solver start: {err}"
);
rejection_reasons.push((seed_idx, "validation", err.to_string()));
continue 'seed_attempts;
}
};
let seed_eval = finite_outer_eval_or_error("outer eval failed", layout, seed_eval)
.map_err(|err| match err {
ObjectiveEvalError::Recoverable { message }
| ObjectiveEvalError::Fatal { message } => {
EstimationError::RemlOptimizationFailed(message)
}
});
let mut seed_eval = match seed_eval {
Ok(seed_eval) => seed_eval,
Err(err) => {
log::warn!(
"[OUTER] {context}: rejecting seed {seed_idx} before solver start: {err}"
);
rejection_reasons.push((seed_idx, "validation", err.to_string()));
continue 'seed_attempts;
}
};
validate_second_order_seed_hessian(context, layout, &seed_eval).map_err(|err| {
match err {
ObjectiveEvalError::Recoverable { message }
| ObjectiveEvalError::Fatal { message } => {
EstimationError::RemlOptimizationFailed(message)
}
}
})?;
started_seeds += 1;
seed_slot = started_seeds;
let cheap_materializable_operator = matches!(
seed_eval.hessian,
HessianResult::Operator(ref op)
if op.materialization_capability().is_available()
&& op.dim() <= OUTER_HVP_MATERIALIZE_MAX_DIM
);
if cheap_materializable_operator {
if let HessianResult::Operator(op) = &seed_eval.hessian {
match op.materialize_dense() {
Ok(dense) => {
seed_eval.hessian = HessianResult::Analytic(dense);
}
Err(message) => {
let err = EstimationError::RemlOptimizationFailed(format!(
"outer Hessian operator materialization failed: {message}"
));
log::warn!(
"[OUTER] {context}: rejecting seed {seed_idx} before solver start: {err}"
);
rejection_reasons.push((seed_idx, "validation", err.to_string()));
continue 'seed_attempts;
}
}
}
}
if matches!(seed_eval.hessian, HessianResult::Operator(_)) {
log::debug!(
"[OUTER] {context}: analytic Hessian provided as Hv operator; \
routing to internal trust-region CG"
);
run_operator_trust_region(
obj,
&seed,
layout,
Some(&bounds_template),
config.tolerance,
config.max_iter,
seed_eval,
*the_plan,
config.operator_initial_trust_radius,
)
} else {
let hessian_source = the_plan.hessian_source;
let objective = OuterSecondOrderBridge {
obj,
layout,
hessian_source,
materialize_operator_max_dim: OUTER_HVP_MATERIALIZE_MAX_DIM,
eval_count: 0,
outer_inner_cap: config.outer_inner_cap.clone(),
g_norm_initial: None,
last_g_norm: None,
};
let (lo, hi) = &bounds_template;
let bounds = Bounds::new(lo.clone(), hi.clone(), 1e-6)
.expect("outer rho bounds must be valid");
let scaled_tol = outer_scaled_tolerance(config.tolerance, seed_eval.cost);
let tol = Tolerance::new(scaled_tol).expect("outer tolerance must be valid");
let max_iter =
MaxIterations::new(config.max_iter).expect("outer max_iter must be valid");
let mut optimizer = ArcOptimizer::new(seed.clone(), objective)
.with_bounds(bounds)
.with_tolerance(tol)
.with_max_iterations(max_iter);
match optimizer.run() {
Ok(sol) => Ok(solution_into_outer_result(sol, true, *the_plan)),
Err(ArcError::MaxIterationsReached { last_solution, .. }) => {
Ok(solution_into_outer_result(*last_solution, false, *the_plan))
}
Err(e) => Err(EstimationError::RemlOptimizationFailed(format!(
"Arc solver failed: {e:?}"
))),
}
}
}
Solver::Bfgs => {
if cap.gradient != Derivative::Analytic {
return Err(EstimationError::RemlOptimizationFailed(format!(
"{context}: outer BFGS requires an analytic gradient capability; \
no non-analytic fallback is available (plan={the_plan}, \
declared gradient={:?})",
cap.gradient,
)));
}
let seed_eval = obj
.eval_with_order(&seed, OuterEvalOrder::ValueAndGradient)
.map_err(|err| into_objective_error("outer eval failed", err));
let seed_eval = match seed_eval {
Ok(seed_eval) => seed_eval,
Err(err) => {
let err = match err {
ObjectiveEvalError::Recoverable { message }
| ObjectiveEvalError::Fatal { message } => {
EstimationError::RemlOptimizationFailed(message)
}
};
log::warn!(
"[OUTER] {context}: rejecting seed {seed_idx} before solver start: {err}"
);
rejection_reasons.push((seed_idx, "validation", err.to_string()));
continue 'seed_attempts;
}
};
let seed_eval = match finite_outer_first_order_eval_or_error(
"outer eval failed",
layout,
seed_eval,
)
.map_err(|err| match err {
ObjectiveEvalError::Recoverable { message }
| ObjectiveEvalError::Fatal { message } => {
EstimationError::RemlOptimizationFailed(message)
}
}) {
Ok(eval) => eval,
Err(err) => {
log::warn!(
"[OUTER] {context}: rejecting seed {seed_idx} before solver start: {err}"
);
rejection_reasons.push((seed_idx, "validation", err.to_string()));
continue 'seed_attempts;
}
};
started_seeds += 1;
seed_slot = started_seeds;
let (lo, hi) = &bounds_template;
let bounds = Bounds::new(lo.clone(), hi.clone(), 1e-6)
.expect("outer rho bounds must be valid");
let scaled_tol = outer_scaled_tolerance(config.tolerance, seed_eval.cost);
let tol = Tolerance::new(scaled_tol).expect("outer tolerance must be valid");
let max_iter =
MaxIterations::new(config.max_iter).expect("outer max_iter must be valid");
let objective = OuterFirstOrderBridge {
obj,
layout,
outer_inner_cap: config.outer_inner_cap.clone(),
iter_count: 0,
g_norm_initial: None,
last_g_norm: None,
};
let mut optimizer = Bfgs::new(seed.clone(), objective)
.with_bounds(bounds)
.with_tolerance(tol)
.with_max_iterations(max_iter);
let bfgs_start = std::time::Instant::now();
let outcome = optimizer.run();
let bfgs_elapsed = bfgs_start.elapsed().as_secs_f64();
match &outcome {
Ok(sol) => log::info!(
"[OUTER summary] BFGS converged in {} iters elapsed={:.3}s final_value={:.6e}",
sol.iterations,
bfgs_elapsed,
sol.final_value
),
Err(BfgsError::MaxIterationsReached { last_solution }) => log::info!(
"[OUTER summary] BFGS hit max_iter in {} iters elapsed={:.3}s final_value={:.6e}",
last_solution.iterations,
bfgs_elapsed,
last_solution.final_value
),
Err(BfgsError::LineSearchFailed { last_solution, .. }) => log::info!(
"[OUTER summary] BFGS line-search failed in {} iters elapsed={:.3}s final_value={:.6e}",
last_solution.iterations,
bfgs_elapsed,
last_solution.final_value
),
Err(e) => log::info!(
"[OUTER summary] BFGS failed elapsed={:.3}s err={:?}",
bfgs_elapsed,
e
),
}
match outcome {
Ok(sol) => Ok(solution_into_outer_result(sol, true, *the_plan)),
Err(BfgsError::MaxIterationsReached { last_solution }) => {
Ok(solution_into_outer_result(*last_solution, false, *the_plan))
}
Err(BfgsError::LineSearchFailed { last_solution, .. }) => {
Ok(solution_into_outer_result(*last_solution, false, *the_plan))
}
Err(e) => Err(EstimationError::RemlOptimizationFailed(format!(
"BFGS solver failed: {e:?}"
))),
}
}
Solver::Efs => {
let seed_eval = obj
.eval_efs(&seed)
.map_err(|err| into_objective_error("outer EFS eval failed", err));
let seed_eval = match seed_eval {
Ok(seed_eval) => seed_eval,
Err(err) => {
let err = match err {
ObjectiveEvalError::Recoverable { message }
| ObjectiveEvalError::Fatal { message } => {
EstimationError::RemlOptimizationFailed(message)
}
};
if requests_immediate_first_order_fallback(&err.to_string()) {
return Err(err);
}
log::warn!(
"[OUTER] {context}: rejecting seed {seed_idx} before solver start: {err}"
);
rejection_reasons.push((seed_idx, "validation", err.to_string()));
continue 'seed_attempts;
}
};
let seed_eval =
finite_efs_eval_or_error("outer EFS eval failed", layout, seed_eval).map_err(
|err| match err {
ObjectiveEvalError::Recoverable { message }
| ObjectiveEvalError::Fatal { message } => {
EstimationError::RemlOptimizationFailed(message)
}
},
);
if let Err(err) = seed_eval {
if requests_immediate_first_order_fallback(&err.to_string()) {
return Err(err);
}
log::warn!(
"[OUTER] {context}: rejecting seed {seed_idx} before solver start: {err}"
);
rejection_reasons.push((seed_idx, "validation", err.to_string()));
continue 'seed_attempts;
}
started_seeds += 1;
seed_slot = started_seeds;
let (lo, hi) = &bounds_template;
let bounds = Bounds::new(lo.clone(), hi.clone(), 1e-6)
.expect("outer rho bounds must be valid");
let tol = Tolerance::new(config.tolerance).expect("outer tolerance must be valid");
let max_iter =
MaxIterations::new(config.max_iter).expect("outer max_iter must be valid");
let objective = OuterFixedPointBridge {
obj,
layout,
barrier_config: cap.barrier_config.clone(),
consecutive_psi_zero_iters: 0,
};
let mut optimizer = FixedPoint::new(seed.clone(), objective)
.with_bounds(bounds)
.with_tolerance(tol)
.with_max_iterations(max_iter);
match optimizer.run() {
Ok(sol) => Ok(solution_into_outer_result(sol, true, *the_plan)),
Err(FixedPointError::MaxIterationsReached { last_solution }) => {
Ok(solution_into_outer_result(*last_solution, false, *the_plan))
}
Err(e) => Err(EstimationError::RemlOptimizationFailed(format!(
"fixed-point solver failed: {e:?}"
))),
}
}
Solver::HybridEfs => {
let seed_eval = obj
.eval_efs(&seed)
.map_err(|err| into_objective_error("outer EFS eval failed", err));
let seed_eval = match seed_eval {
Ok(seed_eval) => seed_eval,
Err(err) => {
let err = match err {
ObjectiveEvalError::Recoverable { message }
| ObjectiveEvalError::Fatal { message } => {
EstimationError::RemlOptimizationFailed(message)
}
};
if requests_immediate_first_order_fallback(&err.to_string()) {
return Err(err);
}
log::warn!(
"[OUTER] {context}: rejecting seed {seed_idx} before solver start: {err}"
);
rejection_reasons.push((seed_idx, "validation", err.to_string()));
continue 'seed_attempts;
}
};
let seed_eval =
finite_efs_eval_or_error("outer EFS eval failed", layout, seed_eval).map_err(
|err| match err {
ObjectiveEvalError::Recoverable { message }
| ObjectiveEvalError::Fatal { message } => {
EstimationError::RemlOptimizationFailed(message)
}
},
);
if let Err(err) = seed_eval {
if requests_immediate_first_order_fallback(&err.to_string()) {
return Err(err);
}
log::warn!(
"[OUTER] {context}: rejecting seed {seed_idx} before solver start: {err}"
);
rejection_reasons.push((seed_idx, "validation", err.to_string()));
continue 'seed_attempts;
}
started_seeds += 1;
seed_slot = started_seeds;
let (lo, hi) = &bounds_template;
let bounds = Bounds::new(lo.clone(), hi.clone(), 1e-6)
.expect("outer rho bounds must be valid");
let tol = Tolerance::new(config.tolerance).expect("outer tolerance must be valid");
let max_iter =
MaxIterations::new(config.max_iter).expect("outer max_iter must be valid");
let objective = OuterFixedPointBridge {
obj,
layout,
barrier_config: cap.barrier_config.clone(),
consecutive_psi_zero_iters: 0,
};
let mut optimizer = FixedPoint::new(seed.clone(), objective)
.with_bounds(bounds)
.with_tolerance(tol)
.with_max_iterations(max_iter);
match optimizer.run() {
Ok(sol) => Ok(solution_into_outer_result(sol, true, *the_plan)),
Err(FixedPointError::MaxIterationsReached { last_solution }) => {
Ok(solution_into_outer_result(*last_solution, false, *the_plan))
}
Err(e) => Err(EstimationError::RemlOptimizationFailed(format!(
"hybrid EFS solver failed: {e:?}"
))),
}
}
Solver::CompassSearch => {
let projected_seed = project_to_bounds(seed, Some(&bounds_template));
let seed_cost = obj.eval_cost(&projected_seed).map_err(|err| {
EstimationError::RemlOptimizationFailed(format!(
"aux direct-search seed cost failed ({context}): {err}"
))
})?;
if !seed_cost.is_finite() {
rejection_reasons.push((
seed_idx,
"validation",
format!("aux direct-search rejects non-finite seed cost ({seed_cost})"),
));
continue 'seed_attempts;
}
started_seeds += 1;
seed_slot = started_seeds;
let (lo, hi) = &bounds_template;
let outcome = compass_search_outer(
obj,
projected_seed,
seed_cost,
lo.view(),
hi.view(),
1.0,
config.tolerance,
config.max_iter,
);
match outcome {
CompassSearchOutcome::Converged { point, cost, polls } => Ok(OuterResult {
rho: point,
final_value: cost,
iterations: polls,
final_grad_norm: 0.0,
final_gradient: None,
final_hessian: None,
converged: true,
plan_used: *the_plan,
operator_trust_radius: None,
operator_stop_reason: None,
}),
CompassSearchOutcome::BudgetExhausted { point, cost, polls } => {
Ok(OuterResult {
rho: point,
final_value: cost,
iterations: polls,
final_grad_norm: 0.0,
final_gradient: None,
final_hessian: None,
converged: false,
plan_used: *the_plan,
operator_trust_radius: None,
operator_stop_reason: None,
})
}
}
}
};
let seed_elapsed = t_seed_start.elapsed().as_secs_f64();
match result {
Ok(candidate) => {
let candidate_converged = candidate.converged;
log::debug!(
"[outer-timing] seed {}/{} ({:?}): {:.3}s cost={:.6e} converged={}",
seed_slot,
seed_budget,
the_plan.solver,
seed_elapsed,
candidate.final_value,
candidate.converged,
);
if candidate_improves_best(&candidate, best.as_ref()) {
best = Some(candidate);
}
if best.as_ref().is_some_and(|b| b.converged) {
break;
}
if !candidate_converged && matches!(expensive_seed_limit, Some(limit) if limit > 0)
{
unsuccessful_expensive_seeds += 1;
if let Some(limit) = expensive_seed_limit
&& unsuccessful_expensive_seeds >= limit
{
log::info!(
"[OUTER] {context}: stopping expensive multi-start after {} non-converged {:?} seed(s)",
unsuccessful_expensive_seeds,
the_plan.solver,
);
stopped_early_due_to_limit = true;
break;
}
}
}
Err(e) => {
if requests_immediate_first_order_fallback(&e.to_string()) {
return Err(e);
}
log::debug!(
"[outer-timing] seed {}/{} ({:?}): {:.3}s FAILED: {}",
seed_slot,
seed_budget,
the_plan.solver,
seed_elapsed,
e,
);
rejection_reasons.push((seed_idx, "solver", e.to_string()));
if let Some(limit) = expensive_seed_limit {
unsuccessful_expensive_seeds += 1;
if unsuccessful_expensive_seeds >= limit {
log::info!(
"[OUTER] {context}: stopping expensive multi-start after {} failed {:?} seed(s)",
unsuccessful_expensive_seeds,
the_plan.solver,
);
stopped_early_due_to_limit = true;
break;
}
}
}
}
}
best.ok_or_else(|| {
let n_generated = seeds.len();
let n_attempted = n_generated.min(seed_budget);
let n_rejected = rejection_reasons.len();
let breakdown = if rejection_reasons.is_empty() {
String::new()
} else {
let joined = rejection_reasons
.iter()
.map(|(idx, phase, msg)| format!("seed {idx} ({phase}): {msg}"))
.collect::<Vec<_>>()
.join("; ");
format!("; reasons: [{joined}]")
};
let early_stop_note = if stopped_early_due_to_limit {
format!(
"; stopped early after {unsuccessful_expensive_seeds} consecutive \
non-converged {:?} seed(s) (expensive_unsuccessful_seed_limit)",
the_plan.solver
)
} else {
String::new()
};
if started_seeds == 0 {
EstimationError::RemlOptimizationFailed(format!(
"no candidate seeds passed outer startup validation ({context}); \
generated={n_generated}, attempted={n_attempted}, rejected={n_rejected}{breakdown}"
))
} else {
EstimationError::RemlOptimizationFailed(format!(
"all {started_seeds} seed candidates failed ({context}); \
generated={n_generated}, attempted={n_attempted}, \
started_in_solver={started_seeds}, rejected={n_rejected}\
{early_stop_note}{breakdown}"
))
}
})
}
#[cfg(test)]
mod tests {
use super::*;
use ::opt::FixedPointObjective;
use ndarray::array;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{Arc, Mutex};
#[test]
fn derive_initial_trust_radius_clamps_huge_gradient_to_floor() {
    // A catastrophic gradient norm must pull the seeded radius down to
    // |cost| / ‖g‖ while staying between the reject floor and the default cap.
    let cost = -1.1e10;
    let grad_norm = 3.3e17;
    let radius = derive_initial_trust_radius_from_seed(cost, grad_norm);
    let want = 1.1e10 / 3.3e17;
    assert!(
        (radius - want).abs() < want * 1e-10,
        "expected {want:.3e}, got {radius:.3e}"
    );
    assert!(radius >= OPERATOR_TRUST_RADIUS_REJECT_FLOOR * 10.0);
    assert!(radius <= OPERATOR_TRUST_RADIUS_INIT);
    // An absurdly steep gradient clamps all the way down to the floor.
    let floored = derive_initial_trust_radius_from_seed(1.0, 1e30);
    assert!((floored - OPERATOR_TRUST_RADIUS_REJECT_FLOOR * 10.0).abs() < 1e-15);
}
#[test]
fn derive_initial_trust_radius_preserves_default_for_small_gradient() {
    // A mild gradient leaves the default initial radius untouched.
    let relaxed = derive_initial_trust_radius_from_seed(10.0, 0.1);
    assert!((relaxed - OPERATOR_TRUST_RADIUS_INIT).abs() < 1e-15);
    // |cost| equal to ‖g‖ yields exactly a unit radius.
    let unit = derive_initial_trust_radius_from_seed(5.0, 5.0);
    assert!((unit - 1.0).abs() < 1e-15);
}
#[test]
fn derive_initial_trust_radius_handles_degenerate_gradient() {
    // Zero, NaN, and infinite gradient norms must all fall back to the
    // default initial radius instead of producing a degenerate value.
    for bad_norm in [0.0, f64::NAN, f64::INFINITY] {
        let radius = derive_initial_trust_radius_from_seed(100.0, bad_norm);
        assert!((radius - OPERATOR_TRUST_RADIUS_INIT).abs() < 1e-15);
    }
}
#[test]
fn snap_on_rejection_fires_for_catastrophic_pred_dec() {
    // When the predicted decrease is wildly out of proportion to the cost
    // (~30 orders of magnitude here), recovery must snap straight to the
    // derived radius instead of merely halving the default.
    // (Reformatted: the original packed three `let` bindings on one line.)
    let cost = 3.124e6_f64;
    let pred_dec = 2.548e36_f64;
    let derived_radius = 1e-7_f64;
    let halve_default = 0.15_f64;
    let r = snap_recovery_radius_on_rejection(cost, pred_dec, derived_radius, halve_default);
    assert_eq!(
        r, derived_radius,
        "snap should jump to derived radius when mismatch is extreme"
    );
}
#[test]
fn snap_on_rejection_no_op_for_normal_rejection() {
    // A predicted decrease of ordinary magnitude is a routine rejection:
    // the recovery radius must stay at the plain halved default.
    let halve_default = 0.5_f64;
    let r = snap_recovery_radius_on_rejection(1.0, 100.0, 1e-3, halve_default);
    assert_eq!(
        r, halve_default,
        "snap must NOT fire for normal-magnitude rejections"
    );
}
#[test]
fn snap_on_rejection_no_op_when_derived_close_to_halve() {
    // Even with an extreme predicted-decrease mismatch, the snap only fires
    // when the derived radius is well below the halved default.
    // (Reformatted: the original packed multiple `let` bindings on one line.)
    let cost = 1e6_f64;
    let pred_dec = 1e20_f64;
    let derived_radius = 0.4_f64;
    let halve_default = 0.5_f64;
    let r = snap_recovery_radius_on_rejection(cost, pred_dec, derived_radius, halve_default);
    assert_eq!(
        r, halve_default,
        "snap requires derived < default*0.5 to fire"
    );
}
#[test]
fn snap_on_rejection_handles_degenerate_pred_dec() {
    // NaN or negative predicted decreases are not trustworthy evidence of a
    // catastrophic mismatch; the halved default must be kept as-is.
    let derived_radius = 1e-6_f64;
    let halve_default = 0.5_f64;
    for bad_pred in [f64::NAN, -1e10] {
        let r = snap_recovery_radius_on_rejection(100.0, bad_pred, derived_radius, halve_default);
        assert_eq!(r, halve_default);
    }
}
#[test]
fn outer_scaled_tolerance_matches_textbook_form() {
    // Expected form: tol * (1 + |cost|). Identity at zero cost...
    assert!((outer_scaled_tolerance(1e-6, 0.0) - 1e-6).abs() < 1e-18);
    // ...mild inflation for a small cost...
    assert!((outer_scaled_tolerance(1e-6, 0.5) - 1.5e-6).abs() < 1e-18);
    // ...and substantial inflation at biobank-scale objective values.
    let biobank = outer_scaled_tolerance(1e-6, -1.6e5);
    let expected = 1e-6 * (1.0 + 1.6e5);
    assert!((biobank - expected).abs() < 1e-12);
    assert!(
        biobank > 0.1,
        "biobank tolerance must be substantially larger than the absolute 1e-6: got {biobank:.6e}"
    );
}
#[test]
fn outer_scaled_tolerance_is_scale_invariant_at_large_cost() {
    // Dividing out (1 + |v|) must yield the same ratio whatever the
    // objective's absolute scale — i.e. the scaling is purely multiplicative.
    let base_tol = 1e-7;
    let v_base: f64 = 1.0e6;
    let v_scaled: f64 = v_base * 1000.0;
    let ratio_base = outer_scaled_tolerance(base_tol, v_base) / (1.0 + v_base);
    let ratio_scaled = outer_scaled_tolerance(base_tol, v_scaled) / (1.0 + v_scaled);
    assert!(
        (ratio_base - ratio_scaled).abs() < 1e-15,
        "scale ratio must be invariant: base={ratio_base:.3e} scaled={ratio_scaled:.3e}"
    );
}
#[test]
fn pinned_axes_mask_detects_corner_points() {
    // Axes sitting on (or within tolerance of) a bound are pinned; the
    // interior axis is not. Without bounds nothing can be pinned.
    let bounds = (array![0.0, 0.0, -10.0, 0.0], array![1.0, 1.0, 10.0, 1.0]);
    let x = array![0.0, 1.0, 0.0, 1.0 - 1e-15];
    assert_eq!(
        pinned_axes_mask(&x, Some(&bounds)),
        vec![true, true, false, true]
    );
    assert_eq!(pinned_axes_mask(&x, None), vec![false; 4]);
}
#[test]
fn clipped_axes_mask_marks_projection_changes() {
    // Exactly the axes that projection actually moved are flagged.
    let raw = array![1.5, 0.5, -0.2, 0.7];
    let projected = array![1.0, 0.5, 0.0, 0.7];
    assert_eq!(
        clipped_axes_mask(&raw, &projected),
        vec![true, false, true, false]
    );
    // A point already inside the box is untouched, so nothing is flagged.
    assert_eq!(clipped_axes_mask(&projected, &projected), vec![false; 4]);
}
#[test]
fn union_active_with_pinned_is_logical_or() {
    // Merging is a plain element-wise logical OR of the two masks.
    let base = vec![false, true, false, false];
    let pinned = vec![false, false, true, false];
    assert_eq!(
        union_active_with_pinned(&base, &pinned),
        vec![false, true, true, false]
    );
}
#[test]
fn active_set_step_unblocks_corner_clamp_on_4d_quadratic() {
    // Scenario: an interior iterate whose trial step clips axis 0 at its
    // upper bound. The base KKT mask is empty, so only the clip-derived
    // "extra" mask pins axis 0; the active-set retry must then make real
    // progress in the remaining unpinned subspace.
    // Identity Hv operator — the model is a plain quadratic.
    struct IdHess(usize);
    impl OuterHessianOperator for IdHess {
        fn dim(&self) -> usize {
            self.0
        }
        fn matvec(&self, v: &Array1<f64>) -> Result<Array1<f64>, String> {
            Ok(v.clone())
        }
    }
    let n = 4;
    let op = IdHess(n);
    // Only axis 0 has a tight box; the others are effectively free.
    let lower = array![0.0, -10.0, -10.0, -10.0];
    let upper = array![1.0, 10.0, 10.0, 10.0];
    let bounds = (lower.clone(), upper.clone());
    let x_k = array![0.5, 0.0, 0.0, 0.0];
    let g = array![-0.3, 0.5, -0.5, 0.3];
    // x_k is strictly interior, so projection must not alter the gradient.
    let g_proj = projected_gradient(&x_k, &g, Some(&bounds));
    for idx in 0..n {
        assert!((g_proj[idx] - g[idx]).abs() < 1e-15);
    }
    let base_active = active_mask(&x_k, &g, Some(&bounds));
    assert_eq!(
        base_active,
        vec![false; n],
        "interior x_k must yield empty KKT active mask"
    );
    // Trial step pushes axis 0 from 0.5 to 1.5 — past the upper bound of 1.
    let s_unclipped = array![1.0, -0.2, 0.2, -0.1];
    let x_raw = &x_k + &s_unclipped;
    let x_proj = project_to_bounds(&x_raw, Some(&bounds));
    let clipped = clipped_axes_mask(&x_raw, &x_proj);
    let pinned = pinned_axes_mask(&x_k, Some(&bounds));
    // Extra mask = clipped ∪ pinned; here only axis 0 should be flagged.
    let extra: Vec<bool> = clipped
        .iter()
        .zip(pinned.iter())
        .map(|(c, p)| *c || *p)
        .collect();
    assert_eq!(extra, vec![true, false, false, false]);
    let trust_radius = 0.5_f64;
    let result = compute_active_set_step(&op, &g_proj, &base_active, &extra, trust_radius)
        .expect("compute_active_set_step must not error");
    let (step, pred, extended) = result.expect(
        "active-set step must produce a feasible reduced-space step on this 4D quadratic",
    );
    // The extended mask must reflect the one pinned axis, the step must stay
    // zero there, predict a decrease, respect the trust radius, and still
    // move in the free subspace.
    assert_eq!(extended, vec![true, false, false, false]);
    assert_eq!(step[0], 0.0, "active-set step must be zero on pinned axis");
    assert!(
        pred > 0.0,
        "expected positive predicted decrease, got {pred}"
    );
    let s_norm = step.dot(&step).sqrt();
    assert!(
        s_norm <= trust_radius * (1.0 + 1e-9),
        "active-set step must respect trust radius: ‖s‖={s_norm:.3e} > Δ={trust_radius:.3e}"
    );
    let reduced_norm = (1..n).map(|i| step[i] * step[i]).sum::<f64>().sqrt();
    assert!(
        reduced_norm > 0.0,
        "active-set step must produce non-zero motion in the unpinned subspace"
    );
}
#[test]
fn active_set_step_returns_none_at_true_corner() {
    // With every axis active there is no free subspace left to move in, so
    // the active-set retry must decline to produce a step.
    struct IdHess(usize);
    impl OuterHessianOperator for IdHess {
        fn dim(&self) -> usize {
            self.0
        }
        fn matvec(&self, v: &Array1<f64>) -> Result<Array1<f64>, String> {
            Ok(v.clone())
        }
    }
    let op = IdHess(2);
    let g = array![1.0, -1.0];
    let all_active = vec![true, true];
    let outcome =
        compute_active_set_step(&op, &g, &all_active, &all_active, 0.5).expect("must not error");
    assert!(outcome.is_none());
}
#[test]
fn active_set_step_returns_none_when_extended_matches_base() {
    // If the extra mask pins nothing beyond the base KKT mask, the retry
    // would just repeat the rejected step, so it must return None.
    struct IdHess(usize);
    impl OuterHessianOperator for IdHess {
        fn dim(&self) -> usize {
            self.0
        }
        fn matvec(&self, v: &Array1<f64>) -> Result<Array1<f64>, String> {
            Ok(v.clone())
        }
    }
    let op = IdHess(3);
    let g = array![0.1, -0.2, 0.3];
    let base = vec![true, false, false];
    let extra = vec![false, false, false];
    let outcome = compute_active_set_step(&op, &g, &base, &extra, 0.5).expect("must not error");
    assert!(outcome.is_none());
}
// Test double: advertises cheap dense materialization but always fails it,
// so tests can exercise the materialization-failure path at seed validation.
struct FailingSeedMaterializationOperator {
    dim: usize,
}
impl OuterHessianOperator for FailingSeedMaterializationOperator {
    fn dim(&self) -> usize {
        self.dim
    }
    // Identity Hv product — materialization is the only faulty operation.
    fn matvec(&self, v: &Array1<f64>) -> Result<Array1<f64>, String> {
        Ok(v.clone())
    }
    fn is_cheap_to_materialize(&self) -> bool {
        true
    }
    fn materialize_dense(&self) -> Result<Array2<f64>, String> {
        Err("seed materialization failed".to_string())
    }
}
// Minimal well-behaved 1-D operator: Hv = v, used where a trivially valid
// Hessian operator is needed.
struct IdentityOuterHessianOperator;
impl OuterHessianOperator for IdentityOuterHessianOperator {
    fn dim(&self) -> usize {
        1
    }
    fn matvec(&self, v: &Array1<f64>) -> Result<Array1<f64>, String> {
        Ok(v.clone())
    }
}
#[test]
fn materialize_dense_uses_single_batched_mul_mat() {
    // Operator that counts how it is probed: per-column matvec calls vs.
    // batched mul_mat calls (and how many RHS columns each batch carried).
    struct BatchedOnlyHessian {
        matrix: Array2<f64>,
        matvec_calls: Arc<AtomicUsize>,
        mul_mat_calls: Arc<AtomicUsize>,
        rhs_columns: Arc<AtomicUsize>,
    }
    impl OuterHessianOperator for BatchedOnlyHessian {
        fn dim(&self) -> usize {
            self.matrix.nrows()
        }
        fn matvec(&self, v: &Array1<f64>) -> Result<Array1<f64>, String> {
            self.matvec_calls.fetch_add(1, Ordering::Relaxed);
            Ok(self.matrix.dot(v))
        }
        fn mul_mat(&self, factor: ArrayView2<'_, f64>) -> Result<Array2<f64>, String> {
            self.mul_mat_calls.fetch_add(1, Ordering::Relaxed);
            self.rhs_columns
                .fetch_add(factor.ncols(), Ordering::Relaxed);
            Ok(self.matrix.dot(&factor))
        }
    }
    let matvec_calls = Arc::new(AtomicUsize::new(0));
    let mul_mat_calls = Arc::new(AtomicUsize::new(0));
    let rhs_columns = Arc::new(AtomicUsize::new(0));
    // Deliberately non-symmetric input matrix.
    let op = BatchedOnlyHessian {
        matrix: array![[2.0, 0.25, -0.5], [0.5, 3.0, 1.0], [-0.25, 2.0, 4.0]],
        matvec_calls: Arc::clone(&matvec_calls),
        mul_mat_calls: Arc::clone(&mul_mat_calls),
        rhs_columns: Arc::clone(&rhs_columns),
    };
    let dense = op
        .materialize_dense()
        .expect("batched dense materialization");
    // Expected output is the symmetrized matrix (A + Aᵀ) / 2 — e.g. the
    // (0,1)/(1,0) entries 0.25 and 0.5 average to 0.375.
    let expected = array![[2.0, 0.375, -0.375], [0.375, 3.0, 1.5], [-0.375, 1.5, 4.0]];
    assert_eq!(dense, expected);
    assert_eq!(
        mul_mat_calls.load(Ordering::Relaxed),
        1,
        "dense materialization must batch all identity columns into one mul_mat call"
    );
    assert_eq!(
        rhs_columns.load(Ordering::Relaxed),
        3,
        "the single batched materialization call must include every identity RHS"
    );
    assert_eq!(
        matvec_calls.load(Ordering::Relaxed),
        0,
        "operators with batched mul_mat must not be probed column-by-column"
    );
}
#[test]
fn plan_analytic_hessian_selects_arc() {
    // Analytic gradient + analytic Hessian routes to the ARC solver with
    // the analytic Hessian source.
    let capability = OuterCapability {
        n_params: 3,
        psi_dim: 0,
        gradient: Derivative::Analytic,
        hessian: Derivative::Analytic,
        fixed_point_available: false,
        disable_fixed_point: false,
        prefer_gradient_only: false,
        barrier_config: None,
    };
    let chosen = plan(&capability);
    assert_eq!(chosen.solver, Solver::Arc);
    assert_eq!(chosen.hessian_source, HessianSource::Analytic);
}
#[test]
fn plan_prefer_gradient_only_does_not_hide_analytic_hessian() {
    // The prefer_gradient_only hint must not override an available analytic
    // Hessian: ARC is still the right choice.
    let capability = OuterCapability {
        n_params: 3,
        psi_dim: 1,
        gradient: Derivative::Analytic,
        hessian: Derivative::Analytic,
        fixed_point_available: true,
        disable_fixed_point: false,
        prefer_gradient_only: true,
        barrier_config: None,
    };
    let chosen = plan(&capability);
    assert_eq!(chosen.solver, Solver::Arc);
    assert_eq!(chosen.hessian_source, HessianSource::Analytic);
}
#[test]
fn plan_survival_baseline_exact_hessian_selects_arc() {
    // Survival-baseline capability with an exact Hessian also lands on ARC.
    // NOTE(review): this capability is identical to the one in
    // plan_analytic_hessian_selects_arc — kept for the named scenario.
    let capability = OuterCapability {
        n_params: 3,
        psi_dim: 0,
        gradient: Derivative::Analytic,
        hessian: Derivative::Analytic,
        fixed_point_available: false,
        disable_fixed_point: false,
        prefer_gradient_only: false,
        barrier_config: None,
    };
    let chosen = plan(&capability);
    assert_eq!(chosen.solver, Solver::Arc);
    assert_eq!(chosen.hessian_source, HessianSource::Analytic);
}
#[test]
fn plan_no_hessian_few_params_selects_bfgs() {
    // Gradient-only with few parameters falls back to BFGS with its
    // approximate Hessian.
    let capability = OuterCapability {
        n_params: 3,
        psi_dim: 0,
        gradient: Derivative::Analytic,
        hessian: Derivative::Unavailable,
        fixed_point_available: false,
        disable_fixed_point: false,
        prefer_gradient_only: false,
        barrier_config: None,
    };
    let chosen = plan(&capability);
    assert_eq!(chosen.solver, Solver::Bfgs);
    assert_eq!(chosen.hessian_source, HessianSource::BfgsApprox);
}
#[test]
fn plan_no_hessian_many_params_selects_bfgs() {
    // Without a fixed-point option, even a larger parameter count stays on
    // BFGS when no Hessian is available.
    let capability = OuterCapability {
        n_params: 12,
        psi_dim: 0,
        gradient: Derivative::Analytic,
        hessian: Derivative::Unavailable,
        fixed_point_available: false,
        disable_fixed_point: false,
        prefer_gradient_only: false,
        barrier_config: None,
    };
    let chosen = plan(&capability);
    assert_eq!(chosen.solver, Solver::Bfgs);
    assert_eq!(chosen.hessian_source, HessianSource::BfgsApprox);
}
#[test]
fn plan_boundary_8_params_uses_bfgs() {
    // Exactly at the small-problem threshold, BFGS is still selected.
    let capability = OuterCapability {
        n_params: SMALL_OUTER_BFGS_MAX_PARAMS,
        psi_dim: 0,
        gradient: Derivative::Analytic,
        hessian: Derivative::Unavailable,
        fixed_point_available: false,
        disable_fixed_point: false,
        prefer_gradient_only: false,
        barrier_config: None,
    };
    let chosen = plan(&capability);
    assert_eq!(chosen.solver, Solver::Bfgs);
    assert_eq!(chosen.hessian_source, HessianSource::BfgsApprox);
}
#[test]
fn plan_boundary_9_params_uses_bfgs() {
    // One past the small-problem threshold still selects BFGS when no
    // fixed-point alternative exists.
    let capability = OuterCapability {
        n_params: SMALL_OUTER_BFGS_MAX_PARAMS + 1,
        psi_dim: 0,
        gradient: Derivative::Analytic,
        hessian: Derivative::Unavailable,
        fixed_point_available: false,
        disable_fixed_point: false,
        prefer_gradient_only: false,
        barrier_config: None,
    };
    let chosen = plan(&capability);
    assert_eq!(chosen.solver, Solver::Bfgs);
    assert_eq!(chosen.hessian_source, HessianSource::BfgsApprox);
}
#[test]
fn plan_efs_selected_for_penalty_like_many_params() {
    // Many parameters + fixed-point availability: EFS wins, with the
    // fixed-point Hessian source.
    let capability = OuterCapability {
        n_params: 15,
        psi_dim: 0,
        gradient: Derivative::Analytic,
        hessian: Derivative::Unavailable,
        fixed_point_available: true,
        disable_fixed_point: false,
        prefer_gradient_only: false,
        barrier_config: None,
    };
    let chosen = plan(&capability);
    assert_eq!(chosen.solver, Solver::Efs);
    assert_eq!(chosen.hessian_source, HessianSource::EfsFixedPoint);
}
#[test]
fn plan_penalty_like_without_fixed_point_stays_bfgs() {
    // Same shape as the EFS case but with the fixed-point machinery
    // unavailable — the planner must keep BFGS.
    let capability = OuterCapability {
        n_params: 15,
        psi_dim: 0,
        gradient: Derivative::Analytic,
        hessian: Derivative::Unavailable,
        fixed_point_available: false,
        disable_fixed_point: false,
        prefer_gradient_only: false,
        barrier_config: None,
    };
    let chosen = plan(&capability);
    assert_eq!(chosen.solver, Solver::Bfgs);
    assert_eq!(chosen.hessian_source, HessianSource::BfgsApprox);
}
#[test]
fn plan_efs_not_selected_few_params_even_if_penalty_like() {
    // With only a handful of parameters, BFGS beats EFS even when the
    // fixed-point path is available.
    let capability = OuterCapability {
        n_params: 5,
        psi_dim: 0,
        gradient: Derivative::Analytic,
        hessian: Derivative::Unavailable,
        fixed_point_available: true,
        disable_fixed_point: false,
        prefer_gradient_only: false,
        barrier_config: None,
    };
    let chosen = plan(&capability);
    assert_eq!(chosen.solver, Solver::Bfgs);
    assert_eq!(chosen.hessian_source, HessianSource::BfgsApprox);
}
#[test]
fn plan_efs_not_selected_with_analytic_hessian() {
    // An analytic Hessian trumps the fixed-point path: ARC is selected even
    // for a large penalty-like problem.
    let capability = OuterCapability {
        n_params: 20,
        psi_dim: 0,
        gradient: Derivative::Analytic,
        hessian: Derivative::Analytic,
        fixed_point_available: true,
        disable_fixed_point: false,
        prefer_gradient_only: false,
        barrier_config: None,
    };
    let chosen = plan(&capability);
    assert_eq!(chosen.solver, Solver::Arc);
}
#[test]
fn plan_efs_with_no_gradient_penalty_like_many_params() {
    // Even without any gradient, a large problem with the fixed-point path
    // available should go to EFS.
    let capability = OuterCapability {
        n_params: 20,
        psi_dim: 0,
        gradient: Derivative::Unavailable,
        hessian: Derivative::Unavailable,
        fixed_point_available: true,
        disable_fixed_point: false,
        prefer_gradient_only: false,
        barrier_config: None,
    };
    let chosen = plan(&capability);
    assert_eq!(chosen.solver, Solver::Efs);
    assert_eq!(chosen.hessian_source, HessianSource::EfsFixedPoint);
}
#[test]
fn plan_efs_allowed_with_barrier_config() {
    // A barrier on some coordinates does not veto EFS at planning time.
    let barrier = BarrierConfig {
        tau: 1e-6,
        constrained_indices: vec![0, 1],
        lower_bounds: vec![0.0, 0.0],
    };
    let capability = OuterCapability {
        n_params: 15,
        psi_dim: 0,
        gradient: Derivative::Analytic,
        hessian: Derivative::Unavailable,
        fixed_point_available: true,
        disable_fixed_point: false,
        prefer_gradient_only: false,
        barrier_config: Some(barrier),
    };
    let chosen = plan(&capability);
    assert_eq!(chosen.solver, Solver::Efs);
    assert_eq!(chosen.hessian_source, HessianSource::EfsFixedPoint);
}
#[test]
fn plan_efs_allowed_with_barrier_config_no_gradient() {
    // Barrier present and no gradient at all: EFS is still the plan.
    let barrier = BarrierConfig {
        tau: 1e-6,
        constrained_indices: vec![0],
        lower_bounds: vec![0.0],
    };
    let capability = OuterCapability {
        n_params: 20,
        psi_dim: 0,
        gradient: Derivative::Unavailable,
        hessian: Derivative::Unavailable,
        fixed_point_available: true,
        disable_fixed_point: false,
        prefer_gradient_only: false,
        barrier_config: Some(barrier),
    };
    let chosen = plan(&capability);
    assert_eq!(chosen.solver, Solver::Efs);
    assert_eq!(chosen.hessian_source, HessianSource::EfsFixedPoint);
}
#[test]
fn barrier_curvature_significant_blocks_efs_at_runtime() {
    // Near the constrained lower bound the barrier curvature is significant;
    // far from it the barrier contribution is negligible.
    let barrier = BarrierConfig {
        tau: 1e-6,
        constrained_indices: vec![0],
        lower_bounds: vec![0.0],
    };
    let beta_near = array![0.001];
    assert!(barrier.barrier_curvature_is_significant(&beta_near, 1.0, 0.01));
    let beta_far = array![10.0];
    assert!(!barrier.barrier_curvature_is_significant(&beta_far, 1.0, 0.01));
}
#[test]
fn hessian_result_unwrap_analytic() {
    // Unwrapping an analytic result hands back the stored matrix intact.
    let identity = Array2::<f64>::eye(3);
    let wrapped = HessianResult::Analytic(identity.clone());
    assert!(wrapped.is_analytic());
    assert_eq!(wrapped.unwrap_analytic(), identity);
}
#[test]
#[should_panic(expected = "expected analytic Hessian")]
fn hessian_result_unwrap_unavailable_panics() {
    // Unwrapping the Unavailable variant is a programmer error and panics.
    HessianResult::Unavailable.unwrap_analytic();
}
#[test]
fn zero_params_selects_arc() {
    // The degenerate zero-parameter case still plans cleanly onto ARC.
    let capability = OuterCapability {
        n_params: 0,
        psi_dim: 0,
        gradient: Derivative::Analytic,
        hessian: Derivative::Analytic,
        fixed_point_available: false,
        disable_fixed_point: false,
        prefer_gradient_only: false,
        barrier_config: None,
    };
    let chosen = plan(&capability);
    assert_eq!(chosen.solver, Solver::Arc);
    assert_eq!(chosen.hessian_source, HessianSource::Analytic);
}
#[test]
fn hessian_result_into_option() {
    // Analytic maps to Some(matrix); Unavailable maps to None.
    let identity = Array2::<f64>::eye(2);
    assert_eq!(
        HessianResult::Analytic(identity.clone()).into_option(),
        Some(identity)
    );
    assert_eq!(HessianResult::Unavailable.into_option(), None);
}
#[test]
fn closure_objective_delegates() {
    // A ClosureObjective built from plain closures must forward capability()
    // and eval_cost() to the supplied pieces.
    let mut obj = ClosureObjective {
        state: 42_i32,
        cap: OuterCapability {
            gradient: Derivative::Analytic,
            hessian: Derivative::Unavailable,
            n_params: 1,
            psi_dim: 0,
            fixed_point_available: false,
            barrier_config: None,
            prefer_gradient_only: false,
            disable_fixed_point: false,
        },
        // Cost is a constant 1.0 regardless of state or point.
        cost_fn: |_: &mut i32, _: &Array1<f64>| Ok(1.0),
        eval_fn: |_: &mut i32, _: &Array1<f64>| {
            Ok(OuterEval {
                cost: 1.0,
                gradient: Array1::zeros(1),
                hessian: HessianResult::Unavailable,
            })
        },
        // Optional hooks left unset; explicit fn-pointer types satisfy the
        // generic parameters without providing implementations.
        eval_order_fn: None::<
            fn(&mut i32, &Array1<f64>, OuterEvalOrder) -> Result<OuterEval, EstimationError>,
        >,
        reset_fn: Some(|st: &mut i32| {
            *st = 42;
        }),
        efs_fn: None::<fn(&mut i32, &Array1<f64>) -> Result<EfsEval, EstimationError>>,
        screening_proxy_fn: None::<fn(&mut i32, &Array1<f64>) -> Result<f64, EstimationError>>,
    };
    assert_eq!(obj.capability().n_params, 1);
    assert_eq!(obj.eval_cost(&Array1::zeros(1)).unwrap(), 1.0);
}
#[test]
fn hybrid_efs_backtracking_uses_half_step_after_first_rejection() {
    // Scenario: the EFS hook proposes a full unit step on psi (index 11),
    // but the cost function makes psi=1 worse (2.0) than psi=0 (1.0) while
    // psi=0.5 improves (0.5). The bridge must reject the full step and
    // accept the halved one.
    let cap = OuterCapability {
        gradient: Derivative::Analytic,
        hessian: Derivative::Unavailable,
        n_params: 12,
        psi_dim: 1,
        fixed_point_available: true,
        barrier_config: None,
        prefer_gradient_only: false,
        disable_fixed_point: false,
    };
    let mut obj = ClosureObjective {
        state: (),
        cap: cap.clone(),
        // Piecewise cost over psi: 1.0 at 0, 0.5 at 0.5, 2.0 elsewhere.
        cost_fn: |_: &mut (), theta: &Array1<f64>| {
            let psi = theta[11];
            let cost = if (psi - 0.0).abs() < 1e-12 {
                1.0
            } else if (psi - 0.5).abs() < 1e-12 {
                0.5
            } else {
                2.0
            };
            Ok(cost)
        },
        eval_fn: |_: &mut (), theta: &Array1<f64>| {
            Ok(OuterEval {
                cost: theta[11].abs(),
                gradient: Array1::zeros(theta.len()),
                hessian: HessianResult::Unavailable,
            })
        },
        eval_order_fn: None::<
            fn(&mut (), &Array1<f64>, OuterEvalOrder) -> Result<OuterEval, EstimationError>,
        >,
        reset_fn: None::<fn(&mut ())>,
        // EFS hook: propose a full +1.0 step on psi only.
        efs_fn: Some(|_: &mut (), theta: &Array1<f64>| {
            let mut steps = vec![0.0; theta.len()];
            steps[11] = 1.0;
            Ok(EfsEval {
                cost: 1.0,
                steps,
                beta: None,
                psi_gradient: Some(array![1.0]),
                psi_indices: Some(vec![11]),
            })
        }),
        screening_proxy_fn: None::<fn(&mut (), &Array1<f64>) -> Result<f64, EstimationError>>,
    };
    let mut bridge = OuterFixedPointBridge {
        obj: &mut obj,
        layout: cap.theta_layout(),
        barrier_config: None,
        consecutive_psi_zero_iters: 0,
    };
    let sample = bridge
        .eval_step(&Array1::zeros(cap.n_params))
        .expect("hybrid EFS step should backtrack cleanly");
    assert_eq!(sample.status, FixedPointStatus::Continue);
    assert_eq!(sample.step.len(), cap.n_params);
    // The accepted step is the halved proposal on psi...
    assert_eq!(sample.step[11], 0.5);
    // ...and every other coordinate remains untouched.
    assert!(
        sample
            .step
            .iter()
            .enumerate()
            .all(|(idx, &value)| idx == 11 || value == 0.0)
    );
}
#[test]
fn run_bfgs_mode_aware_eval_skips_hessian_work() {
    // BFGS has no use for a Hessian, so the order-aware eval hook must be
    // invoked with ValueAndGradient only — never the Hessian order — and the
    // legacy eager eval must never be reached.
    let seen_orders = Arc::new(Mutex::new(Vec::new()));
    let problem = OuterProblem::new(1)
        .with_gradient(Derivative::Analytic)
        .with_hessian(Derivative::Unavailable)
        .with_initial_rho(array![1.0])
        .with_max_iter(1);
    let mut obj = problem.build_objective_with_eval_order(
        (),
        |_: &mut (), theta: &Array1<f64>| Ok(theta[0] * theta[0]),
        // Legacy eager eval path: erroring here proves it is never called.
        |_: &mut (), _: &Array1<f64>| {
            Err(EstimationError::InvalidInput(
                "legacy eager eval should not run on BFGS".to_string(),
            ))
        },
        // Order-aware eval: record each requested order for later assertion.
        {
            let seen_orders = Arc::clone(&seen_orders);
            move |_: &mut (), theta: &Array1<f64>, order: OuterEvalOrder| {
                seen_orders.lock().unwrap().push(order);
                Ok(OuterEval {
                    cost: theta[0] * theta[0],
                    gradient: array![2.0 * theta[0]],
                    hessian: HessianResult::Unavailable,
                })
            }
        },
        None::<fn(&mut ())>,
        None::<fn(&mut (), &Array1<f64>) -> Result<EfsEval, EstimationError>>,
    );
    let result = problem
        .run(&mut obj, "mode-aware bfgs first order")
        .expect("BFGS should use the order-aware first-order bridge");
    assert_eq!(result.plan_used.solver, Solver::Bfgs);
    let seen_orders = seen_orders.lock().unwrap();
    assert!(
        !seen_orders.is_empty(),
        "mode-aware eval hook should have been used"
    );
    assert!(
        seen_orders
            .iter()
            .all(|order| *order == OuterEvalOrder::ValueAndGradient),
        "BFGS should request only value+gradient, saw {seen_orders:?}"
    );
}
#[test]
fn outer_second_order_bridge_separates_first_and_second_order_requests() {
    // Records the OuterEvalOrder of every eval so we can assert the bridge
    // only escalates to Hessian work on the explicit second-order entry point.
    let seen_orders = Arc::new(Mutex::new(Vec::new()));
    let problem = OuterProblem::new(1)
        .with_gradient(Derivative::Analytic)
        .with_hessian(Derivative::Analytic);
    let mut obj = problem.build_objective_with_eval_order(
        (),
        |_: &mut (), theta: &Array1<f64>| Ok(theta[0] * theta[0]),
        // Legacy eager hook must be bypassed when the order-aware hook exists.
        |_: &mut (), _: &Array1<f64>| {
            Err(EstimationError::InvalidInput(
                "legacy eager eval should not run".to_string(),
            ))
        },
        {
            let seen_orders = Arc::clone(&seen_orders);
            move |_: &mut (), theta: &Array1<f64>, order: OuterEvalOrder| {
                seen_orders.lock().unwrap().push(order);
                Ok(OuterEval {
                    cost: theta[0] * theta[0],
                    gradient: array![2.0 * theta[0]],
                    // Materialize the analytic Hessian only when requested.
                    hessian: match order {
                        OuterEvalOrder::ValueAndGradient => HessianResult::Unavailable,
                        OuterEvalOrder::ValueGradientHessian => {
                            HessianResult::Analytic(array![[2.0]])
                        }
                    },
                })
            }
        },
        None::<fn(&mut ())>,
        None::<fn(&mut (), &Array1<f64>) -> Result<EfsEval, EstimationError>>,
    );
    let mut bridge = OuterSecondOrderBridge {
        obj: &mut obj,
        layout: OuterThetaLayout::new(1, 0),
        hessian_source: HessianSource::Analytic,
        materialize_operator_max_dim: OUTER_HVP_MATERIALIZE_MAX_DIM,
        eval_count: 0,
        outer_inner_cap: None,
        g_norm_initial: None,
        last_g_norm: None,
    };
    // First-order call at theta = 1: f = 1, f' = 2, no Hessian requested.
    let grad_sample =
        FirstOrderObjective::eval_grad(&mut bridge, &array![1.0]).expect("grad eval");
    assert_eq!(grad_sample.value, 1.0);
    assert_eq!(grad_sample.gradient, array![2.0]);
    // Second-order call at the same point: the Hessian is now populated.
    let hess_sample =
        SecondOrderObjective::eval_hessian(&mut bridge, &array![1.0]).expect("hessian eval");
    assert_eq!(hess_sample.value, 1.0);
    assert_eq!(hess_sample.gradient, array![2.0]);
    assert_eq!(hess_sample.hessian, Some(array![[2.0]]));
    let seen_orders = seen_orders.lock().unwrap();
    assert!(
        *seen_orders
            == vec![
                OuterEvalOrder::ValueAndGradient,
                OuterEvalOrder::ValueGradientHessian
            ],
        "second-order bridge should split first-order and second-order requests, saw {seen_orders:?}"
    );
}
#[test]
fn outer_config_default() {
    // Pin the documented defaults so accidental changes are caught in review.
    let defaults = OuterConfig::default();
    assert_eq!(defaults.tolerance, 1e-5);
    assert_eq!(defaults.max_iter, 200);
    assert_eq!(defaults.rho_bound, 30.0);
}
#[test]
fn plan_hybrid_efs_selected_for_psi_coords_many_params() {
    // A psi coordinate plus many parameters and a usable fixed point should
    // route the planner onto the hybrid EFS arm.
    let capability = OuterCapability {
        n_params: 15,
        psi_dim: 1,
        gradient: Derivative::Analytic,
        hessian: Derivative::Unavailable,
        fixed_point_available: true,
        barrier_config: None,
        prefer_gradient_only: false,
        disable_fixed_point: false,
    };
    let planned = plan(&capability);
    assert_eq!(planned.solver, Solver::HybridEfs);
    assert_eq!(planned.hessian_source, HessianSource::HybridEfsFixedPoint);
}
#[test]
fn plan_psi_without_fixed_point_stays_bfgs() {
    // Same shape as the hybrid-EFS case but with no fixed point available:
    // the planner has to stay on BFGS with its approximate Hessian.
    let capability = OuterCapability {
        n_params: 15,
        psi_dim: 1,
        gradient: Derivative::Analytic,
        hessian: Derivative::Unavailable,
        fixed_point_available: false,
        barrier_config: None,
        prefer_gradient_only: false,
        disable_fixed_point: false,
    };
    let planned = plan(&capability);
    assert_eq!(planned.solver, Solver::Bfgs);
    assert_eq!(planned.hessian_source, HessianSource::BfgsApprox);
}
#[test]
fn plan_hybrid_efs_no_gradient_selected_for_psi_coords() {
    // Even without a gradient, psi coordinates plus an available fixed point
    // keep the planner on the hybrid EFS solver.
    let capability = OuterCapability {
        n_params: 15,
        psi_dim: 1,
        gradient: Derivative::Unavailable,
        hessian: Derivative::Unavailable,
        fixed_point_available: true,
        barrier_config: None,
        prefer_gradient_only: false,
        disable_fixed_point: false,
    };
    let planned = plan(&capability);
    assert_eq!(planned.solver, Solver::HybridEfs);
    assert_eq!(planned.hessian_source, HessianSource::HybridEfsFixedPoint);
}
/// Builds a routing-test capability: no psi coordinates, no fixed point,
/// and every optional planner hint switched off.
fn cap_for_routing(
    gradient: Derivative,
    hessian: Derivative,
    n_params: usize,
) -> OuterCapability {
    OuterCapability {
        n_params,
        psi_dim: 0,
        gradient,
        hessian,
        fixed_point_available: false,
        barrier_config: None,
        prefer_gradient_only: false,
        disable_fixed_point: false,
    }
}
#[test]
fn routing_analytic_analytic_stays_arc_at_biobank_scale() {
    // Fully analytic derivatives at biobank scale stay on ARC + exact Hessian.
    let capability = cap_for_routing(Derivative::Analytic, Derivative::Analytic, 6);
    let planned = plan(&capability);
    assert_eq!(planned.solver, Solver::Arc);
    assert_eq!(planned.hessian_source, HessianSource::Analytic);
}
#[test]
fn routing_analytic_analytic_stays_arc_at_dense_work_scale() {
    // A small dense-work parameter count does not change the ARC routing.
    let capability = cap_for_routing(Derivative::Analytic, Derivative::Analytic, 3);
    let planned = plan(&capability);
    assert_eq!(planned.solver, Solver::Arc);
    assert_eq!(planned.hessian_source, HessianSource::Analytic);
}
#[test]
fn routing_unavailable_hessian_routes_to_bfgs() {
    // No Hessian and no fixed point leaves BFGS as the only viable solver.
    let capability = cap_for_routing(Derivative::Analytic, Derivative::Unavailable, 8);
    let planned = plan(&capability);
    assert_eq!(planned.solver, Solver::Bfgs);
    assert_eq!(planned.hessian_source, HessianSource::BfgsApprox);
}
#[test]
fn routing_explicit_prefer_gradient_only_does_not_override_exact_hessian() {
    // The prefer_gradient_only hint must not demote an exact-Hessian plan.
    let mut capability = cap_for_routing(Derivative::Analytic, Derivative::Analytic, 6);
    capability.prefer_gradient_only = true;
    let planned = plan(&capability);
    assert_eq!(planned.solver, Solver::Arc);
    assert_eq!(planned.hessian_source, HessianSource::Analytic);
}
#[test]
fn routing_log_line_arc_analytic_advertises_matrix_free() {
    // ARC with an analytic Hessian is the matrix-free configuration and its
    // routing log line should advertise all three facts.
    let arc_plan = OuterPlan {
        solver: Solver::Arc,
        hessian_source: HessianSource::Analytic,
    };
    let line = arc_plan.routing_log_line();
    for needle in ["solver=Arc", "hessian=Analytic", "matrix-free=true"] {
        assert!(line.contains(needle), "got {line}");
    }
}
#[test]
fn routing_log_line_bfgs_reports_no_matrix_free() {
    // BFGS with an approximate Hessian must not advertise matrix-free mode.
    let bfgs_plan = OuterPlan {
        solver: Solver::Bfgs,
        hessian_source: HessianSource::BfgsApprox,
    };
    let line = bfgs_plan.routing_log_line();
    for needle in ["solver=Bfgs", "hessian=BfgsApprox", "matrix-free=false"] {
        assert!(line.contains(needle), "got {line}");
    }
}
#[test]
fn routing_log_line_efs_reports_no_matrix_free() {
    // Neither EFS Hessian source may advertise matrix-free operation.
    let sources = [
        HessianSource::EfsFixedPoint,
        HessianSource::HybridEfsFixedPoint,
    ];
    for source in sources {
        let efs_plan = OuterPlan {
            solver: Solver::Efs,
            hessian_source: source,
        };
        assert!(
            efs_plan.routing_log_line().contains("matrix-free=false"),
            "{:?} should not advertise matrix-free",
            source
        );
    }
}
#[test]
fn routing_custom_family_gamlss_stays_on_arc_when_both_derivs_analytic() {
    // GAMLSS-shaped capability (4 params, both derivatives analytic) → ARC.
    let capability = cap_for_routing(Derivative::Analytic, Derivative::Analytic, 4);
    let planned = plan(&capability);
    assert_eq!(planned.solver, Solver::Arc);
    assert_eq!(planned.hessian_source, HessianSource::Analytic);
}
#[test]
fn routing_matern_iso_kappa_stays_on_arc_when_both_derivs_analytic() {
    // Matern-iso-kappa-shaped capability (5 params, fully analytic) → ARC.
    let capability = cap_for_routing(Derivative::Analytic, Derivative::Analytic, 5);
    let planned = plan(&capability);
    assert_eq!(planned.solver, Solver::Arc);
    assert_eq!(planned.hessian_source, HessianSource::Analytic);
}
#[test]
fn routing_matern_iso_large_kappa_dim_stays_on_arc_with_analytic_hessian() {
    // A large psi dimension with a usable fixed point must still lose to an
    // exact analytic Hessian: ARC keeps priority over the EFS arms.
    let capability = OuterCapability {
        n_params: 37,
        psi_dim: 31,
        gradient: Derivative::Analytic,
        hessian: Derivative::Analytic,
        fixed_point_available: true,
        barrier_config: None,
        prefer_gradient_only: false,
        disable_fixed_point: false,
    };
    let planned = plan(&capability);
    assert_eq!(planned.solver, Solver::Arc);
    assert_eq!(planned.hessian_source, HessianSource::Analytic);
}
#[test]
fn routing_marginal_slope_stays_on_arc_when_both_derivs_analytic() {
    // Marginal-slope-shaped capability (3 params, fully analytic) → ARC.
    let capability = cap_for_routing(Derivative::Analytic, Derivative::Analytic, 3);
    let planned = plan(&capability);
    assert_eq!(planned.solver, Solver::Arc);
    assert_eq!(planned.hessian_source, HessianSource::Analytic);
}
#[test]
fn plan_hybrid_efs_not_selected_few_params() {
    // With only a handful of parameters the hybrid EFS arm is not worth it;
    // the planner should keep plain BFGS.
    let capability = OuterCapability {
        n_params: 5,
        psi_dim: 1,
        gradient: Derivative::Analytic,
        hessian: Derivative::Unavailable,
        fixed_point_available: true,
        barrier_config: None,
        prefer_gradient_only: false,
        disable_fixed_point: false,
    };
    let planned = plan(&capability);
    assert_eq!(planned.solver, Solver::Bfgs);
    assert_eq!(planned.hessian_source, HessianSource::BfgsApprox);
}
#[test]
fn plan_exact_hvp_capability_selects_arc_even_when_fixed_point_is_available() {
    // Exact second-order information beats the fixed-point arm even when the
    // caller hints prefer_gradient_only and a fixed point exists.
    let capability = OuterCapability {
        n_params: 64,
        psi_dim: 16,
        gradient: Derivative::Analytic,
        hessian: Derivative::Analytic,
        fixed_point_available: true,
        barrier_config: None,
        prefer_gradient_only: true,
        disable_fixed_point: false,
    };
    let planned = plan(&capability);
    assert_eq!(planned.solver, Solver::Arc);
    assert_eq!(planned.hessian_source, HessianSource::Analytic);
}
#[test]
fn plan_hybrid_efs_not_selected_with_analytic_hessian() {
    // An analytic Hessian keeps ARC in charge even when the shape would
    // otherwise qualify for hybrid EFS.
    let capability = OuterCapability {
        n_params: 20,
        psi_dim: 1,
        gradient: Derivative::Analytic,
        hessian: Derivative::Analytic,
        fixed_point_available: true,
        barrier_config: None,
        prefer_gradient_only: false,
        disable_fixed_point: false,
    };
    let planned = plan(&capability);
    assert_eq!(planned.solver, Solver::Arc);
}
#[test]
fn plan_pure_efs_not_hybrid_when_all_penalty_like() {
    // psi_dim == 0 means all coordinates are penalty-like: the pure EFS
    // solver is selected rather than the hybrid variant.
    let capability = OuterCapability {
        n_params: 15,
        psi_dim: 0,
        gradient: Derivative::Analytic,
        hessian: Derivative::Unavailable,
        fixed_point_available: true,
        barrier_config: None,
        prefer_gradient_only: false,
        disable_fixed_point: false,
    };
    let planned = plan(&capability);
    assert_eq!(planned.solver, Solver::Efs);
    assert_eq!(planned.hessian_source, HessianSource::EfsFixedPoint);
}
#[test]
fn automatic_fallbacks_preserve_analytic_hessian_for_arc_primary() {
    // Fully analytic capability: ARC is primary and the automatic fallback
    // ladder must stay empty — ARC budget retries are the runner's job.
    let capability = OuterCapability {
        n_params: 12,
        psi_dim: 0,
        gradient: Derivative::Analytic,
        hessian: Derivative::Analytic,
        fixed_point_available: false,
        barrier_config: None,
        prefer_gradient_only: false,
        disable_fixed_point: false,
    };
    assert_eq!(plan(&capability).solver, Solver::Arc);
    let attempts = automatic_fallback_attempts(&capability);
    assert!(
        attempts.is_empty(),
        "ARC primary must not lateral-demote to BFGS+BfgsApprox; \
        ARC budget retries live in the runner",
    );
}
#[test]
fn automatic_fallbacks_from_efs_prefer_analytic_bfgs_over_fd() {
    // Pure-EFS primary: its first fallback must keep the analytic gradient
    // and disable the fixed point, landing on BFGS — never finite differences.
    let capability = OuterCapability {
        n_params: 15,
        psi_dim: 0,
        gradient: Derivative::Analytic,
        hessian: Derivative::Unavailable,
        fixed_point_available: true,
        barrier_config: None,
        prefer_gradient_only: false,
        disable_fixed_point: false,
    };
    assert_eq!(plan(&capability).solver, Solver::Efs);
    let attempts = automatic_fallback_attempts(&capability);
    assert!(!attempts.is_empty(), "EFS failure must have a fallback");
    let first = &attempts[0];
    assert_eq!(first.gradient, Derivative::Analytic);
    assert_eq!(first.hessian, Derivative::Unavailable);
    assert!(first.disable_fixed_point);
    assert_eq!(plan(first).solver, Solver::Bfgs);
    assert!(
        attempts.iter().all(|c| c.gradient == Derivative::Analytic),
        "fallback cascade must stay on analytic-gradient attempts",
    );
}
#[test]
fn automatic_fallbacks_from_hybrid_efs_prefer_analytic_bfgs_over_fd() {
    // Hybrid-EFS primary: the first fallback keeps the analytic gradient and
    // disables the fixed point, landing on BFGS.
    let capability = OuterCapability {
        n_params: 15,
        psi_dim: 2,
        gradient: Derivative::Analytic,
        hessian: Derivative::Unavailable,
        fixed_point_available: true,
        barrier_config: None,
        prefer_gradient_only: false,
        disable_fixed_point: false,
    };
    assert_eq!(plan(&capability).solver, Solver::HybridEfs);
    let attempts = automatic_fallback_attempts(&capability);
    assert!(!attempts.is_empty());
    let first = &attempts[0];
    assert_eq!(first.gradient, Derivative::Analytic);
    assert!(first.disable_fixed_point);
    assert_eq!(plan(first).solver, Solver::Bfgs);
}
#[test]
fn disabled_fallback_hybrid_efs_capability_routes_to_bfgs_primary() {
    // A capability that would plan HybridEfs but whose failure escape hatch
    // is EFS→BFGS; with FallbackPolicy::Disabled that escape must be applied
    // up front by primary_capability_for_config.
    let trapped_cap = OuterCapability {
        gradient: Derivative::Analytic,
        hessian: Derivative::Unavailable,
        n_params: 9,
        psi_dim: 6,
        fixed_point_available: true,
        barrier_config: None,
        prefer_gradient_only: false,
        disable_fixed_point: false,
    };
    assert_eq!(plan(&trapped_cap).solver, Solver::HybridEfs);
    let disabled_config = OuterConfig {
        fallback_policy: FallbackPolicy::Disabled,
        ..OuterConfig::default()
    };
    // Disabled policy: the HybridEfs shape is canonicalized to BFGS directly.
    let primary_cap = primary_capability_for_config(
        trapped_cap.clone(),
        &disabled_config,
        "biobank exact adaptive",
    );
    assert!(primary_cap.disable_fixed_point);
    assert_eq!(plan(&primary_cap).solver, Solver::Bfgs);
    // Pure EFS (psi_dim == 0) is left untouched even under Disabled policy.
    let pure_efs_cap = OuterCapability {
        psi_dim: 0,
        ..trapped_cap.clone()
    };
    assert_eq!(plan(&pure_efs_cap).solver, Solver::Efs);
    let pure_primary_cap =
        primary_capability_for_config(pure_efs_cap.clone(), &disabled_config, "pure EFS");
    assert!(!pure_primary_cap.disable_fixed_point);
    assert_eq!(plan(&pure_primary_cap).solver, Solver::Efs);
    // Without a gradient there is no BFGS to canonicalize to: HybridEfs stays.
    let no_gradient_cap = OuterCapability {
        gradient: Derivative::Unavailable,
        ..trapped_cap.clone()
    };
    assert_eq!(plan(&no_gradient_cap).solver, Solver::HybridEfs);
    let no_gradient_primary_cap = primary_capability_for_config(
        no_gradient_cap.clone(),
        &disabled_config,
        "gradient-unavailable hybrid EFS",
    );
    assert!(!no_gradient_primary_cap.disable_fixed_point);
    assert_eq!(plan(&no_gradient_primary_cap).solver, Solver::HybridEfs);
    // Automatic policy keeps the HybridEfs primary and defers the BFGS
    // escape to the fallback-attempt ladder instead.
    let automatic_config = OuterConfig::default();
    let automatic_cap = primary_capability_for_config(
        trapped_cap.clone(),
        &automatic_config,
        "biobank exact adaptive",
    );
    assert!(!automatic_cap.disable_fixed_point);
    assert_eq!(plan(&automatic_cap).solver, Solver::HybridEfs);
    let automatic_attempts = automatic_fallback_attempts(&trapped_cap);
    assert!(!automatic_attempts.is_empty());
    assert!(automatic_attempts[0].disable_fixed_point);
    assert_eq!(plan(&automatic_attempts[0]).solver, Solver::Bfgs);
}
#[test]
fn disabled_fallback_hybrid_efs_problem_uses_bfgs_without_calling_efs() {
    // Counts EFS hook invocations; with FallbackPolicy::Disabled the
    // HybridEFS-shaped problem must be canonicalized to BFGS up front, so
    // this counter must stay at zero.
    let efs_calls = Arc::new(AtomicUsize::new(0));
    let problem = OuterProblem::new(9)
        .with_gradient(Derivative::Analytic)
        .with_hessian(Derivative::Unavailable)
        .with_psi_dim(6)
        .with_fallback_policy(FallbackPolicy::Disabled)
        .with_initial_rho(Array1::zeros(9))
        .with_max_iter(5);
    let mut obj = problem.build_objective(
        (),
        |_: &mut (), theta: &Array1<f64>| Ok(0.5 * theta.dot(theta)),
        |_: &mut (), theta: &Array1<f64>| {
            Ok(OuterEval {
                cost: 0.5 * theta.dot(theta),
                gradient: theta.clone(),
                hessian: HessianResult::Unavailable,
            })
        },
        None::<fn(&mut ())>,
        {
            let efs_calls = Arc::clone(&efs_calls);
            // EFS hook: record the call, then fail with the marker that would
            // normally trigger the first-order fallback path.
            Some(move |_: &mut (), _: &Array1<f64>| {
                efs_calls.fetch_add(1, Ordering::Relaxed);
                Err(EstimationError::RemlOptimizationFailed(format!(
                    "{} synthetic biobank adaptive HybridEFS escape",
                    EFS_FIRST_ORDER_FALLBACK_MARKER,
                )))
            })
        },
    );
    let result = problem
        .run(&mut obj, "disabled fallback marker")
        .expect("disabled-fallback HybridEFS-shaped problem should route directly to BFGS");
    assert_eq!(result.plan_used.solver, Solver::Bfgs);
    assert_eq!(
        efs_calls.load(Ordering::Relaxed),
        0,
        "central primary-capability canonicalization should avoid the EFS hook entirely"
    );
}
#[test]
fn automatic_fallbacks_without_gradient_stop_at_fixed_point_status() {
    // Without a gradient there is nothing for BFGS to run on, so neither the
    // pure-EFS nor the hybrid-EFS shape may synthesize a fallback attempt.
    let cases = [(0, Solver::Efs), (2, Solver::HybridEfs)];
    for (psi_dim, expected_solver) in cases {
        let capability = OuterCapability {
            n_params: 15,
            psi_dim,
            gradient: Derivative::Unavailable,
            hessian: Derivative::Unavailable,
            fixed_point_available: true,
            barrier_config: None,
            prefer_gradient_only: false,
            disable_fixed_point: false,
        };
        assert_eq!(plan(&capability).solver, expected_solver);
        assert!(
            automatic_fallback_attempts(&capability).is_empty(),
            "gradient-unavailable fixed-point capabilities must not fabricate a BFGS fallback",
        );
    }
}
#[test]
fn automatic_fallbacks_do_not_repeat_arc_when_fixed_point_is_irrelevant() {
    // ARC primary with an incidental fixed point: the fallback ladder stays
    // empty rather than re-running ARC or demoting through the EFS arm.
    let capability = OuterCapability {
        n_params: 15,
        psi_dim: 0,
        gradient: Derivative::Analytic,
        hessian: Derivative::Analytic,
        fixed_point_available: true,
        barrier_config: None,
        prefer_gradient_only: false,
        disable_fixed_point: false,
    };
    assert_eq!(plan(&capability).solver, Solver::Arc);
    let attempts = automatic_fallback_attempts(&capability);
    assert!(
        attempts.is_empty(),
        "ARC primary with incidental fixed_point_available must not \
        cascade through the EFS arm or lateral-demote to BFGS",
    );
}
#[test]
fn plan_disable_fixed_point_forces_bfgs_even_when_efs_eligible() {
    // The disable_fixed_point override wins even when the shape would
    // otherwise qualify for the pure EFS arm.
    let capability = OuterCapability {
        n_params: 15,
        psi_dim: 0,
        gradient: Derivative::Analytic,
        hessian: Derivative::Unavailable,
        fixed_point_available: true,
        barrier_config: None,
        prefer_gradient_only: false,
        disable_fixed_point: true,
    };
    let planned = plan(&capability);
    assert_eq!(planned.solver, Solver::Bfgs);
    assert_eq!(planned.hessian_source, HessianSource::BfgsApprox);
}
#[test]
fn run_malformed_gradient_seed_surfaces_as_error() {
    let problem = OuterProblem::new(2)
        .with_gradient(Derivative::Analytic)
        .with_hessian(Derivative::Unavailable)
        .with_initial_rho(Array1::zeros(2))
        .with_max_iter(1);
    let mut obj = problem.build_objective(
        (),
        |_: &mut (), _: &Array1<f64>| Ok(0.0),
        // Deliberately malformed: a 2-parameter problem returning a
        // length-1 gradient.
        |_: &mut (), _: &Array1<f64>| {
            Ok(OuterEval {
                cost: 0.0,
                gradient: Array1::zeros(1),
                hessian: HessianResult::Unavailable,
            })
        },
        None::<fn(&mut ())>,
        None::<fn(&mut (), &Array1<f64>) -> Result<EfsEval, EstimationError>>,
    );
    // The mismatch must surface as a hard error, not be silently tolerated.
    let err = problem
        .run(&mut obj, "test gradient mismatch")
        .expect_err("malformed analytic gradient must surface as error");
    assert!(
        matches!(err, EstimationError::RemlOptimizationFailed(_)),
        "unexpected error variant: {err:?}",
    );
}
#[test]
fn run_bfgs_ignores_malformed_hessian_payload() {
    // Hessian is declared Unavailable, so the plan is BFGS; the eval hook
    // nevertheless returns a garbage (NaN, wrong-sized) Hessian payload,
    // which a first-order solver must simply ignore.
    let problem = OuterProblem::new(1)
        .with_gradient(Derivative::Analytic)
        .with_hessian(Derivative::Unavailable)
        .with_initial_rho(array![0.0])
        .with_max_iter(1);
    let mut obj = problem.build_objective(
        (),
        |_: &mut (), theta: &Array1<f64>| Ok(theta[0] * theta[0]),
        |_: &mut (), theta: &Array1<f64>| {
            Ok(OuterEval {
                cost: theta[0] * theta[0],
                gradient: array![2.0 * theta[0]],
                // 2x2 NaN-bearing payload on a 1-parameter problem.
                hessian: HessianResult::Analytic(array![[f64::NAN, 0.0], [0.0, 1.0]]),
            })
        },
        None::<fn(&mut ())>,
        None::<fn(&mut (), &Array1<f64>) -> Result<EfsEval, EstimationError>>,
    );
    let result = problem
        .run(&mut obj, "bfgs should ignore malformed hessian payload")
        .expect("valid first-order data should be enough for BFGS");
    assert_eq!(result.plan_used.solver, Solver::Bfgs);
    assert_eq!(result.plan_used.hessian_source, HessianSource::BfgsApprox);
}
#[test]
fn finite_outer_eval_reports_gradient_length_mismatch() {
    // A 2-parameter layout paired with a length-1 gradient must be rejected.
    let err = finite_outer_eval_or_error(
        "test gradient mismatch",
        OuterThetaLayout::new(2, 0),
        OuterEval {
            cost: 0.0,
            gradient: Array1::zeros(1),
            hessian: HessianResult::Unavailable,
        },
    )
    .expect_err("gradient mismatch should be rejected");
    // Both error classes carry the same payload; extract it either way.
    let (ObjectiveEvalError::Recoverable { message }
    | ObjectiveEvalError::Fatal { message }) = err;
    assert!(
        message.contains("outer gradient length mismatch"),
        "unexpected error: {message}"
    );
}
#[test]
fn run_with_initial_seed_still_considers_generated_candidates() {
    // The objective is finite only at the first generator-produced seed; the
    // explicit (bad) initial seed must not crowd that candidate out.
    let generated = crate::seeding::generate_rho_candidates(
        1,
        None,
        &crate::seeding::SeedConfig::default(),
    );
    let valid_seed = generated
        .first()
        .expect("seed generator should yield at least one candidate")
        .clone();
    let expected_seed = valid_seed.clone();
    // Deliberately far from the only finite-cost point.
    let initial_seed = array![9.0];
    let mut seed_config = crate::seeding::SeedConfig::default();
    seed_config.seed_budget = 1;
    let problem = OuterProblem::new(1)
        .with_gradient(Derivative::Analytic)
        .with_hessian(Derivative::Unavailable)
        .with_seed_config(seed_config)
        .with_initial_rho(initial_seed)
        .with_max_iter(1);
    let mut obj = problem.build_objective(
        (),
        {
            let valid_seed = valid_seed.clone();
            move |_: &mut (), theta: &Array1<f64>| {
                if theta == &valid_seed {
                    Ok(0.0)
                } else {
                    Ok(f64::INFINITY)
                }
            }
        },
        move |_: &mut (), theta: &Array1<f64>| {
            if theta == &valid_seed {
                Ok(OuterEval {
                    cost: 0.0,
                    gradient: Array1::zeros(1),
                    hessian: HessianResult::Unavailable,
                })
            } else {
                Ok(OuterEval::infeasible(theta.len()))
            }
        },
        None::<fn(&mut ())>,
        None::<fn(&mut (), &Array1<f64>) -> Result<EfsEval, EstimationError>>,
    );
    let result = problem
        .run(&mut obj, "generated seed should remain reachable")
        .expect("generated seed should still be eligible when an initial seed is provided");
    assert_eq!(result.rho, expected_seed);
}
#[test]
fn run_indefinite_analytic_seed_stays_on_arc() {
    let mut seed_config = crate::seeding::SeedConfig::default();
    seed_config.seed_budget = 1;
    let problem = OuterProblem::new(1)
        .with_gradient(Derivative::Analytic)
        .with_hessian(Derivative::Analytic)
        .with_seed_config(seed_config)
        .with_initial_rho(array![0.0])
        .with_max_iter(1);
    let mut obj = problem.build_objective(
        (),
        |_: &mut (), theta: &Array1<f64>| Ok(theta[0] * theta[0]),
        |_: &mut (), _: &Array1<f64>| {
            Ok(OuterEval {
                cost: 0.0,
                gradient: array![0.0],
                // Indefinite (negative) curvature at the seed point.
                hessian: HessianResult::Analytic(array![[-1.0]]),
            })
        },
        None::<fn(&mut ())>,
        None::<fn(&mut (), &Array1<f64>) -> Result<EfsEval, EstimationError>>,
    );
    // Indefinite seed geometry must not knock the runner off the
    // second-order (ARC + analytic Hessian) plan.
    let result = problem
        .run(&mut obj, "indefinite seed geometry")
        .expect("indefinite analytic seed geometry should stay on the second-order plan");
    assert_eq!(result.plan_used.solver, Solver::Arc);
    assert_eq!(result.plan_used.hessian_source, HessianSource::Analytic);
}
#[test]
fn run_seed_materialization_failure_surfaces_arc_error_verbatim() {
    let mut seed_config = crate::seeding::SeedConfig::default();
    seed_config.seed_budget = 1;
    let problem = OuterProblem::new(1)
        .with_gradient(Derivative::Analytic)
        .with_hessian(Derivative::Analytic)
        .with_seed_config(seed_config)
        .with_initial_rho(array![0.0])
        .with_max_iter(1);
    let mut obj = problem.build_objective(
        (),
        |_: &mut (), theta: &Array1<f64>| Ok(theta[0] * theta[0]),
        |_: &mut (), _: &Array1<f64>| {
            Ok(OuterEval {
                cost: 0.0,
                gradient: array![0.0],
                // Operator whose dense materialization always fails.
                hessian: HessianResult::Operator(Arc::new(
                    FailingSeedMaterializationOperator { dim: 1 },
                )),
            })
        },
        None::<fn(&mut ())>,
        None::<fn(&mut (), &Array1<f64>) -> Result<EfsEval, EstimationError>>,
    );
    let err = problem
        .run(&mut obj, "seed materialization failure")
        .expect_err(
            "ARC primary must surface the materialization failure verbatim — \
            no lateral demote to BFGS+BfgsApprox",
        );
    // The operator's own message must be preserved through the error chain.
    let msg = err.to_string();
    assert!(
        msg.contains("seed materialization failed"),
        "error must propagate the underlying materialization message; got: {msg}"
    );
}
#[test]
fn run_nonconverged_arc_stays_on_arc_after_budget_retry_ladder() {
    let mut seed_config = crate::seeding::SeedConfig::default();
    seed_config.seed_budget = 1;
    // Quartic objective far from its minimum with max_iter = 1: the ARC
    // budget ladder cannot converge within the allotted iterations.
    let problem = OuterProblem::new(1)
        .with_gradient(Derivative::Analytic)
        .with_hessian(Derivative::Analytic)
        .with_seed_config(seed_config)
        .with_initial_rho(array![5.0])
        .with_max_iter(1);
    let mut obj = problem.build_objective(
        (),
        |_: &mut (), theta: &Array1<f64>| Ok(theta[0].powi(4)),
        |_: &mut (), theta: &Array1<f64>| {
            let x = theta[0];
            Ok(OuterEval {
                cost: x.powi(4),
                gradient: array![4.0 * x.powi(3)],
                hessian: HessianResult::Analytic(array![[12.0 * x.powi(2)]]),
            })
        },
        None::<fn(&mut ())>,
        None::<fn(&mut (), &Array1<f64>) -> Result<EfsEval, EstimationError>>,
    );
    let result = problem
        .run(&mut obj, "nonconverged arc should stay on arc")
        .expect(
            "ARC ladder must surface the last non-converged ARC result rather than \
            demoting to BFGS+BfgsApprox",
        );
    assert_eq!(
        result.plan_used.solver,
        Solver::Arc,
        "ARC primary must not lateral-demote after budget exhaustion"
    );
    assert_eq!(
        result.plan_used.hessian_source,
        HessianSource::Analytic,
        "analytic outer Hessian must be preserved across the budget-bump retry ladder"
    );
    assert!(
        !result.converged,
        "test fixture is engineered so the ladder cannot converge; \
        converged=true would mean the fixture stopped exercising the ladder"
    );
}
#[test]
fn arc_budget_retry_preserves_operator_trust_radius() {
    // Objective whose cost-only eval records |theta| of every non-seed trial
    // point, so trial step sizes expose the trust radius in effect.
    // cost = -theta[0] with gradient fixed at +1 means every trial is
    // rejected, shrinking the trust radius.
    struct RejectingArcObjective {
        trial_abs: Arc<std::sync::Mutex<Vec<f64>>>,
    }
    impl OuterObjective for RejectingArcObjective {
        fn capability(&self) -> OuterCapability {
            OuterCapability {
                gradient: Derivative::Analytic,
                hessian: Derivative::Analytic,
                n_params: 1,
                psi_dim: 0,
                fixed_point_available: false,
                barrier_config: None,
                prefer_gradient_only: false,
                disable_fixed_point: false,
            }
        }
        fn eval_cost(&mut self, theta: &Array1<f64>) -> Result<f64, EstimationError> {
            // Skip the seed point (theta == 0) so only trial steps are logged.
            if theta[0] != 0.0 {
                self.trial_abs
                    .lock()
                    .expect("trial recorder mutex poisoned")
                    .push(theta[0].abs());
            }
            Ok(-theta[0])
        }
        fn eval(&mut self, theta: &Array1<f64>) -> Result<OuterEval, EstimationError> {
            self.eval_with_order(theta, OuterEvalOrder::ValueGradientHessian)
        }
        fn eval_with_order(
            &mut self,
            theta: &Array1<f64>,
            _: OuterEvalOrder,
        ) -> Result<OuterEval, EstimationError> {
            Ok(OuterEval {
                cost: -theta[0],
                gradient: array![1.0],
                hessian: HessianResult::Operator(Arc::new(IdentityOuterHessianOperator)),
            })
        }
        fn reset(&mut self) {}
    }
    let trial_abs = Arc::new(std::sync::Mutex::new(Vec::<f64>::new()));
    let mut obj = RejectingArcObjective {
        trial_abs: Arc::clone(&trial_abs),
    };
    let seed = array![0.0];
    let layout = OuterThetaLayout::new(1, 0);
    let plan = OuterPlan {
        solver: Solver::Arc,
        hessian_source: HessianSource::Analytic,
    };
    let initial_eval = obj
        .eval_with_order(&seed, OuterEvalOrder::ValueGradientHessian)
        .expect("initial eval");
    // First attempt: 1-iteration budget, default trust radius (no resume).
    let first = run_operator_trust_region(
        &mut obj,
        &seed,
        layout,
        None,
        1e-12,
        1,
        initial_eval,
        plan,
        None,
    )
    .expect("first operator ARC attempt");
    assert_eq!(
        first.operator_stop_reason,
        Some(OperatorTrustRegionStopReason::IterationBudget)
    );
    let retry_eval = obj
        .eval_with_order(&first.rho, OuterEvalOrder::ValueGradientHessian)
        .expect("retry eval");
    // Retry: resume from the (shrunken) radius carried in the first result.
    let result = run_operator_trust_region(
        &mut obj,
        &first.rho,
        layout,
        None,
        1e-12,
        1,
        retry_eval,
        plan,
        first.operator_trust_radius,
    )
    .expect("retry operator ARC attempt");
    assert_eq!(
        result.operator_stop_reason,
        Some(OperatorTrustRegionStopReason::IterationBudget)
    );
    // Exactly one trial per attempt; the second trial must be half-length,
    // proving the shrunken radius was resumed rather than reset to 1.
    let trials = trial_abs.lock().expect("trial recorder mutex poisoned");
    assert_eq!(trials.len(), 2, "unexpected trial evaluations: {trials:?}");
    assert!(
        (trials[0] - 1.0).abs() < 1e-12,
        "first ARC attempt should use the default trust radius, got {trials:?}"
    );
    assert!(
        (trials[1] - 0.5).abs() < 1e-12,
        "retry must resume from the shrunken trust radius instead of replaying radius=1, got {trials:?}"
    );
}
#[test]
fn operator_arc_rejected_trial_uses_cost_only() {
    // Counts cost-only evals separately from full (gradient+Hessian) evals
    // so we can prove a rejected trial never pays for derivative work.
    // cost = |theta[0]| with gradient fixed at +1 guarantees rejection.
    struct CountingRejectedTrialObjective {
        cost_calls: Arc<std::sync::atomic::AtomicUsize>,
        full_calls: Arc<std::sync::atomic::AtomicUsize>,
    }
    impl OuterObjective for CountingRejectedTrialObjective {
        fn capability(&self) -> OuterCapability {
            OuterCapability {
                gradient: Derivative::Analytic,
                hessian: Derivative::Analytic,
                n_params: 1,
                psi_dim: 0,
                fixed_point_available: false,
                barrier_config: None,
                prefer_gradient_only: false,
                disable_fixed_point: false,
            }
        }
        fn eval_cost(&mut self, theta: &Array1<f64>) -> Result<f64, EstimationError> {
            self.cost_calls
                .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
            Ok(theta[0].abs())
        }
        fn eval(&mut self, theta: &Array1<f64>) -> Result<OuterEval, EstimationError> {
            self.eval_with_order(theta, OuterEvalOrder::ValueGradientHessian)
        }
        fn eval_with_order(
            &mut self,
            theta: &Array1<f64>,
            _: OuterEvalOrder,
        ) -> Result<OuterEval, EstimationError> {
            self.full_calls
                .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
            Ok(OuterEval {
                cost: theta[0].abs(),
                gradient: array![1.0],
                hessian: HessianResult::Operator(Arc::new(IdentityOuterHessianOperator)),
            })
        }
        fn reset(&mut self) {}
    }
    let cost_calls = Arc::new(std::sync::atomic::AtomicUsize::new(0));
    let full_calls = Arc::new(std::sync::atomic::AtomicUsize::new(0));
    let mut obj = CountingRejectedTrialObjective {
        cost_calls: Arc::clone(&cost_calls),
        full_calls: Arc::clone(&full_calls),
    };
    let seed = array![0.0];
    let initial_eval = obj
        .eval_with_order(&seed, OuterEvalOrder::ValueGradientHessian)
        .expect("initial eval");
    // One-iteration budget: a single rejected trial, then budget exhaustion.
    let result = run_operator_trust_region(
        &mut obj,
        &seed,
        OuterThetaLayout::new(1, 0),
        None,
        1e-12,
        1,
        initial_eval,
        OuterPlan {
            solver: Solver::Arc,
            hessian_source: HessianSource::Analytic,
        },
        None,
    )
    .expect("operator ARC attempt");
    assert!(
        !result.converged,
        "single-iteration fixture should reject and exhaust its budget"
    );
    assert_eq!(
        cost_calls.load(std::sync::atomic::Ordering::Relaxed),
        1,
        "one rejected trial should require exactly one cost-only evaluation"
    );
    assert_eq!(
        full_calls.load(std::sync::atomic::Ordering::Relaxed),
        1,
        "rejected trial must not request gradient/Hessian; only initial eval should be full"
    );
}
#[test]
fn operator_arc_reuses_dense_model_after_rejection() {
    // Identity-operator Hessian that counts per-vector HVPs separately from
    // batched dense materializations (mul_mat), so we can verify which path
    // the trust-region loop uses after a rejected step.
    struct CountingIdentityOuterHessianOperator {
        matvec_calls: Arc<std::sync::atomic::AtomicUsize>,
        batched_mul_mat_calls: Arc<std::sync::atomic::AtomicUsize>,
    }
    impl OuterHessianOperator for CountingIdentityOuterHessianOperator {
        fn dim(&self) -> usize {
            1
        }
        fn matvec(&self, v: &Array1<f64>) -> Result<Array1<f64>, String> {
            self.matvec_calls
                .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
            Ok(v.clone())
        }
        fn mul_mat(&self, factor: ArrayView2<'_, f64>) -> Result<Array2<f64>, String> {
            self.batched_mul_mat_calls
                .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
            Ok(factor.to_owned())
        }
        fn materialization_capability(&self) -> OuterHessianMaterialization {
            OuterHessianMaterialization::BatchedHvp
        }
    }
    // Objective engineered so every trial step is rejected
    // (cost = |theta[0]|, gradient fixed at +1).
    struct RejectingObjective {
        matvec_calls: Arc<std::sync::atomic::AtomicUsize>,
        batched_mul_mat_calls: Arc<std::sync::atomic::AtomicUsize>,
    }
    impl OuterObjective for RejectingObjective {
        fn capability(&self) -> OuterCapability {
            OuterCapability {
                gradient: Derivative::Analytic,
                hessian: Derivative::Analytic,
                n_params: 1,
                psi_dim: 0,
                fixed_point_available: false,
                barrier_config: None,
                prefer_gradient_only: false,
                disable_fixed_point: false,
            }
        }
        fn eval_cost(&mut self, theta: &Array1<f64>) -> Result<f64, EstimationError> {
            Ok(theta[0].abs())
        }
        fn eval(&mut self, theta: &Array1<f64>) -> Result<OuterEval, EstimationError> {
            self.eval_with_order(theta, OuterEvalOrder::ValueGradientHessian)
        }
        fn eval_with_order(
            &mut self,
            theta: &Array1<f64>,
            _: OuterEvalOrder,
        ) -> Result<OuterEval, EstimationError> {
            Ok(OuterEval {
                cost: theta[0].abs(),
                gradient: array![1.0],
                hessian: HessianResult::Operator(Arc::new(
                    CountingIdentityOuterHessianOperator {
                        matvec_calls: Arc::clone(&self.matvec_calls),
                        batched_mul_mat_calls: Arc::clone(&self.batched_mul_mat_calls),
                    },
                )),
            })
        }
        fn reset(&mut self) {}
    }
    let matvec_calls = Arc::new(std::sync::atomic::AtomicUsize::new(0));
    let batched_mul_mat_calls = Arc::new(std::sync::atomic::AtomicUsize::new(0));
    let mut obj = RejectingObjective {
        matvec_calls: Arc::clone(&matvec_calls),
        batched_mul_mat_calls: Arc::clone(&batched_mul_mat_calls),
    };
    let seed = array![0.0];
    let initial_eval = obj
        .eval_with_order(&seed, OuterEvalOrder::ValueGradientHessian)
        .expect("initial eval");
    // Three iterations: first rejection, then dense-model reuse on retries.
    let result = run_operator_trust_region(
        &mut obj,
        &seed,
        OuterThetaLayout::new(1, 0),
        None,
        1e-12,
        3,
        initial_eval,
        OuterPlan {
            solver: Solver::Arc,
            hessian_source: HessianSource::Analytic,
        },
        None,
    )
    .expect("operator ARC attempt");
    assert!(!result.converged);
    assert_eq!(
        matvec_calls.load(std::sync::atomic::Ordering::Relaxed),
        2,
        "first rejected step uses two HVPs; batched dense materialization must not \
        fall back to an extra per-column matvec"
    );
    assert_eq!(
        batched_mul_mat_calls.load(std::sync::atomic::Ordering::Relaxed),
        1,
        "post-rejection dense ARC reuse must materialize through the operator's \
        batched mul_mat path exactly once"
    );
}
#[test]
fn candidate_selection_prefers_lower_cost_within_same_convergence_class() {
    // Builds a BFGS-plan result with the given rho / cost / gradient norm /
    // convergence flag; every other field is held constant.
    let make_result = |rho: Array1<f64>, final_value: f64, final_grad_norm: f64, converged: bool| {
        OuterResult {
            rho,
            final_value,
            iterations: 1,
            final_grad_norm,
            final_gradient: None,
            final_hessian: None,
            converged,
            plan_used: OuterPlan {
                solver: Solver::Bfgs,
                hessian_source: HessianSource::BfgsApprox,
            },
            operator_trust_radius: None,
            operator_stop_reason: None,
        }
    };
    let nonconverged_hi = make_result(array![0.0], 9.0, 1.0, false);
    let nonconverged_lo = make_result(array![1.0], 1.0, 1.0, false);
    let converged = make_result(array![2.0], 5.0, 0.0, true);
    // Any candidate beats no incumbent at all.
    assert!(candidate_improves_best(&nonconverged_hi, None));
    // Within the non-converged class, lower cost wins (and only lower cost).
    assert!(candidate_improves_best(
        &nonconverged_lo,
        Some(&nonconverged_hi)
    ));
    assert!(!candidate_improves_best(
        &nonconverged_hi,
        Some(&nonconverged_lo)
    ));
    // Convergence trumps cost across classes.
    assert!(candidate_improves_best(&converged, Some(&nonconverged_lo)));
    assert!(!candidate_improves_best(&nonconverged_lo, Some(&converged)));
}
#[test]
fn run_starts_solver_with_direct_startup_eval() {
    let mut seed_config = crate::seeding::SeedConfig::default();
    seed_config.seed_budget = 1;
    // Records whether each hook invocation was a cost-only screening call
    // ("cost") or a full evaluation ("eval"), in order.
    let calls = Arc::new(Mutex::new(Vec::new()));
    let problem = OuterProblem::new(1)
        .with_gradient(Derivative::Analytic)
        .with_hessian(Derivative::Analytic)
        .with_seed_config(seed_config)
        .with_max_iter(1);
    let mut obj = problem.build_objective(
        (),
        {
            let calls = Arc::clone(&calls);
            move |_: &mut (), theta: &Array1<f64>| {
                calls.lock().unwrap().push("cost");
                Ok(theta[0] * theta[0])
            }
        },
        {
            let calls = Arc::clone(&calls);
            move |_: &mut (), theta: &Array1<f64>| {
                calls.lock().unwrap().push("eval");
                Ok(OuterEval {
                    cost: theta[0] * theta[0],
                    gradient: array![2.0 * theta[0]],
                    hessian: HessianResult::Analytic(array![[2.0]]),
                })
            }
        },
        None::<fn(&mut ())>,
        None::<fn(&mut (), &Array1<f64>) -> Result<EfsEval, EstimationError>>,
    );
    problem
        .run(&mut obj, "solver should start from a direct startup eval")
        .expect("analytic plans should start with a direct full evaluation");
    // The very first recorded call must be the full eval: no cost-only
    // screening pass may precede it.
    let calls = calls.lock().unwrap();
    let first_eval_idx = calls
        .iter()
        .position(|call| *call == "eval")
        .expect("solver should eventually request a full eval");
    assert!(
        first_eval_idx == 0,
        "startup should not perform a separate cost-screening pass first: {calls:?}"
    );
}
#[test]
fn run_screening_reorders_expensive_seeds_before_full_startup_eval() {
    // The GeneralizedLinear risk profile enables cost screening of the seed
    // list: the cheap generated seed must be moved ahead of the expensive
    // explicit initial seed before the first full startup eval runs.
    let mut seed_config = crate::seeding::SeedConfig::default();
    seed_config.seed_budget = 1;
    seed_config.risk_profile = crate::seeding::SeedRiskProfile::GeneralizedLinear;
    let screening_cap = Arc::new(AtomicUsize::new(0));
    // Explicit seed the cost closure below penalizes heavily.
    let initial_seed = array![9.0];
    let valid_seed = crate::seeding::generate_rho_candidates(1, None, &seed_config)
        .first()
        .expect("seed generator should yield at least one candidate")
        .clone();
    // Records every theta handed to the full eval closure, in order.
    let started = Arc::new(Mutex::new(Vec::new()));
    let problem = OuterProblem::new(1)
        .with_gradient(Derivative::Analytic)
        .with_hessian(Derivative::Analytic)
        .with_seed_config(seed_config)
        .with_screening_cap(Arc::clone(&screening_cap))
        .with_initial_rho(initial_seed.clone())
        .with_max_iter(1);
    let mut obj = problem.build_objective(
        (),
        {
            let valid_seed = valid_seed.clone();
            // Cheap for the generated seed, very expensive for anything else.
            move |_: &mut (), theta: &Array1<f64>| {
                if theta == &valid_seed { Ok(0.0) } else { Ok(1000.0) }
            }
        },
        {
            let valid_seed = valid_seed.clone();
            let started = Arc::clone(&started);
            move |_: &mut (), theta: &Array1<f64>| {
                started.lock().unwrap().push(theta.clone());
                if theta == &valid_seed {
                    Ok(OuterEval {
                        cost: 0.0,
                        gradient: array![0.0],
                        hessian: HessianResult::Analytic(array![[1.0]]),
                    })
                } else {
                    Ok(OuterEval::infeasible(theta.len()))
                }
            }
        },
        None::<fn(&mut ())>,
        None::<fn(&mut (), &Array1<f64>) -> Result<EfsEval, EstimationError>>,
    );
    let result = problem
        .run(&mut obj, "screening should reorder expensive seeds")
        .expect("screened startup should reach the best generated seed");
    assert_eq!(result.rho, valid_seed);
    assert_eq!(
        started.lock().unwrap().first().cloned(),
        Some(valid_seed),
        "screening should move the lowest-cost seed to the front before full startup eval",
    );
    // Use the module-wide `Ordering` import for consistency with the sibling
    // tests instead of the fully-qualified path. Cap 0 means "no cap active".
    assert_eq!(screening_cap.load(Ordering::Relaxed), 0);
}
#[test]
fn rank_seeds_cascade_escalates_when_initial_cap_collapses_all() {
    // Screening ranks seeds under a capped inner-iteration budget. This test
    // forces every cost evaluated under a cap below 12 to NaN, so the ranking
    // cascade must escalate past the early stages and stop at the 4x stage
    // (4 * screen_max_inner_iterations = 12) where a finite cost first
    // appears. NOTE(review): the exact stage multipliers (1x/2x/4x?) are
    // inferred from this test's expectations — confirm against rank_seeds.
    let mut seed_config = crate::seeding::SeedConfig::default();
    seed_config.seed_budget = 1;
    seed_config.screen_max_inner_iterations = 3;
    // Shared cap cell observed by the cost closure; 0 means "no cap active".
    let screening_cap = Arc::new(AtomicUsize::new(0));
    // Deliberately bad explicit seed so the generated candidate must win.
    let initial_seed = array![5.0];
    let valid_seed = crate::seeding::generate_rho_candidates(1, None, &seed_config)
        .first()
        .expect("seed generator should yield at least one candidate")
        .clone();
    // Tracks the largest cap value the cost closure ever observed.
    let max_cap_seen = Arc::new(AtomicUsize::new(0));
    let problem = OuterProblem::new(1)
        .with_gradient(Derivative::Analytic)
        .with_hessian(Derivative::Analytic)
        .with_seed_config(seed_config)
        .with_screening_cap(Arc::clone(&screening_cap))
        .with_initial_rho(initial_seed.clone())
        .with_max_iter(1);
    let mut obj = problem.build_objective(
        (),
        {
            let screening_cap = Arc::clone(&screening_cap);
            let max_cap_seen = Arc::clone(&max_cap_seen);
            let valid_seed = valid_seed.clone();
            move |_: &mut (), theta: &Array1<f64>| {
                let cap = screening_cap.load(Ordering::Relaxed);
                max_cap_seen.fetch_max(cap, Ordering::Relaxed);
                // Collapse every capped stage below 12 to NaN so the cascade
                // is forced to keep escalating; uncapped (0) and cap >= 12
                // evaluate normally.
                if cap > 0 && cap < 12 {
                    return Ok(f64::NAN);
                }
                if theta == &valid_seed {
                    Ok(0.0)
                } else {
                    Ok(1000.0)
                }
            }
        },
        {
            let valid_seed = valid_seed.clone();
            move |_: &mut (), theta: &Array1<f64>| {
                if theta == &valid_seed {
                    Ok(OuterEval {
                        cost: 0.0,
                        gradient: array![0.0],
                        hessian: HessianResult::Analytic(array![[1.0]]),
                    })
                } else {
                    Ok(OuterEval::infeasible(theta.len()))
                }
            }
        },
        None::<fn(&mut ())>,
        None::<fn(&mut (), &Array1<f64>) -> Result<EfsEval, EstimationError>>,
    );
    let _ = problem
        .run(&mut obj, "cascade should escalate")
        .expect("cascade should reach a finite cost at the 4× cap stage");
    let max_cap = max_cap_seen.load(Ordering::Relaxed);
    assert_eq!(
        max_cap, 12,
        "cascade should stop at the 4× cap stage; observed max cap = {max_cap}"
    );
    // The cascade temporarily mutates the shared cap; it must restore the
    // pre-cascade value (0 = uncapped) before returning.
    assert_eq!(
        screening_cap.load(Ordering::Relaxed),
        0,
        "screening cap must be restored to its previous value after cascade"
    );
}
#[test]
fn run_efs_skips_global_cost_screening() {
    // EFS plans order their own seeds, so startup must not spend eval_cost
    // calls on a global cost-screening pass. The cost closure counts calls;
    // the test expects exactly zero.
    let mut seed_config = crate::seeding::SeedConfig::default();
    seed_config.max_seeds = 6;
    seed_config.seed_budget = 1;
    let screening_calls = Arc::new(AtomicUsize::new(0));
    // 15 params with no derivatives available routes planning to EFS.
    let problem = OuterProblem::new(15)
        .with_gradient(Derivative::Unavailable)
        .with_hessian(Derivative::Unavailable)
        .with_seed_config(seed_config)
        .with_max_iter(1);
    let mut obj = problem.build_objective(
        (),
        {
            let screening_calls = Arc::clone(&screening_calls);
            move |_: &mut (), _: &Array1<f64>| {
                // Bare `Ordering` for consistency with the module-wide import
                // used by the sibling tests.
                screening_calls.fetch_add(1, Ordering::Relaxed);
                Ok(0.0)
            }
        },
        |_: &mut (), theta: &Array1<f64>| Ok(OuterEval::infeasible(theta.len())),
        None::<fn(&mut ())>,
        Some(|_: &mut (), theta: &Array1<f64>| {
            // Trivially successful EFS step so the first seed suffices.
            Ok(EfsEval {
                cost: 0.0,
                steps: vec![0.0; theta.len()],
                beta: None,
                psi_gradient: None,
                psi_indices: None,
            })
        }),
    );
    problem
        .run(
            &mut obj,
            "EFS should not use a separate global cost-screening pass",
        )
        .expect("first generated EFS seed should be sufficient");
    assert_eq!(
        screening_calls.load(Ordering::Relaxed),
        0,
        "EFS startup should not call eval_cost just to screen seeds"
    );
}
#[test]
fn run_efs_skips_invalid_leading_seed_without_spending_budget() {
    // With an EFS seed budget of exactly 1, an invalid explicit initial seed
    // must be skipped without consuming the single budget slot — otherwise
    // the valid generated seed would never be reached.
    let generated = crate::seeding::generate_rho_candidates(
        15,
        None,
        &crate::seeding::SeedConfig::default(),
    );
    let valid_seed = generated
        .first()
        .expect("seed generator should yield at least one candidate")
        .clone();
    // A constant-9 vector is assumed not to collide with any generated
    // candidate, so the EFS closure below rejects it.
    let invalid_seed = Array1::from_elem(15, 9.0);
    let mut seed_config = crate::seeding::SeedConfig::default();
    seed_config.seed_budget = 1;
    let problem = OuterProblem::new(15)
        .with_gradient(Derivative::Analytic)
        .with_hessian(Derivative::Unavailable)
        .with_seed_config(seed_config)
        .with_initial_rho(invalid_seed)
        .with_max_iter(1);
    let mut obj = problem.build_objective(
        (),
        |_: &mut (), _: &Array1<f64>| Ok(0.0),
        // Full eval is always infeasible, so success can only come via EFS.
        |_: &mut (), theta: &Array1<f64>| Ok(OuterEval::infeasible(theta.len())),
        None::<fn(&mut ())>,
        {
            let valid_seed = valid_seed.clone();
            Some(move |_: &mut (), theta: &Array1<f64>| {
                if theta == &valid_seed {
                    Ok(EfsEval {
                        cost: 0.0,
                        steps: vec![0.0; theta.len()],
                        beta: None,
                        psi_gradient: None,
                        psi_indices: None,
                    })
                } else {
                    // Any other seed (notably the invalid explicit one) fails.
                    Err(EstimationError::RemlOptimizationFailed(
                        "invalid EFS seed".to_string(),
                    ))
                }
            })
        },
    );
    let result = problem
        .run(&mut obj, "efs generated seed should remain reachable")
        .expect("invalid startup seeds should not consume the only EFS seed slot");
    // The run must land on the generated seed, and via the EFS solver —
    // not via a fallback plan.
    assert_eq!(result.rho, valid_seed);
    assert_eq!(result.plan_used.solver, Solver::Efs);
}
#[test]
fn run_efs_runtime_fallback_marker_degrades_to_bfgs_immediately() {
    // An EFS error message carrying EFS_FIRST_ORDER_FALLBACK_MARKER is a
    // runtime escape hatch: the run must abort EFS after a single attempt
    // (not retry remaining seeds) and degrade to first-order BFGS.
    let mut seed_config = crate::seeding::SeedConfig::default();
    // Two seeds available — the marker must still stop EFS after one call.
    seed_config.seed_budget = 2;
    let efs_calls = Arc::new(AtomicUsize::new(0));
    let problem = OuterProblem::new(12)
        .with_gradient(Derivative::Analytic)
        .with_hessian(Derivative::Unavailable)
        .with_seed_config(seed_config)
        .with_initial_rho(Array1::zeros(12))
        .with_max_iter(5);
    let mut obj = problem.build_objective(
        (),
        // Smooth convex cost 0.5 * |theta|^2 so BFGS can make progress.
        |_: &mut (), theta: &Array1<f64>| Ok(0.5 * theta.dot(theta)),
        |_: &mut (), theta: &Array1<f64>| {
            Ok(OuterEval {
                cost: 0.5 * theta.dot(theta),
                gradient: theta.clone(),
                hessian: HessianResult::Unavailable,
            })
        },
        None::<fn(&mut ())>,
        {
            let efs_calls = Arc::clone(&efs_calls);
            Some(move |_: &mut (), _: &Array1<f64>| {
                // Bare `Ordering` for consistency with the module-wide import.
                efs_calls.fetch_add(1, Ordering::Relaxed);
                Err(EstimationError::RemlOptimizationFailed(format!(
                    "{} synthetic runtime escape hatch",
                    EFS_FIRST_ORDER_FALLBACK_MARKER,
                )))
            })
        },
    );
    let result = problem
        .run(&mut obj, "efs runtime fallback marker")
        .expect("runtime EFS escape hatch should degrade to BFGS");
    assert_eq!(result.plan_used.solver, Solver::Bfgs);
    assert_eq!(
        efs_calls.load(Ordering::Relaxed),
        1,
        "runtime fallback marker should abort the EFS attempt immediately"
    );
}
#[test]
fn run_rejects_invalid_theta_layout() {
    // A psi dimension of 2 is incompatible with a 1-dimensional theta, so
    // `run` must fail up-front with a layout error rather than proceeding.
    let problem = OuterProblem::new(1)
        .with_gradient(Derivative::Analytic)
        .with_hessian(Derivative::Unavailable)
        .with_psi_dim(2)
        .with_initial_rho(Array1::zeros(1))
        .with_max_iter(1);
    // Trivial closures: they should never be reached once the layout check fires.
    let cost_fn = |_: &mut (), _: &Array1<f64>| Ok(0.0);
    let eval_fn = |_: &mut (), _: &Array1<f64>| {
        Ok(OuterEval {
            cost: 0.0,
            gradient: Array1::zeros(1),
            hessian: HessianResult::Unavailable,
        })
    };
    let mut obj = problem.build_objective(
        (),
        cost_fn,
        eval_fn,
        None::<fn(&mut ())>,
        None::<fn(&mut (), &Array1<f64>) -> Result<EfsEval, EstimationError>>,
    );
    let err = problem
        .run(&mut obj, "test invalid layout")
        .expect_err("invalid theta layout should fail cleanly");
    assert!(
        err.to_string().contains("invalid outer theta layout"),
        "unexpected error: {err}"
    );
}
#[test]
fn effective_seed_budget_caps_expensive_solver_retries() {
    // Expensive solvers (EFS, hybrid EFS, ARC under risk or the flag) are
    // clamped to a single seed; cheap BFGS keeps the requested budget.
    use crate::seeding::SeedRiskProfile;
    let cases = [
        (4, Solver::Efs, SeedRiskProfile::GeneralizedLinear, false, 1),
        (4, Solver::HybridEfs, SeedRiskProfile::Survival, false, 1),
        (3, Solver::Arc, SeedRiskProfile::GeneralizedLinear, true, 1),
        (3, Solver::Arc, SeedRiskProfile::Survival, false, 1),
        (3, Solver::Bfgs, SeedRiskProfile::Survival, false, 3),
    ];
    for (requested, solver, risk, flag, expected) in cases {
        assert_eq!(effective_seed_budget(requested, solver, risk, flag), expected);
    }
}
// Capability fixture with no derivatives and every optional feature off,
// used by the auxiliary-plan routing tests below.
fn aux_cap_unavailable(n_params: usize) -> OuterCapability {
    OuterCapability {
        n_params,
        psi_dim: 0,
        gradient: Derivative::Unavailable,
        hessian: Derivative::Unavailable,
        fixed_point_available: false,
        disable_fixed_point: false,
        prefer_gradient_only: false,
        barrier_config: None,
    }
}
#[test]
fn plan_with_class_primary_is_identical_to_plan_for_unavailable_grad() {
    // The Primary class must behave as a pure alias for the default planner.
    let capability = aux_cap_unavailable(3);
    let via_class = plan_with_class(&capability, SolverClass::Primary);
    assert_eq!(via_class, plan(&capability));
}
#[test]
fn plan_with_class_aux_unavailable_routes_to_compass_search() {
    // With no derivatives at all, the auxiliary gradient-free class can only
    // fall back to compass search.
    let capability = aux_cap_unavailable(3);
    let planned = plan_with_class(&capability, SolverClass::AuxiliaryGradientFree);
    assert_eq!(planned.solver, Solver::CompassSearch);
}
#[test]
fn plan_with_class_aux_analytic_grad_defers_to_primary_plan() {
    // Analytic gradient + Hessian: the auxiliary class should defer to the
    // primary plan, which selects ARC for this capability.
    let capability = OuterCapability {
        n_params: 3,
        psi_dim: 0,
        gradient: Derivative::Analytic,
        hessian: Derivative::Analytic,
        fixed_point_available: false,
        disable_fixed_point: false,
        prefer_gradient_only: false,
        barrier_config: None,
    };
    let planned = plan_with_class(&capability, SolverClass::AuxiliaryGradientFree);
    assert_eq!(planned.solver, Solver::Arc);
}
#[test]
fn plan_with_class_aux_efs_eligible_defers_to_primary() {
    // Gradient-only capability with a fixed point available is EFS-eligible;
    // the auxiliary class should still defer to the primary plan's choice.
    let capability = OuterCapability {
        n_params: 12,
        psi_dim: 0,
        gradient: Derivative::Analytic,
        hessian: Derivative::Unavailable,
        fixed_point_available: true,
        disable_fixed_point: false,
        prefer_gradient_only: false,
        barrier_config: None,
    };
    let planned = plan_with_class(&capability, SolverClass::AuxiliaryGradientFree);
    assert_eq!(planned.solver, Solver::Efs);
}
#[test]
fn automatic_fallback_never_includes_compass_search() {
    // Fully analytic capability: every automatic fallback attempt must still
    // plan a gradient-based solver, never the gradient-free compass search.
    let cap = OuterCapability {
        n_params: 5,
        psi_dim: 0,
        gradient: Derivative::Analytic,
        hessian: Derivative::Analytic,
        fixed_point_available: false,
        disable_fixed_point: false,
        prefer_gradient_only: false,
        barrier_config: None,
    };
    let solvers: Vec<_> = automatic_fallback_attempts(&cap)
        .iter()
        .map(|attempt_cap| plan_with_class(attempt_cap, SolverClass::Primary).solver)
        .collect();
    for solver in &solvers {
        assert_ne!(*solver, Solver::CompassSearch);
    }
}
#[test]
fn compass_search_budget_accounts_for_single_seed() {
    // Compass search is expensive enough that only a single seed is ever
    // attempted, regardless of the configured budget.
    assert_eq!(
        effective_seed_budget(
            8,
            Solver::CompassSearch,
            crate::seeding::SeedRiskProfile::Survival,
            false,
        ),
        1
    );
}
#[test]
fn run_aux_compass_projects_seed_before_seed_cost() {
    // The explicit seed (2.0) lies outside the [0, 1] bounds; the auxiliary
    // direct-search path must project it onto the feasible box (to 1.0)
    // BEFORE evaluating its cost. With cost (theta - 2)^2 the feasible
    // optimum is the boundary point 1.0 with value 1.0.
    let seen = Arc::new(Mutex::new(Vec::new()));
    let problem = OuterProblem::new(1)
        .with_solver_class(SolverClass::AuxiliaryGradientFree)
        .with_bounds(array![0.0], array![1.0])
        .with_initial_rho(array![2.0])
        .with_max_iter(64);
    let mut obj = problem.build_objective(
        (),
        {
            let seen = Arc::clone(&seen);
            // Records every theta whose cost is requested, in order, so the
            // assertion below can inspect the very first evaluation.
            move |_: &mut (), theta: &Array1<f64>| {
                seen.lock().unwrap().push(theta.clone());
                Ok((theta[0] - 2.0).powi(2))
            }
        },
        // The gradient-free path must never request a full eval.
        |_: &mut (), _: &Array1<f64>| {
            Err(EstimationError::InvalidInput(
                "aux direct-search test should not call eval".to_string(),
            ))
        },
        None::<fn(&mut ())>,
        None::<fn(&mut (), &Array1<f64>) -> Result<EfsEval, EstimationError>>,
    );
    let result = problem
        .run(&mut obj, "aux direct-search seed projection")
        .expect("aux direct-search should evaluate the projected seed");
    assert_eq!(result.plan_used.solver, Solver::CompassSearch);
    assert_eq!(result.rho, array![1.0]);
    assert_eq!(result.final_value, 1.0);
    // First cost evaluation must already be the projected point, proving the
    // projection happened before any cost was computed.
    assert_eq!(
        seen.lock().unwrap().first().cloned(),
        Some(array![1.0]),
        "aux direct-search must project the seed before evaluating its cost",
    );
}
}