use ndarray::{Array1, ArrayView1};
use crate::solver::arrow_schur::{
ArrowFactorCache, ArrowSchurError, ArrowSchurSystem, ArrowSolveOptions, ArrowSolverMode,
arrow_quadratic_model_reduction, solve_arrow_newton_step_with_options,
};
use crate::terms::latent_coord::LatentCoordValues;
#[derive(Debug, Clone)]
pub struct LatentInnerOptions {
pub max_iterations: usize,
pub convergence_tolerance: f64,
pub initial_ridge_t: f64,
pub initial_ridge_beta: f64,
pub lm_grow: f64,
pub lm_shrink: f64,
pub max_ridge: f64,
pub solver_mode: Option<ArrowSolverMode>,
pub trust_region_radius: f64,
}
impl Default for LatentInnerOptions {
fn default() -> Self {
Self {
max_iterations: 50,
convergence_tolerance: 1e-8,
initial_ridge_t: 0.0,
initial_ridge_beta: 0.0,
lm_grow: 10.0,
lm_shrink: 0.5,
max_ridge: 1e12,
solver_mode: None,
trust_region_radius: 1.0,
}
}
}
#[derive(Debug, Clone)]
pub struct LatentInnerOutcome {
pub beta: Array1<f64>,
pub factor_cache: Option<ArrowFactorCache>,
pub iterations: usize,
pub converged: bool,
pub final_ridge_t: f64,
pub final_ridge_beta: f64,
}
pub trait ArrowSystemAssembler {
fn assemble(
&mut self,
beta: ArrayView1<'_, f64>,
latent: &LatentCoordValues,
) -> Result<ArrowSchurSystem, String>;
fn objective(
&mut self,
beta: ArrayView1<'_, f64>,
latent: &LatentCoordValues,
) -> Result<f64, String>;
}
pub struct LatentInnerSolver<'a, A: ArrowSystemAssembler> {
pub beta: Array1<f64>,
pub latent: &'a mut LatentCoordValues,
pub assembler: A,
pub options: LatentInnerOptions,
}
impl<'a, A: ArrowSystemAssembler> LatentInnerSolver<'a, A> {
#[must_use]
pub fn new(
beta: Array1<f64>,
latent: &'a mut LatentCoordValues,
assembler: A,
options: LatentInnerOptions,
) -> Self {
Self {
beta,
latent,
assembler,
options,
}
}
pub fn solve(&mut self) -> Result<LatentInnerOutcome, String> {
let opts = self.options.clone();
assert!(opts.lm_grow > 1.0, "LM ridge grow factor must exceed 1");
assert!(
opts.lm_shrink > 0.0 && opts.lm_shrink < 1.0,
"LM ridge shrink factor must lie in (0, 1)"
);
let mut ridge_t = opts.initial_ridge_t.max(0.0);
let mut ridge_beta = opts.initial_ridge_beta.max(0.0);
let mut last_cache: Option<ArrowFactorCache> = None;
let mut converged = false;
let mut iter = 0_usize;
let mut current_objective = self
.assembler
.objective(self.beta.view(), self.latent)
.map_err(|e| format!("LatentInnerSolver: objective failed at start: {e}"))?;
if !current_objective.is_finite() {
return Err("LatentInnerSolver: non-finite objective at start".to_string());
}
while iter < opts.max_iterations {
let mut system = self
.assembler
.assemble(self.beta.view(), self.latent)
.map_err(|e| format!("LatentInnerSolver: assembler failed at iter {iter}: {e}"))?;
system.apply_riemannian_latent_geometry(self.latent);
let g_norm_sq = system_gradient_norm_sq(&system);
let scale = 1.0 + iterate_norm(self.beta.view(), self.latent.as_flat().view());
let rel = (g_norm_sq.sqrt()) / scale;
if rel < opts.convergence_tolerance {
converged = true;
let solve_options = latent_arrow_solve_options(
&system,
&opts,
!self.latent.effective_is_all_euclidean(),
);
if let Ok((_, _, cache)) = solve_arrow_newton_step_with_options(
&system,
ridge_t.max(1e-12),
ridge_beta.max(1e-12),
&solve_options,
) {
last_cache = Some(cache);
}
break;
}
let solve_options = latent_arrow_solve_options(
&system,
&opts,
!self.latent.effective_is_all_euclidean(),
);
let step_result =
solve_arrow_newton_step_with_options(&system, ridge_t, ridge_beta, &solve_options);
match step_result {
Ok((delta_t, delta_beta, cache)) => {
let delta_t = limit_delta_to_riemannian_trust_region(
delta_t,
self.latent,
solve_options.riemannian_trust_region,
solve_options.trust_region.radius,
);
let predicted_reduction = arrow_quadratic_model_reduction(
&system,
delta_t.view(),
delta_beta.view(),
ridge_t,
ridge_beta,
)
.map_err(|e| {
format!("LatentInnerSolver: predicted reduction failed at iter {iter}: {e}")
})?;
let beta_before = self.beta.clone();
let t_before = self.latent.as_flat().clone();
for (b, db) in self.beta.iter_mut().zip(delta_beta.iter()) {
*b += *db;
}
self.latent.retract_flat_delta(delta_t.view());
let trial_objective = self
.assembler
.objective(self.beta.view(), self.latent)
.map_err(|e| {
format!("LatentInnerSolver: objective failed at trial iter {iter}: {e}")
})?;
let objective_scale = current_objective.abs().max(1.0);
let noise_floor = objective_scale * 1e-14;
let actual_reduction = current_objective - trial_objective;
let rho = if predicted_reduction > noise_floor {
actual_reduction / predicted_reduction
} else if actual_reduction >= -noise_floor {
1.0
} else {
-1.0
};
if rho > 0.0 && trial_objective.is_finite() {
current_objective = trial_objective;
ridge_t = (ridge_t * opts.lm_shrink).max(0.0);
ridge_beta = (ridge_beta * opts.lm_shrink).max(0.0);
last_cache = Some(cache);
iter += 1;
} else {
self.beta = beta_before;
self.latent.set_flat(t_before.view());
ridge_t = if ridge_t == 0.0 {
1e-6
} else {
ridge_t * opts.lm_grow
};
ridge_beta = if ridge_beta == 0.0 {
1e-6
} else {
ridge_beta * opts.lm_grow
};
if ridge_t > opts.max_ridge || ridge_beta > opts.max_ridge {
return Err(format!(
"LatentInnerSolver: LM rejected nonlinear step until ridge \
exceeded max ({}) at iter {} \
(ridge_t={ridge_t:.3e}, ridge_beta={ridge_beta:.3e}, \
rho={rho:.3e}, predicted_reduction={predicted_reduction:.3e}, \
actual_reduction={actual_reduction:.3e})",
opts.max_ridge, iter,
));
}
}
}
Err(err @ ArrowSchurError::PerRowFactorFailed { .. })
| Err(err @ ArrowSchurError::PerRowFactorIllConditioned { .. })
| Err(err @ ArrowSchurError::SchurFactorFailed { .. })
| Err(err @ ArrowSchurError::PcgFailed { .. })
| Err(err @ ArrowSchurError::AdaptiveCorrectionFailed { .. }) => {
ridge_t = if ridge_t == 0.0 {
1e-6
} else {
ridge_t * opts.lm_grow
};
ridge_beta = if ridge_beta == 0.0 {
1e-6
} else {
ridge_beta * opts.lm_grow
};
if ridge_t > opts.max_ridge || ridge_beta > opts.max_ridge {
return Err(format!(
"LatentInnerSolver: cold-restart condition — LM ridge \
exceeded max ({}) at iter {} \
(ridge_t={ridge_t:.3e}, ridge_beta={ridge_beta:.3e}); \
root-cause arrow-Schur error: {err}",
opts.max_ridge, iter,
));
}
}
}
}
Ok(LatentInnerOutcome {
beta: self.beta.clone(),
factor_cache: last_cache,
iterations: iter,
converged,
final_ridge_t: ridge_t,
final_ridge_beta: ridge_beta,
})
}
}
fn latent_arrow_solve_options(
system: &ArrowSchurSystem,
opts: &LatentInnerOptions,
riemannian_trust_region: bool,
) -> ArrowSolveOptions {
let mut solve_options = ArrowSolveOptions::automatic(system.k);
if let Some(mode) = opts.solver_mode {
solve_options.mode = mode;
}
solve_options.trust_region.radius = opts.trust_region_radius;
solve_options.riemannian_trust_region = riemannian_trust_region;
solve_options
}
fn limit_delta_to_riemannian_trust_region(
mut delta_t: Array1<f64>,
latent: &LatentCoordValues,
enabled: bool,
radius: f64,
) -> Array1<f64> {
if !enabled || !radius.is_finite() || radius <= 0.0 {
return delta_t;
}
let row_weights = latent.effective_metric_weights();
assert_eq!(row_weights.len(), latent.latent_dim());
let mut norm_sq = 0.0_f64;
for n in 0..latent.n_obs() {
let start = n * latent.latent_dim();
for a in 0..latent.latent_dim() {
let value = delta_t[start + a];
norm_sq += row_weights[a] * value * value;
}
}
let norm = norm_sq.sqrt();
if norm <= radius || norm == 0.0 {
return delta_t;
}
let shrink = radius / norm;
for value in delta_t.iter_mut() {
*value *= shrink;
}
delta_t
}
fn system_gradient_norm_sq(sys: &ArrowSchurSystem) -> f64 {
let mut acc = 0.0_f64;
for j in 0..sys.k {
acc += sys.gb[j] * sys.gb[j];
}
for row in &sys.rows {
for c in 0..sys.d {
acc += row.gt[c] * row.gt[c];
}
}
acc
}
fn iterate_norm(beta: ArrayView1<'_, f64>, t: ArrayView1<'_, f64>) -> f64 {
let mut acc = 0.0_f64;
for v in beta.iter() {
acc += v * v;
}
for v in t.iter() {
acc += v * v;
}
acc.sqrt()
}
#[cfg(test)]
mod tests {
use super::*;
use crate::terms::latent_coord::{LatentCoordValues, LatentIdMode};
use ndarray::array;
struct ZeroAssembler {
n: usize,
d: usize,
k: usize,
}
impl ArrowSystemAssembler for ZeroAssembler {
fn assemble(
&mut self,
arr: ArrayView1<'_, f64>,
latent_coords: &LatentCoordValues,
) -> Result<ArrowSchurSystem, String> {
assert!(arr.iter().all(|v| !v.is_nan()));
assert!(std::mem::size_of_val(latent_coords) > 0);
let mut sys = ArrowSchurSystem::new(self.n, self.d, self.k);
for j in 0..self.k {
sys.hbb[[j, j]] = 1.0;
}
for row in sys.rows.iter_mut() {
for c in 0..self.d {
row.htt[[c, c]] = 1.0;
}
}
Ok(sys)
}
fn objective(
&mut self,
arr: ArrayView1<'_, f64>,
latent_coords: &LatentCoordValues,
) -> Result<f64, String> {
assert!(arr.iter().all(|v| !v.is_nan()));
assert!(std::mem::size_of_val(latent_coords) > 0);
Ok(0.0)
}
}
#[test]
fn zero_assembler_converges_immediately() {
let m = array![[0.0_f64, 0.0], [0.0, 0.0]];
let mut latent = LatentCoordValues::from_matrix(m.view(), LatentIdMode::None);
let beta = Array1::<f64>::zeros(3);
let assembler = ZeroAssembler { n: 2, d: 2, k: 3 };
let mut solver =
LatentInnerSolver::new(beta, &mut latent, assembler, LatentInnerOptions::default());
let outcome = solver.solve().expect("zero assembler always succeeds");
assert!(outcome.converged);
assert_eq!(outcome.iterations, 0);
}
}