use crate::utils::Float;
use std::fmt;
#[cfg(target_arch = "wasm32")]
use web_time::Instant;
#[cfg(not(target_arch = "wasm32"))]
use std::time::Instant;
/// Wrapper that formats a float roughly like C's `%g` with 6 significant
/// digits: fixed notation in the mid range, scientific notation otherwise,
/// with trailing zeros (and a trailing dot) trimmed.
struct G<T>(T);
impl fmt::Display for G<f64> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let value = self.0;
        if value == 0.0 {
            return f.write_str("0");
        }
        // Decimal exponent of |value| picks fixed vs. scientific notation.
        let magnitude = value.abs().log10().floor() as i32;
        if (-4..6).contains(&magnitude) {
            // Fixed notation with up to 6 significant digits, then trim.
            let digits = (5 - magnitude).max(0) as usize;
            let text = format!("{:.*}", digits, value);
            if text.contains('.') {
                f.write_str(text.trim_end_matches('0').trim_end_matches('.'))
            } else {
                f.write_str(&text)
            }
        } else {
            // Scientific notation; trim trailing zeros from the mantissa only.
            let text = format!("{:.5e}", value);
            match text.find('e') {
                Some(split) => {
                    let (mantissa, exponent) = text.split_at(split);
                    let mantissa = mantissa.trim_end_matches('0').trim_end_matches('.');
                    write!(f, "{}{}", mantissa, exponent)
                }
                None => f.write_str(&text),
            }
        }
    }
}
impl fmt::Display for G<f32> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Widen to f64 and reuse the f64 formatter.
        write!(f, "{}", G(f64::from(self.0)))
    }
}
pub const FIXED_SIZE_THRESHOLD: usize = 9;
/// Tuning knobs for `lm_solve`.
pub struct LmConfig<T> {
    /// Absolute cost improvement below which a step counts as "small".
    pub abs_precision: T,
    /// Relative cost improvement below which a step counts as "small".
    pub rel_precision: T,
    /// Hard cap on iterations (accepted and rejected steps both count).
    pub max_iters: usize,
    /// Minimum iterations before any convergence criterion may fire.
    pub min_iters: usize,
    /// Consecutive "small" improvements required before stopping.
    pub patience: usize,
    /// Starting value of the damping parameter lambda.
    pub initial_lambda: T,
    /// Stop early once the cost drops to this value or below.
    pub cost_threshold: T,
    /// Print per-iteration progress to stderr.
    pub verbose: bool,
}
impl<T: Float> Default for LmConfig<T> {
    /// General-purpose defaults: loose precisions, 100 iterations, small
    /// initial damping, no cost threshold, quiet output.
    fn default() -> Self {
        Self {
            max_iters: 100,
            min_iters: 5,
            patience: 3,
            verbose: false,
            abs_precision: T::from(1e-6).unwrap(),
            rel_precision: T::from(1e-4).unwrap(),
            initial_lambda: T::from(1e-4).unwrap(),
            cost_threshold: T::zero(),
        }
    }
}
/// Error raised during banded Hessian assembly when a matrix element falls
/// outside the declared bandwidth.
#[derive(Debug)]
pub struct BandError {
    pub row: usize,
    pub col: usize,
    pub kd: usize,
}
impl std::fmt::Display for BandError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "band overflow: element ({}, {}) exceeds bandwidth kd={}",
            self.row, self.col, self.kd
        )
    }
}
/// Problem interface for the LM driver: cost evaluation plus one
/// gradient/Hessian assembly method per matrix backend. A problem only needs
/// working implementations of the variants used by the chosen `LmSolver`.
pub trait LmProblem<T> {
    /// Scalar cost at `params`.
    fn calc_cost(&mut self, params: &[T]) -> T;
    /// Assemble gradient and full dense Hessian.
    fn calc_grad_hessian_dense(&mut self, params: &[T], grad: &mut [T], hessian: &mut [T]);
    /// Assemble gradient and banded Hessian with bandwidth `kd`; errors when
    /// an element falls outside the band.
    fn calc_grad_hessian_band(&mut self, params: &[T], grad: &mut [T], band: &mut [T], kd: usize) -> Result<(), BandError>;
    /// Assemble gradient and Hessian in COO (triplet) form.
    fn calc_grad_hessian_sparse(&mut self, params: &[T], grad: &mut [T], coo: &mut CooMatrix<T>);
    /// Assemble directly into an existing CSC matrix (fixed sparsity pattern).
    fn calc_grad_hessian_sparse_direct(&mut self, params: &[T], grad: &mut [T], csc: &mut CscMatrix<T>);
    /// Write values straight into `vals` through a precomputed position map.
    fn calc_grad_hessian_sparse_indexed(&mut self, params: &[T], grad: &mut [T], vals: &mut [T], positions: &[usize]);
    /// Optional hook invoked after a step is accepted (default: no-op).
    fn advance(&mut self, _params: &mut [T]) {}
}
/// Adapter turning a pair of closures (cost, dense gradient/Hessian) into an
/// `LmProblem`; supports only the dense assembly path.
pub struct FnProblem<F1, F2> {
    /// Cost closure: `params -> cost`.
    pub cost: F1,
    /// Dense assembly closure: `(params, grad, hessian)`.
    pub grad_hessian: F2,
}
impl<T, F1, F2> LmProblem<T> for FnProblem<F1, F2>
where
    F1: FnMut(&[T]) -> T,
    F2: FnMut(&[T], &mut [T], &mut [T]),
{
    fn calc_cost(&mut self, params: &[T]) -> T {
        (self.cost)(params)
    }
    fn calc_grad_hessian_dense(&mut self, params: &[T], grad: &mut [T], hessian: &mut [T]) {
        (self.grad_hessian)(params, grad, hessian)
    }
    // The closures only provide the dense path; pairing FnProblem with a
    // banded or sparse backend is a programming error, hence the panics below.
    fn calc_grad_hessian_band(&mut self, _params: &[T], _grad: &mut [T], _band: &mut [T], _kd: usize) -> Result<(), BandError> {
        unimplemented!("FnProblem does not support band assembly")
    }
    fn calc_grad_hessian_sparse(&mut self, _params: &[T], _grad: &mut [T], _coo: &mut CooMatrix<T>) {
        unimplemented!("FnProblem does not support sparse assembly")
    }
    fn calc_grad_hessian_sparse_direct(&mut self, _params: &[T], _grad: &mut [T], _csc: &mut CscMatrix<T>) {
        unimplemented!("FnProblem does not support sparse direct assembly")
    }
    fn calc_grad_hessian_sparse_indexed(&mut self, _params: &[T], _grad: &mut [T], _vals: &mut [T], _positions: &[usize]) {
        unimplemented!("FnProblem does not support sparse indexed assembly")
    }
}
/// Outcome of an LM run.
pub struct LmResult<T> {
    /// Final parameter vector.
    pub x: Vec<T>,
    /// Cost at the initial parameters.
    pub start_cost: T,
    /// Cost at the final parameters.
    pub end_cost: T,
    /// Number of iterations performed (accepted and rejected steps).
    pub iterations: usize,
}
/// Fixed-size SPD solve via nalgebra Cholesky; `a` is row-major `N x N`.
/// Writes the solution into `x` and returns `true`, or returns `false`
/// (leaving `x` untouched) when `a` is not positive definite.
fn cholesky_solve<const N: usize>(a: &[f64], b: &[f64], x: &mut [f64]) -> bool {
    let lhs = nalgebra::SMatrix::<f64, N, N>::from_row_slice(a);
    let rhs = nalgebra::SVector::<f64, N>::from_column_slice(b);
    if let Some(factor) = nalgebra::linalg::Cholesky::new(lhs) {
        x.copy_from_slice(factor.solve(&rhs).as_slice());
        true
    } else {
        false
    }
}
/// `f32` twin of `cholesky_solve`: fixed-size SPD solve via Cholesky.
fn cholesky_solve_f32<const N: usize>(a: &[f32], b: &[f32], x: &mut [f32]) -> bool {
    let lhs = nalgebra::SMatrix::<f32, N, N>::from_row_slice(a);
    let rhs = nalgebra::SVector::<f32, N>::from_column_slice(b);
    if let Some(factor) = nalgebra::linalg::Cholesky::new(lhs) {
        x.copy_from_slice(factor.solve(&rhs).as_slice());
        true
    } else {
        false
    }
}
/// Dispatch an SPD solve to the const-generic implementation matching the
/// runtime dimension `$n` (1..=9, i.e. up to `FIXED_SIZE_THRESHOLD`).
/// Callers must pre-check the range; any other value hits `unreachable!`.
macro_rules! dispatch_cholesky {
    ($solve_fn:ident, $n:expr, $a:expr, $b:expr, $x:expr) => {
        match $n {
            1 => $solve_fn::<1>($a, $b, $x),
            2 => $solve_fn::<2>($a, $b, $x),
            3 => $solve_fn::<3>($a, $b, $x),
            4 => $solve_fn::<4>($a, $b, $x),
            5 => $solve_fn::<5>($a, $b, $x),
            6 => $solve_fn::<6>($a, $b, $x),
            7 => $solve_fn::<7>($a, $b, $x),
            8 => $solve_fn::<8>($a, $b, $x),
            9 => $solve_fn::<9>($a, $b, $x),
            _ => unreachable!(),
        }
    };
}
/// Heap-allocated `n x n` SPD solve via nalgebra Cholesky (row-major `a`).
fn cholesky_solve_dynamic(n: usize, a: &[f64], b: &[f64], x: &mut [f64]) -> bool {
    let lhs = nalgebra::DMatrix::from_row_slice(n, n, a);
    let rhs = nalgebra::DVector::from_column_slice(b);
    if let Some(factor) = nalgebra::linalg::Cholesky::new(lhs) {
        x.copy_from_slice(factor.solve(&rhs).as_slice());
        true
    } else {
        false
    }
}
/// `f32` twin of `cholesky_solve_dynamic`.
fn cholesky_solve_dynamic_f32(n: usize, a: &[f32], b: &[f32], x: &mut [f32]) -> bool {
    let lhs = nalgebra::DMatrix::from_row_slice(n, n, a);
    let rhs = nalgebra::DVector::from_column_slice(b);
    if let Some(factor) = nalgebra::linalg::Cholesky::new(lhs) {
        x.copy_from_slice(factor.solve(&rhs).as_slice());
        true
    } else {
        false
    }
}
pub fn solve_spd(n: usize, a: &[f64], b: &[f64], x: &mut [f64]) -> bool {
if n <= 9 {
dispatch_cholesky!(cholesky_solve, n, a, b, x)
} else {
cholesky_solve_dynamic(n, a, b, x)
}
}
pub fn solve_spd_f32(n: usize, a: &[f32], b: &[f32], x: &mut [f32]) -> bool {
if n <= 9 {
dispatch_cholesky!(cholesky_solve_f32, n, a, b, x)
} else {
cholesky_solve_dynamic_f32(n, a, b, x)
}
}
/// Linear-algebra backend for `lm_solve`: owns the Hessian representation and
/// solves the damped normal equations.
pub trait LmSolver<T: Float> {
    /// Backend-specific Hessian storage.
    type Matrix;
    /// Allocate Hessian storage for an `n`-parameter problem.
    fn new_matrix(&self, n: usize) -> Self::Matrix;
    /// Assemble gradient and Hessian at `params`.
    fn compute(&self, problem: &mut dyn LmProblem<T>, params: &[T], grad: &mut [T], matrix: &mut Self::Matrix);
    /// Copy out the undamped Hessian diagonal. It is saved by the driver so
    /// damping can be re-applied with different lambdas on the same assembly.
    fn extract_diagonal(&self, matrix: &Self::Matrix, diagonal: &mut [T]);
    /// Replace the diagonal with `(1 + lambda) * diagonal` and solve for
    /// `delta`; returns `false` when the damped matrix is not positive
    /// definite (the driver then increases lambda and retries).
    fn solve_damped(&mut self, n: usize, matrix: &mut Self::Matrix, diagonal: &[T], lambda: T, grad: &[T], delta: &mut [T]) -> bool;
}
pub struct Dense;
impl LmSolver<f64> for Dense {
    type Matrix = Vec<f64>;
    /// Row-major `n x n` dense Hessian buffer.
    fn new_matrix(&self, n: usize) -> Vec<f64> {
        vec![0.0; n * n]
    }
    fn compute(&self, problem: &mut dyn LmProblem<f64>, params: &[f64], grad: &mut [f64], hessian: &mut Vec<f64>) {
        problem.calc_grad_hessian_dense(params, grad, hessian);
    }
    fn extract_diagonal(&self, matrix: &Vec<f64>, diagonal: &mut [f64]) {
        let n = diagonal.len();
        for (i, slot) in diagonal.iter_mut().enumerate() {
            *slot = matrix[i * n + i];
        }
    }
    /// Write the damped diagonal over the stored one, then solve in place.
    fn solve_damped(&mut self, n: usize, matrix: &mut Vec<f64>, diagonal: &[f64], lambda: f64, grad: &[f64], delta: &mut [f64]) -> bool {
        let scale = 1.0 + lambda;
        for i in 0..n {
            matrix[i * n + i] = scale * diagonal[i];
        }
        solve_spd(n, matrix, grad, delta)
    }
}
impl LmSolver<f32> for Dense {
    type Matrix = Vec<f32>;
    /// Row-major `n x n` dense Hessian buffer.
    fn new_matrix(&self, n: usize) -> Vec<f32> {
        vec![0.0; n * n]
    }
    fn compute(&self, problem: &mut dyn LmProblem<f32>, params: &[f32], grad: &mut [f32], hessian: &mut Vec<f32>) {
        problem.calc_grad_hessian_dense(params, grad, hessian);
    }
    fn extract_diagonal(&self, matrix: &Vec<f32>, diagonal: &mut [f32]) {
        let n = diagonal.len();
        for (i, slot) in diagonal.iter_mut().enumerate() {
            *slot = matrix[i * n + i];
        }
    }
    /// Write the damped diagonal over the stored one, then solve in place.
    fn solve_damped(&mut self, n: usize, matrix: &mut Vec<f32>, diagonal: &[f32], lambda: f32, grad: &[f32], delta: &mut [f32]) -> bool {
        let scale = 1.0 + lambda;
        for i in 0..n {
            matrix[i * n + i] = scale * diagonal[i];
        }
        solve_spd_f32(n, matrix, grad, delta)
    }
}
/// Banded-Hessian backend. Storage is column-major with `kd + 1` entries per
/// column and the diagonal at offset `kd` within each column (see
/// `extract_diagonal`).
pub struct Band {
    /// Bandwidth parameter: number of off-diagonals stored per column.
    pub kd: usize,
}
impl LmSolver<f64> for Band {
    type Matrix = Vec<f64>;
    /// Band storage: `kd + 1` slots per column.
    fn new_matrix(&self, n: usize) -> Vec<f64> {
        vec![0.0; (self.kd + 1) * n]
    }
    fn compute(&self, problem: &mut dyn LmProblem<f64>, params: &[f64], grad: &mut [f64], band: &mut Vec<f64>) {
        problem.calc_grad_hessian_band(params, grad, band, self.kd)
            .expect("band assembly failed: element outside bandwidth");
    }
    fn extract_diagonal(&self, matrix: &Vec<f64>, diagonal: &mut [f64]) {
        let ldab = self.kd + 1;
        for (i, slot) in diagonal.iter_mut().enumerate() {
            *slot = matrix[self.kd + i * ldab];
        }
    }
    fn solve_damped(&mut self, n: usize, matrix: &mut Vec<f64>, diagonal: &[f64], lambda: f64, grad: &[f64], delta: &mut [f64]) -> bool {
        let ldab = self.kd + 1;
        // The band solve mutates its input, so damp a scratch copy and keep
        // `matrix` intact for retries with a different lambda.
        let mut work = matrix.clone();
        let scale = 1.0 + lambda;
        for i in 0..n {
            work[self.kd + i * ldab] = scale * diagonal[i];
        }
        // delta doubles as RHS and in-place solution.
        delta.copy_from_slice(grad);
        solve_spd_band(n, self.kd, &mut work, delta)
    }
}
impl LmSolver<f32> for Band {
    type Matrix = Vec<f32>;
    /// Band storage: `kd + 1` slots per column.
    fn new_matrix(&self, n: usize) -> Vec<f32> {
        vec![0.0; (self.kd + 1) * n]
    }
    fn compute(&self, problem: &mut dyn LmProblem<f32>, params: &[f32], grad: &mut [f32], band: &mut Vec<f32>) {
        problem.calc_grad_hessian_band(params, grad, band, self.kd)
            .expect("band assembly failed: element outside bandwidth");
    }
    fn extract_diagonal(&self, matrix: &Vec<f32>, diagonal: &mut [f32]) {
        let ldab = self.kd + 1;
        for (i, slot) in diagonal.iter_mut().enumerate() {
            *slot = matrix[self.kd + i * ldab];
        }
    }
    fn solve_damped(&mut self, n: usize, matrix: &mut Vec<f32>, diagonal: &[f32], lambda: f32, grad: &[f32], delta: &mut [f32]) -> bool {
        let ldab = self.kd + 1;
        // Damp a scratch copy so `matrix` survives lambda retries.
        let mut work = matrix.clone();
        let scale = 1.0 + lambda;
        for i in 0..n {
            work[self.kd + i * ldab] = scale * diagonal[i];
        }
        // delta doubles as RHS and in-place solution.
        delta.copy_from_slice(grad);
        solve_spd_band_f32(n, self.kd, &mut work, delta)
    }
}
/// Banded backend delegating the damped solve to `solve_spd_band_lapack`;
/// same band layout as `Band`.
#[cfg(feature = "lapack")]
pub struct BandLapack {
    /// Bandwidth parameter: number of off-diagonals stored per column.
    pub kd: usize,
}
#[cfg(feature = "lapack")]
impl LmSolver<f64> for BandLapack {
    type Matrix = Vec<f64>;
    /// Band storage: `kd + 1` slots per column.
    fn new_matrix(&self, n: usize) -> Vec<f64> {
        vec![0.0; (self.kd + 1) * n]
    }
    fn compute(&self, problem: &mut dyn LmProblem<f64>, params: &[f64], grad: &mut [f64], band: &mut Vec<f64>) {
        problem.calc_grad_hessian_band(params, grad, band, self.kd)
            .expect("band assembly failed: element outside bandwidth");
    }
    fn extract_diagonal(&self, matrix: &Vec<f64>, diagonal: &mut [f64]) {
        let ldab = self.kd + 1;
        for (i, slot) in diagonal.iter_mut().enumerate() {
            *slot = matrix[self.kd + i * ldab];
        }
    }
    fn solve_damped(&mut self, n: usize, matrix: &mut Vec<f64>, diagonal: &[f64], lambda: f64, grad: &[f64], delta: &mut [f64]) -> bool {
        let ldab = self.kd + 1;
        // The solve mutates its input; damp a scratch copy so `matrix`
        // survives lambda retries.
        let mut work = matrix.clone();
        let scale = 1.0 + lambda;
        for i in 0..n {
            work[self.kd + i * ldab] = scale * diagonal[i];
        }
        // delta doubles as RHS and in-place solution.
        delta.copy_from_slice(grad);
        solve_spd_band_lapack(n, self.kd, &mut work, delta)
    }
}
#[cfg(feature = "lapack")]
impl LmSolver<f32> for BandLapack {
    type Matrix = Vec<f32>;
    /// Band storage: `kd + 1` slots per column.
    fn new_matrix(&self, n: usize) -> Vec<f32> {
        vec![0.0; (self.kd + 1) * n]
    }
    fn compute(&self, problem: &mut dyn LmProblem<f32>, params: &[f32], grad: &mut [f32], band: &mut Vec<f32>) {
        problem.calc_grad_hessian_band(params, grad, band, self.kd)
            .expect("band assembly failed: element outside bandwidth");
    }
    fn extract_diagonal(&self, matrix: &Vec<f32>, diagonal: &mut [f32]) {
        let ldab = self.kd + 1;
        for (i, slot) in diagonal.iter_mut().enumerate() {
            *slot = matrix[self.kd + i * ldab];
        }
    }
    fn solve_damped(&mut self, n: usize, matrix: &mut Vec<f32>, diagonal: &[f32], lambda: f32, grad: &[f32], delta: &mut [f32]) -> bool {
        let ldab = self.kd + 1;
        // Damp a scratch copy so `matrix` survives lambda retries.
        let mut work = matrix.clone();
        let scale = 1.0 + lambda;
        for i in 0..n {
            work[self.kd + i * ldab] = scale * diagonal[i];
        }
        // delta doubles as RHS and in-place solution.
        delta.copy_from_slice(grad);
        solve_spd_band_lapack_f32(n, self.kd, &mut work, delta)
    }
}
/// Core Levenberg-Marquardt driver, generic over scalar type and linear
/// solver backend.
///
/// Starting from `x0`, repeatedly assembles gradient and Hessian via
/// `solver`/`problem`, solves the damped system
/// `(H with diag replaced by (1+lambda)*diag) * delta = grad`, and accepts
/// steps that reduce the cost, adapting `lambda` multiplicatively.
///
/// Terminates when: `config.max_iters` is exhausted; the cost reaches
/// `config.cost_threshold`; `config.patience` consecutive improvements fall
/// below `abs_precision`/`rel_precision`; the improvement is at floating-point
/// precision; or `lambda` exceeds an internal ceiling without an accepted step.
pub fn lm_solve<T: Float, S: LmSolver<T>>(
    x0: &[T],
    solver: &mut S,
    problem: &mut impl LmProblem<T>,
    config: &LmConfig<T>,
) -> LmResult<T> {
    let n = x0.len();
    // Empty parameter vector: nothing to optimize.
    if n == 0 {
        return LmResult {
            x: x0.to_vec(), start_cost: T::zero(), end_cost: T::zero(), iterations: 0,
        };
    }
    let mut cur_x = x0.to_vec();
    let mut try_x = vec![T::zero(); n];
    let mut grad = vec![T::zero(); n];
    let mut matrix = solver.new_matrix(n);
    let mut diagonal = vec![T::zero(); n];
    let mut delta = vec![T::zero(); n];
    // Damping schedule: multiplicative decrease on accepted steps, increase
    // on rejected ones, clamped between floor and ceiling.
    let lambda_floor = T::from(1e-12).unwrap();
    let lambda_ceiling = T::from(1e10).unwrap();
    let lambda_down = T::from(0.2).unwrap();
    let lambda_up = T::from(10.0).unwrap();
    let eight = T::from(8.0).unwrap();
    let mut lambda = config.initial_lambda;
    let start_cost = problem.calc_cost(&cur_x);
    let mut end_cost = start_cost;
    // Already a perfect fit; nothing to do.
    if start_cost == T::zero() {
        return LmResult { x: cur_x, start_cost, end_cost, iterations: 0 };
    }
    let mut small_count = 0usize;
    let mut done = false;
    let mut iter = 0usize;
    let mut timer = Instant::now();
    while iter < config.max_iters && !done {
        // Outer step: (re)assemble gradient/Hessian at the current point and
        // save the undamped diagonal so the inner loop can re-damp it.
        solver.compute(problem, &cur_x, &mut grad, &mut matrix);
        solver.extract_diagonal(&matrix, &mut diagonal);
        let mut inner = 0usize;
        const INNER_LOOPS: usize = 20;
        // Inner loop: retry the same linearization with growing lambda until
        // a step is accepted or we give up on this linearization.
        while inner < INNER_LOOPS && iter < config.max_iters {
            iter += 1;
            // Factorization failure (not positive definite): damp harder.
            if !solver.solve_damped(n, &mut matrix, &diagonal, lambda, &grad, &mut delta) {
                lambda = lambda * lambda_up;
                inner += 1;
                continue;
            }
            for i in 0..n {
                try_x[i] = cur_x[i] - delta[i];
            }
            let new_cost = problem.calc_cost(&try_x);
            if config.verbose {
                let step_us = timer.elapsed().as_micros();
                timer = Instant::now();
                eprintln!("{}/{}: {}->{} / {}, lambda={} (step={})",
                    iter, inner,
                    G(end_cost.to_f64().unwrap()), G(new_cost.to_f64().unwrap()),
                    G((end_cost - new_cost).to_f64().unwrap()), G(lambda.to_f64().unwrap()),
                    step_us);
            }
            // Improvement indistinguishable from floating-point noise
            // (within ~8 ulps of the current cost).
            let at_precision = new_cost.is_finite()
                && (end_cost - new_cost).abs() < eight * T::epsilon() * end_cost.abs();
            if new_cost.is_finite() && new_cost < end_cost {
                // Accepted step: relax damping (never below epsilon).
                if lambda > lambda_floor {
                    lambda = lambda * lambda_down;
                }
                lambda = lambda.max(T::epsilon());
                cur_x.copy_from_slice(&try_x);
                // Let the problem post-process accepted parameters.
                problem.advance(&mut cur_x);
                if iter >= config.min_iters && new_cost <= config.cost_threshold {
                    end_cost = new_cost;
                    done = true;
                    break;
                }
                // Convergence bookkeeping: `patience` consecutive "small"
                // improvements terminate the optimization.
                let abs_improve = end_cost - new_cost;
                let rel_improve = if end_cost > T::epsilon() {
                    abs_improve / end_cost
                } else {
                    T::zero()
                };
                let is_small = abs_improve < config.abs_precision
                    || rel_improve < config.rel_precision;
                if iter >= config.min_iters && is_small {
                    small_count += 1;
                    if small_count >= config.patience {
                        done = true;
                    }
                } else {
                    small_count = 0;
                }
                end_cost = new_cost;
                break;
            }
            if at_precision {
                done = true;
                break;
            }
            // Rejected step: damp harder; abandon this linearization when
            // lambda blows past the ceiling.
            lambda = lambda * lambda_up;
            if lambda > lambda_ceiling {
                inner = INNER_LOOPS;
                break;
            }
            inner += 1;
        }
        // Inner loop exhausted without an accepted step: give up entirely.
        if inner >= INNER_LOOPS && !done {
            break;
        }
    }
    LmResult {
        x: cur_x,
        start_cost,
        end_cost,
        iterations: iter,
    }
}
/// Dense LM solve (f64).
pub fn solve(x0: &[f64], problem: &mut impl LmProblem<f64>, config: &LmConfig<f64>) -> LmResult<f64> {
    lm_solve(x0, &mut Dense, problem, config)
}
/// Dense LM solve (f32).
pub fn solve_f32(x0: &[f32], problem: &mut impl LmProblem<f32>, config: &LmConfig<f32>) -> LmResult<f32> {
    lm_solve(x0, &mut Dense, problem, config)
}
/// Banded LM solve (f64) with bandwidth `kd`.
pub fn solve_band(x0: &[f64], kd: usize, problem: &mut impl LmProblem<f64>, config: &LmConfig<f64>) -> LmResult<f64> {
    lm_solve(x0, &mut Band { kd }, problem, config)
}
/// Banded LM solve (f32) with bandwidth `kd`.
pub fn solve_band_f32(x0: &[f32], kd: usize, problem: &mut impl LmProblem<f32>, config: &LmConfig<f32>) -> LmResult<f32> {
    lm_solve(x0, &mut Band { kd }, problem, config)
}
/// Banded LM solve (f64) through the LAPACK-backed band solver.
#[cfg(feature = "lapack")]
pub fn solve_band_lapack(x0: &[f64], kd: usize, problem: &mut impl LmProblem<f64>, config: &LmConfig<f64>) -> LmResult<f64> {
    lm_solve(x0, &mut BandLapack { kd }, problem, config)
}
/// Banded LM solve (f32) through the LAPACK-backed band solver.
#[cfg(feature = "lapack")]
pub fn solve_band_lapack_f32(x0: &[f32], kd: usize, problem: &mut impl LmProblem<f32>, config: &LmConfig<f32>) -> LmResult<f32> {
    lm_solve(x0, &mut BandLapack { kd }, problem, config)
}
/// Simplest sparse backend: rebuilds the CSC matrix every iteration and
/// solves by densifying (see its `solve_damped`).
pub struct Sparse;
impl Sparse {
    pub fn new() -> Self {
        Sparse
    }
}
/// Wrapper holding the CSC-format Hessian used by the sparse backends.
pub struct SparseMatrix<T> {
    pub csc: CscMatrix<T>,
}
impl LmSolver<f64> for Sparse {
    type Matrix = SparseMatrix<f64>;
    fn new_matrix(&self, n: usize) -> SparseMatrix<f64> {
        SparseMatrix { csc: CscMatrix::empty(n) }
    }
    /// Re-assembles the Hessian from scratch (COO -> CSC) on every call.
    fn compute(&self, problem: &mut dyn LmProblem<f64>, params: &[f64], grad: &mut [f64], matrix: &mut SparseMatrix<f64>) {
        let n = matrix.csc.n;
        let mut coo = CooMatrix::new(n);
        problem.calc_grad_hessian_sparse(params, grad, &mut coo);
        matrix.csc = coo.to_csc();
    }
    fn extract_diagonal(&self, matrix: &SparseMatrix<f64>, diagonal: &mut [f64]) {
        // diag_pos caches the index of each diagonal entry within vals.
        for i in 0..diagonal.len() {
            diagonal[i] = matrix.csc.vals[matrix.csc.diag_pos[i]];
        }
    }
    /// NOTE(review): densifies the sparse matrix (mirroring stored entries
    /// across the diagonal) and runs the dense SPD solver — O(n^2) memory and
    /// time per solve. Prefer the faer backend for large n.
    fn solve_damped(&mut self, n: usize, matrix: &mut SparseMatrix<f64>, diagonal: &[f64], lambda: f64, grad: &[f64], delta: &mut [f64]) -> bool {
        for i in 0..n { matrix.csc.vals[matrix.csc.diag_pos[i]] = (1.0 + lambda) * diagonal[i]; }
        let mut dense = vec![0.0f64; n * n];
        for j in 0..n {
            for k in matrix.csc.col_ptr[j]..matrix.csc.col_ptr[j + 1] {
                let i = matrix.csc.row_idx[k] as usize;
                let v = matrix.csc.vals[k];
                dense[i * n + j] = v;
                if i != j { dense[j * n + i] = v; }
            }
        }
        solve_spd(n, &dense, grad, delta)
    }
}
/// Sparse LM solve using the simple densifying `Sparse` backend.
pub fn solve_sparse(x0: &[f64], problem: &mut impl LmProblem<f64>, config: &LmConfig<f64>) -> LmResult<f64> {
    lm_solve(x0, &mut Sparse::new(), problem, config)
}
/// Sparse backend that assembles the sparsity pattern once (via COO) and then
/// lets the problem write values directly into the existing CSC matrix.
pub struct SparseDirect {
    /// Set once the CSC pattern exists; `Cell` because `compute` only
    /// receives `&self` (matching the `RefCell` used by the faer backends).
    pattern_built: std::cell::Cell<bool>,
}
impl SparseDirect {
    pub fn new() -> Self {
        SparseDirect { pattern_built: std::cell::Cell::new(false) }
    }
}
impl LmSolver<f64> for SparseDirect {
    type Matrix = SparseMatrix<f64>;
    fn new_matrix(&self, n: usize) -> SparseMatrix<f64> {
        SparseMatrix { csc: CscMatrix::empty(n) }
    }
    fn compute(&self, problem: &mut dyn LmProblem<f64>, params: &[f64], grad: &mut [f64], matrix: &mut SparseMatrix<f64>) {
        if !self.pattern_built.get() {
            // First call: assemble via COO to discover the sparsity pattern.
            let n = matrix.csc.n;
            let mut coo = CooMatrix::new(n);
            problem.calc_grad_hessian_sparse(params, grad, &mut coo);
            matrix.csc = coo.to_csc();
            // BUGFIX: this flag was never set before, so the direct-assembly
            // branch below was unreachable and every iteration paid for a
            // full COO rebuild.
            self.pattern_built.set(true);
        } else {
            problem.calc_grad_hessian_sparse_direct(params, grad, &mut matrix.csc);
        }
    }
    fn extract_diagonal(&self, matrix: &SparseMatrix<f64>, diagonal: &mut [f64]) {
        // diag_pos caches the index of each diagonal entry within vals.
        for i in 0..diagonal.len() {
            diagonal[i] = matrix.csc.vals[matrix.csc.diag_pos[i]];
        }
    }
    /// NOTE(review): same densifying O(n^2) solve as the `Sparse` backend.
    fn solve_damped(&mut self, n: usize, matrix: &mut SparseMatrix<f64>, diagonal: &[f64], lambda: f64, grad: &[f64], delta: &mut [f64]) -> bool {
        for i in 0..n { matrix.csc.vals[matrix.csc.diag_pos[i]] = (1.0 + lambda) * diagonal[i]; }
        let mut dense = vec![0.0f64; n * n];
        for j in 0..n {
            for k in matrix.csc.col_ptr[j]..matrix.csc.col_ptr[j + 1] {
                let i = matrix.csc.row_idx[k] as usize;
                let v = matrix.csc.vals[k];
                dense[i * n + j] = v;
                if i != j { dense[j * n + i] = v; }
            }
        }
        solve_spd(n, &dense, grad, delta)
    }
}
/// Sparse LM solve with one-time pattern assembly, then direct CSC updates.
pub fn solve_sparse_direct(x0: &[f64], problem: &mut impl LmProblem<f64>, config: &LmConfig<f64>) -> LmResult<f64> {
    lm_solve(x0, &mut SparseDirect::new(), problem, config)
}
/// Sparse backend using faer's sparse LLT with a cached symbolic
/// (AMD-ordered) factorization.
pub struct SparseFaer {
    // Computed once on the first solve; reused while the pattern is fixed.
    symbolic: Option<faer::sparse::linalg::cholesky::SymbolicCholesky<usize>>,
    // Value-position map from the first COO assembly; RefCell because
    // `compute` takes `&self`.
    positions: std::cell::RefCell<Option<Vec<usize>>>,
}
impl SparseFaer {
    pub fn new() -> Self {
        SparseFaer { symbolic: None, positions: std::cell::RefCell::new(None) }
    }
}
impl LmSolver<f64> for SparseFaer {
    type Matrix = SparseMatrix<f64>;
    fn new_matrix(&self, n: usize) -> SparseMatrix<f64> {
        SparseMatrix { csc: CscMatrix::empty(n) }
    }
    /// First call assembles via COO to discover the sparsity pattern and
    /// caches a value-position map; later calls write values directly
    /// through that map without re-assembling the pattern.
    fn compute(&self, problem: &mut dyn LmProblem<f64>, params: &[f64], grad: &mut [f64], matrix: &mut SparseMatrix<f64>) {
        let has_positions = self.positions.borrow().is_some();
        if has_positions {
            let positions = self.positions.borrow();
            let positions = positions.as_ref().unwrap();
            problem.calc_grad_hessian_sparse_indexed(params, grad, &mut matrix.csc.vals, positions);
        } else {
            let n = matrix.csc.n;
            let mut coo = CooMatrix::new(n);
            problem.calc_grad_hessian_sparse(params, grad, &mut coo);
            let (csc, positions) = coo.to_csc_with_map();
            *self.positions.borrow_mut() = Some(positions);
            matrix.csc = csc;
        }
    }
    fn extract_diagonal(&self, matrix: &SparseMatrix<f64>, diagonal: &mut [f64]) {
        // diag_pos caches the index of each diagonal entry within vals.
        for i in 0..diagonal.len() {
            diagonal[i] = matrix.csc.vals[matrix.csc.diag_pos[i]];
        }
    }
    /// Damps the diagonal in place, then runs faer's sparse LLT. The symbolic
    /// factorization is computed once (AMD ordering) and reused; only the
    /// numeric factorization and the triangular solves run per call.
    fn solve_damped(&mut self, n: usize, matrix: &mut SparseMatrix<f64>, diagonal: &[f64], lambda: f64, grad: &[f64], delta: &mut [f64]) -> bool {
        for i in 0..n { matrix.csc.vals[matrix.csc.diag_pos[i]] = (1.0 + lambda) * diagonal[i]; }
        use faer::sparse::linalg::cholesky::*;
        // faer wants usize row indices; converted (and allocated) per call.
        let row_idx_usize: Vec<usize> = matrix.csc.row_idx.iter().map(|&r| r as usize).collect();
        let symbolic_ref = faer::sparse::SymbolicSparseColMatRef::new_checked(
            n, n,
            &matrix.csc.col_ptr,
            None,
            &row_idx_usize,
        );
        let mat_ref = faer::sparse::SparseColMatRef::new(symbolic_ref, &matrix.csc.vals);
        if self.symbolic.is_none() {
            // One-time symbolic analysis; assumes the sparsity pattern stays
            // fixed for the lifetime of this solver.
            let sym = factorize_symbolic_cholesky(
                symbolic_ref,
                faer::Side::Upper,
                SymmetricOrdering::Amd,
                CholeskySymbolicParams::default(),
            );
            match sym {
                Ok(s) => self.symbolic = Some(s),
                Err(_) => return false,
            }
        }
        let symbolic = self.symbolic.as_ref().unwrap();
        let mut l_vals = vec![0.0f64; symbolic.len_val()];
        // Scratch memory sized by faer for the numeric factorization.
        let scratch_size = symbolic.factorize_numeric_llt_scratch::<f64>(
            faer::Par::Seq,
            faer::Spec::default(),
        );
        let mut scratch_mem: Vec<std::mem::MaybeUninit<u8>> = vec![std::mem::MaybeUninit::uninit(); scratch_size.unaligned_bytes_required()];
        let mut stack = faer::dyn_stack::MemStack::new(&mut scratch_mem);
        let llt = symbolic.factorize_numeric_llt(
            &mut l_vals,
            mat_ref,
            faer::Side::Upper,
            faer::linalg::cholesky::llt::factor::LltRegularization::default(),
            faer::Par::Seq,
            &mut stack,
            faer::Spec::default(),
        );
        // Numeric failure means the damped matrix is not positive definite;
        // the driver responds by increasing lambda.
        let llt = match llt {
            Ok(l) => l,
            Err(_) => return false,
        };
        // delta doubles as RHS and in-place solution.
        delta.copy_from_slice(grad);
        let rhs = faer::col::ColMut::from_slice_mut(delta);
        let solve_scratch_size = symbolic.solve_in_place_scratch::<f64>(1, faer::Par::Seq);
        let mut solve_mem: Vec<std::mem::MaybeUninit<u8>> = vec![std::mem::MaybeUninit::uninit(); solve_scratch_size.unaligned_bytes_required()];
        let mut solve_stack = faer::dyn_stack::MemStack::new(&mut solve_mem);
        llt.solve_in_place_with_conj(
            faer::Conj::No,
            rhs.as_mat_mut(),
            faer::Par::Seq,
            &mut solve_stack,
        );
        true
    }
}
/// Sparse LM solve using faer's cached symbolic Cholesky (f64).
pub fn solve_sparse_faer(x0: &[f64], problem: &mut impl LmProblem<f64>, config: &LmConfig<f64>) -> LmResult<f64> {
    lm_solve(x0, &mut SparseFaer::new(), problem, config)
}
/// f32 variant of `SparseFaer`: same cached-symbolic-factorization strategy.
pub struct SparseFaerF32 {
    // Computed once on the first solve; reused while the pattern is fixed.
    symbolic: Option<faer::sparse::linalg::cholesky::SymbolicCholesky<usize>>,
    // Value-position map from the first COO assembly; RefCell because
    // `compute` takes `&self`.
    positions: std::cell::RefCell<Option<Vec<usize>>>,
}
impl SparseFaerF32 {
    pub fn new() -> Self {
        SparseFaerF32 { symbolic: None, positions: std::cell::RefCell::new(None) }
    }
}
impl LmSolver<f32> for SparseFaerF32 {
    type Matrix = SparseMatrix<f32>;
    fn new_matrix(&self, n: usize) -> SparseMatrix<f32> {
        SparseMatrix { csc: CscMatrix::empty(n) }
    }
    /// First call assembles via COO and caches a value-position map; later
    /// calls write values directly through that map.
    fn compute(&self, problem: &mut dyn LmProblem<f32>, params: &[f32], grad: &mut [f32], matrix: &mut SparseMatrix<f32>) {
        let has_positions = self.positions.borrow().is_some();
        if has_positions {
            let positions = self.positions.borrow();
            let positions = positions.as_ref().unwrap();
            problem.calc_grad_hessian_sparse_indexed(params, grad, &mut matrix.csc.vals, positions);
            return;
        }
        let n = matrix.csc.n;
        let mut coo = CooMatrix::new(n);
        problem.calc_grad_hessian_sparse(params, grad, &mut coo);
        let (csc, positions) = coo.to_csc_with_map();
        *self.positions.borrow_mut() = Some(positions);
        matrix.csc = csc;
    }
    fn extract_diagonal(&self, matrix: &SparseMatrix<f32>, diagonal: &mut [f32]) {
        // diag_pos caches the index of each diagonal entry within vals.
        for i in 0..diagonal.len() {
            diagonal[i] = matrix.csc.vals[matrix.csc.diag_pos[i]];
        }
    }
    /// Damps the diagonal in place, then runs faer's sparse LLT (see the f64
    /// implementation for the structure; identical flow in f32).
    fn solve_damped(&mut self, n: usize, matrix: &mut SparseMatrix<f32>, diagonal: &[f32], lambda: f32, grad: &[f32], delta: &mut [f32]) -> bool {
        for i in 0..n { matrix.csc.vals[matrix.csc.diag_pos[i]] = (1.0 + lambda) * diagonal[i]; }
        use faer::sparse::linalg::cholesky::*;
        // faer wants usize row indices; converted per call.
        let row_idx_usize: Vec<usize> = matrix.csc.row_idx.iter().map(|&r| r as usize).collect();
        let symbolic_ref = faer::sparse::SymbolicSparseColMatRef::new_checked(
            n, n,
            &matrix.csc.col_ptr,
            None,
            &row_idx_usize,
        );
        let mat_ref = faer::sparse::SparseColMatRef::new(symbolic_ref, &matrix.csc.vals);
        if self.symbolic.is_none() {
            // One-time symbolic analysis; assumes a fixed sparsity pattern.
            let sym = factorize_symbolic_cholesky(
                symbolic_ref,
                faer::Side::Upper,
                SymmetricOrdering::Amd,
                CholeskySymbolicParams::default(),
            );
            match sym {
                Ok(s) => self.symbolic = Some(s),
                Err(_) => return false,
            }
        }
        let symbolic = self.symbolic.as_ref().unwrap();
        let mut l_vals = vec![0.0f32; symbolic.len_val()];
        // Scratch memory sized by faer for the numeric factorization.
        let scratch_size = symbolic.factorize_numeric_llt_scratch::<f32>(
            faer::Par::Seq,
            faer::Spec::default(),
        );
        let mut scratch_mem: Vec<std::mem::MaybeUninit<u8>> = vec![std::mem::MaybeUninit::uninit(); scratch_size.unaligned_bytes_required()];
        let mut stack = faer::dyn_stack::MemStack::new(&mut scratch_mem);
        let llt = symbolic.factorize_numeric_llt(
            &mut l_vals,
            mat_ref,
            faer::Side::Upper,
            faer::linalg::cholesky::llt::factor::LltRegularization::default(),
            faer::Par::Seq,
            &mut stack,
            faer::Spec::default(),
        );
        // Numeric failure: not positive definite; driver increases lambda.
        let llt = match llt {
            Ok(l) => l,
            Err(_) => return false,
        };
        // delta doubles as RHS and in-place solution.
        delta.copy_from_slice(grad);
        let rhs = faer::col::ColMut::from_slice_mut(delta);
        let solve_scratch_size = symbolic.solve_in_place_scratch::<f32>(1, faer::Par::Seq);
        let mut solve_mem: Vec<std::mem::MaybeUninit<u8>> = vec![std::mem::MaybeUninit::uninit(); solve_scratch_size.unaligned_bytes_required()];
        let mut solve_stack = faer::dyn_stack::MemStack::new(&mut solve_mem);
        llt.solve_in_place_with_conj(
            faer::Conj::No,
            rhs.as_mat_mut(),
            faer::Par::Seq,
            &mut solve_stack,
        );
        true
    }
}
/// Sparse LM solve using faer (f32).
pub fn solve_sparse_faer_f32(x0: &[f32], problem: &mut impl LmProblem<f32>, config: &LmConfig<f32>) -> LmResult<f32> {
    lm_solve(x0, &mut SparseFaerF32::new(), problem, config)
}
/// Schur-complement sparse backend for problems whose parameters split into
/// `np` leading parameters (grouped in 6-wide blocks) followed by 3-wide
/// "landmark" blocks — bundle-adjustment shaped systems.
pub struct SparseSchur {
    // Number of leading parameters kept in the reduced (Schur) system.
    np: usize,
    // Value-position map from the first COO assembly.
    positions: std::cell::RefCell<Option<Vec<usize>>>,
}
impl SparseSchur {
    /// `np`: number of leading parameters kept in the Schur-reduced system.
    pub fn new(np: usize) -> Self {
        SparseSchur { np, positions: std::cell::RefCell::new(None) }
    }
}
/// Invert a symmetric positive-definite 3x3 matrix given its upper triangle
/// packed row-wise as `[a00, a01, a02, a11, a12, a22]`. Returns the full
/// row-major 3x3 inverse, or `None` when the matrix is not positive definite.
fn invert_spd_3x3(u: &[f64; 6]) -> Option<[f64; 9]> {
    // Cholesky A = L L^T. Each pivot radicand must be strictly positive.
    // BUGFIX: the previous code computed sqrt first and tested `l <= 0.0`,
    // which is false for NaN — so a non-SPD input (negative radicand) slipped
    // through and produced Some([NaN; 9]). `!(x > 0.0)` also rejects NaN.
    if !(u[0] > 0.0) { return None; }
    let l00 = u[0].sqrt();
    let l10 = u[1] / l00;
    let d11 = u[3] - l10 * l10;
    if !(d11 > 0.0) { return None; }
    let l11 = d11.sqrt();
    let l20 = u[2] / l00;
    let l21 = (u[4] - l20 * l10) / l11;
    let d22 = u[5] - l20 * l20 - l21 * l21;
    if !(d22 > 0.0) { return None; }
    let l22 = d22.sqrt();
    // Entries of L^{-1} (lower triangular).
    let i00 = 1.0 / l00;
    let i11 = 1.0 / l11;
    let i22 = 1.0 / l22;
    let i10 = -l10 * i00 * i11;
    let i20 = -(l20 * i00 + l21 * i10) * i22;
    let i21 = -l21 * i11 * i22;
    // A^{-1} = L^{-T} L^{-1}, written out as a full symmetric 3x3.
    Some([
        i00 * i00 + i10 * i10 + i20 * i20,
        i10 * i11 + i20 * i21,
        i20 * i22,
        i10 * i11 + i20 * i21,
        i11 * i11 + i21 * i21,
        i21 * i22,
        i20 * i22,
        i21 * i22,
        i22 * i22,
    ])
}
impl LmSolver<f64> for SparseSchur {
    type Matrix = SparseMatrix<f64>;
    fn new_matrix(&self, n: usize) -> SparseMatrix<f64> {
        SparseMatrix { csc: CscMatrix::empty(n) }
    }
    /// First call assembles via COO and caches a value-position map; later
    /// calls write values directly through that map.
    fn compute(&self, problem: &mut dyn LmProblem<f64>, params: &[f64], grad: &mut [f64], matrix: &mut SparseMatrix<f64>) {
        let has_positions = self.positions.borrow().is_some();
        if has_positions {
            let positions = self.positions.borrow();
            let positions = positions.as_ref().unwrap();
            problem.calc_grad_hessian_sparse_indexed(params, grad, &mut matrix.csc.vals, positions);
        } else {
            let n = matrix.csc.n;
            let mut coo = CooMatrix::new(n);
            problem.calc_grad_hessian_sparse(params, grad, &mut coo);
            let (csc, positions) = coo.to_csc_with_map();
            *self.positions.borrow_mut() = Some(positions);
            matrix.csc = csc;
        }
    }
    fn extract_diagonal(&self, matrix: &SparseMatrix<f64>, diagonal: &mut [f64]) {
        // diag_pos caches the index of each diagonal entry within vals.
        for i in 0..diagonal.len() {
            diagonal[i] = matrix.csc.vals[matrix.csc.diag_pos[i]];
        }
    }
    /// Schur-complement solve: eliminate the trailing 3-wide landmark blocks,
    /// solve the reduced dense system over the `np` leading parameters, then
    /// back-substitute the landmark deltas.
    /// NOTE(review): assumes leading parameters are grouped in 6-wide blocks
    /// (see the `/ 6 * 6` grouping and `[f64; 18]` = 6x3 coupling blocks) and
    /// landmarks in 3-wide blocks — confirm with the problem assembly.
    fn solve_damped(&mut self, n: usize, matrix: &mut SparseMatrix<f64>, diagonal: &[f64], lambda: f64, grad: &[f64], delta: &mut [f64]) -> bool {
        let np = self.np;
        let nl = n - np;
        // No landmarks: plain dense damped solve over the leading block.
        if nl == 0 {
            let mut dense = vec![0.0; np * np];
            let csc = &matrix.csc;
            for j in 0..np {
                for idx in csc.col_ptr[j]..csc.col_ptr[j + 1] {
                    let i = csc.row_idx[idx] as usize;
                    let v = csc.vals[idx];
                    dense[i * np + j] += v;
                    if i != j { dense[j * np + i] += v; }
                }
            }
            // Damping overwrites the accumulated diagonal entries.
            for i in 0..np { dense[i * np + i] = (1.0 + lambda) * diagonal[i]; }
            return solve_spd(np, &dense, grad, delta);
        }
        // Write the damped diagonal back into the sparse values.
        for i in 0..n {
            matrix.csc.vals[matrix.csc.diag_pos[i]] = (1.0 + lambda) * diagonal[i];
        }
        let csc = &matrix.csc;
        // S starts as the dense pose-pose block; landmark contributions are
        // subtracted below to form the Schur complement S = A - C B^{-1} C^T.
        let mut s = vec![0.0; np * np];
        for j in 0..np {
            for idx in csc.col_ptr[j]..csc.col_ptr[j + 1] {
                let i = csc.row_idx[idx] as usize;
                let v = csc.vals[idx];
                s[i * np + j] += v;
                if i != j { s[j * np + i] += v; }
            }
        }
        let n_lm = nl / 3;
        // Reduced RHS for the pose system: g_p - C B^{-1} g_l.
        let mut rhs_p: Vec<f64> = grad[..np].to_vec();
        // Per-landmark bookkeeping: 6x3 coupling blocks C for each touched
        // pose block, and the inverted 3x3 landmark block B^{-1}.
        struct PoseBlock { offset: usize, c: [f64; 18] }
        struct LmInfo { poses: Vec<PoseBlock>, b_inv: [f64; 9] }
        let mut lm_info: Vec<LmInfo> = Vec::with_capacity(n_lm);
        for k in 0..n_lm {
            // Gather this landmark's packed upper-triangle 3x3 block and its
            // couplings to pose parameters.
            let mut ll_block = [0.0f64; 6];
            let mut raw_entries: Vec<(usize, usize, f64)> = Vec::new();
            for local_col in 0..3_usize {
                let j = np + k * 3 + local_col;
                for idx in csc.col_ptr[j]..csc.col_ptr[j + 1] {
                    let i = csc.row_idx[idx] as usize;
                    let v = csc.vals[idx];
                    if i < np {
                        raw_entries.push((i, local_col, v));
                    } else {
                        let li = i - np - k * 3;
                        if li <= local_col {
                            // Map (row, col) of the 3x3 block to the packed
                            // upper-triangle index.
                            let ti = match (li, local_col) {
                                (0, 0) => 0, (0, 1) => 1, (0, 2) => 2,
                                (1, 1) => 3, (1, 2) => 4, (2, 2) => 5,
                                _ => continue,
                            };
                            ll_block[ti] += v;
                        }
                    }
                }
            }
            // A non-invertible landmark block means the damped system is not
            // positive definite; the driver retries with a larger lambda.
            let b_inv = match invert_spd_3x3(&ll_block) {
                Some(inv) => inv,
                None => return false,
            };
            // Group coupling entries by their 6-wide pose block.
            raw_entries.sort_by_key(|&(r, _, _)| r);
            let mut poses: Vec<PoseBlock> = Vec::new();
            let mut i = 0;
            while i < raw_entries.len() {
                let pose_base = raw_entries[i].0 / 6 * 6;
                let mut c = [0.0f64; 18];
                while i < raw_entries.len() && raw_entries[i].0 / 6 * 6 == pose_base {
                    let (row, col, val) = raw_entries[i];
                    let local_row = row - pose_base;
                    c[local_row * 3 + col] = val;
                    i += 1;
                }
                poses.push(PoseBlock { offset: pose_base, c });
            }
            // rhs_p -= C * (B^{-1} g_l) for each coupled pose block.
            let g_l = [grad[np + k * 3], grad[np + k * 3 + 1], grad[np + k * 3 + 2]];
            let bg = [
                b_inv[0] * g_l[0] + b_inv[1] * g_l[1] + b_inv[2] * g_l[2],
                b_inv[3] * g_l[0] + b_inv[4] * g_l[1] + b_inv[5] * g_l[2],
                b_inv[6] * g_l[0] + b_inv[7] * g_l[1] + b_inv[8] * g_l[2],
            ];
            for pb in &poses {
                for r in 0..6 {
                    rhs_p[pb.offset + r] -= pb.c[r*3] * bg[0] + pb.c[r*3+1] * bg[1] + pb.c[r*3+2] * bg[2];
                }
            }
            // D = C B^{-1}, one 6x3 block per coupled pose.
            let mut d_blocks: Vec<[f64; 18]> = Vec::with_capacity(poses.len());
            for pb in &poses {
                let mut d = [0.0f64; 18];
                for r in 0..6 {
                    for c2 in 0..3 {
                        d[r * 3 + c2] = pb.c[r*3] * b_inv[c2]
                            + pb.c[r*3+1] * b_inv[3 + c2]
                            + pb.c[r*3+2] * b_inv[6 + c2];
                    }
                }
                d_blocks.push(d);
            }
            // S -= D C^T over every pair of coupled pose blocks.
            for (ii, pi) in poses.iter().enumerate() {
                let di = &d_blocks[ii];
                for pj in poses.iter() {
                    let oi = pi.offset;
                    let oj = pj.offset;
                    for r in 0..6 {
                        for c2 in 0..6 {
                            s[(oi + r) * np + oj + c2] -=
                                di[r*3] * pj.c[c2*3] + di[r*3+1] * pj.c[c2*3+1] + di[r*3+2] * pj.c[c2*3+2];
                        }
                    }
                }
            }
            lm_info.push(LmInfo { poses, b_inv });
        }
        // Solve the reduced pose system S * delta_p = rhs_p.
        let mut delta_p = vec![0.0; np];
        if !solve_spd(np, &s, &rhs_p, &mut delta_p) {
            return false;
        }
        delta[..np].copy_from_slice(&delta_p);
        // Back-substitute each landmark: delta_l = B^{-1} (g_l - C^T delta_p).
        for (k, li) in lm_info.iter().enumerate() {
            let mut g_l = [grad[np + k * 3], grad[np + k * 3 + 1], grad[np + k * 3 + 2]];
            for pb in &li.poses {
                for c2 in 0..3 {
                    for r in 0..6 {
                        g_l[c2] -= pb.c[r * 3 + c2] * delta_p[pb.offset + r];
                    }
                }
            }
            delta[np + k * 3] = li.b_inv[0] * g_l[0] + li.b_inv[1] * g_l[1] + li.b_inv[2] * g_l[2];
            delta[np + k * 3 + 1] = li.b_inv[3] * g_l[0] + li.b_inv[4] * g_l[1] + li.b_inv[5] * g_l[2];
            delta[np + k * 3 + 2] = li.b_inv[6] * g_l[0] + li.b_inv[7] * g_l[1] + li.b_inv[8] * g_l[2];
        }
        true
    }
}
/// Schur-complement sparse LM solve: the `np` leading parameters form the
/// reduced system; the rest are eliminated as 3-wide landmark blocks.
pub fn solve_sparse_schur(x0: &[f64], np: usize, problem: &mut impl LmProblem<f64>, config: &LmConfig<f64>) -> LmResult<f64> {
    lm_solve(x0, &mut SparseSchur::new(np), problem, config)
}
// FFI to a C/C++ shim exposing an Eigen LLT sparse solver (per the symbol
// names). `create` returns an opaque handle released by the matching
// `destroy` (see SparseEigen's Drop).
// NOTE(review): the index types here (`col_ptr: *const i64`,
// `row_idx: *const i32`) must match the casts in `eigen_ffi_solve` and the
// C++ signatures — confirm against the shim.
#[cfg(feature = "eigen")]
unsafe extern "C" {
    fn eigen_llt_f64_create() -> *mut std::ffi::c_void;
    fn eigen_llt_f64_destroy(handle: *mut std::ffi::c_void);
    // Solves the CSC system described by the pointers, writing into
    // `solution`; returns success.
    fn eigen_llt_f64_solve(
        handle: *mut std::ffi::c_void, n: i32, nnz: i32,
        col_ptr: *const i64, row_idx: *const i32,
        vals: *const f64, rhs: *const f64, solution: *mut f64,
    ) -> bool;
    fn eigen_llt_f32_create() -> *mut std::ffi::c_void;
    fn eigen_llt_f32_destroy(handle: *mut std::ffi::c_void);
    fn eigen_llt_f32_solve(
        handle: *mut std::ffi::c_void, n: i32, nnz: i32,
        col_ptr: *const i64, row_idx: *const i32,
        vals: *const f32, rhs: *const f32, solution: *mut f32,
    ) -> bool;
}
// FFI to a CHOLMOD-backed variant of the same shim interface (per the symbol
// names). No Rust wrapper is visible in this part of the file — presumably it
// lives elsewhere; same handle/solve contract as the Eigen LLT bindings.
#[cfg(feature = "cholmod")]
unsafe extern "C" {
    fn eigen_cholmod_f64_create() -> *mut std::ffi::c_void;
    fn eigen_cholmod_f64_destroy(handle: *mut std::ffi::c_void);
    fn eigen_cholmod_f64_solve(
        handle: *mut std::ffi::c_void, n: i32, nnz: i32,
        col_ptr: *const i64, row_idx: *const i32,
        vals: *const f64, rhs: *const f64, solution: *mut f64,
    ) -> bool;
}
/// Shared driver for the Eigen FFI solvers: passes the CSC matrix and the RHS
/// (`grad`) to the C side, which writes the solution into `delta`.
#[cfg(feature = "eigen")]
fn eigen_ffi_solve<T>(
    ffi_fn: unsafe extern "C" fn(*mut std::ffi::c_void, i32, i32, *const i64, *const i32, *const T, *const T, *mut T) -> bool,
    handle: *mut std::ffi::c_void,
    csc: &CscMatrix<T>,
    grad: &[T],
    delta: &mut [T],
) -> bool {
    let n = csc.n as i32;
    let nnz = csc.vals.len() as i32;
    // NOTE(review): these casts reinterpret the element types of
    // col_ptr/row_idx; they are only sound if CscMatrix stores col_ptr as
    // 64-bit and row_idx as 32-bit integers — confirm against CscMatrix.
    let col_ptr = csc.col_ptr.as_ptr() as *const i64;
    let row_idx = csc.row_idx.as_ptr() as *const i32;
    // SAFETY: relies on the C side reading exactly n/nnz elements and writing
    // n elements through `delta` — the shim's contract.
    unsafe {
        ffi_fn(handle, n, nnz, col_ptr, row_idx,
            csc.vals.as_ptr(), grad.as_ptr(), delta.as_mut_ptr())
    }
}
#[cfg(feature = "eigen")]
pub struct SparseEigen {
/// Opaque handle to the native Eigen LLT solver object created by
/// `eigen_llt_f64_create` and released in `Drop`.
handle: *mut std::ffi::c_void,
/// COO-to-CSC scatter map, cached after the first `compute` call discovers
/// the sparsity pattern; `RefCell` because `LmSolver::compute` takes `&self`.
positions: std::cell::RefCell<Option<Vec<usize>>>,
}
#[cfg(feature = "eigen")]
impl SparseEigen {
    /// Allocates a native Eigen LLT solver object; it is released by `Drop`.
    pub fn new() -> Self {
        let handle = unsafe { eigen_llt_f64_create() };
        Self { handle, positions: std::cell::RefCell::new(None) }
    }
}
#[cfg(feature = "eigen")]
impl Drop for SparseEigen {
// Releases the native solver object allocated in `new`.
fn drop(&mut self) { unsafe { eigen_llt_f64_destroy(self.handle); } }
}
#[cfg(feature = "eigen")]
impl LmSolver<f64> for SparseEigen {
type Matrix = SparseMatrix<f64>;
// Allocates an empty n x n matrix; the sparsity pattern is filled in
// lazily by the first `compute` call.
fn new_matrix(&self, n: usize) -> SparseMatrix<f64> {
SparseMatrix { csc: CscMatrix::empty(n) }
}
// Evaluates gradient and Hessian. First call: assemble in COO form,
// convert to CSC, and cache the COO->CSC scatter map. Subsequent calls:
// write values directly into the fixed CSC layout via the cached map.
fn compute(&self, problem: &mut dyn LmProblem<f64>, params: &[f64], grad: &mut [f64], matrix: &mut SparseMatrix<f64>) {
// Probed in a separate statement so the immutable borrow is dropped
// before `borrow_mut` runs in the else branch.
let has_positions = self.positions.borrow().is_some();
if has_positions {
let positions = self.positions.borrow();
problem.calc_grad_hessian_sparse_indexed(params, grad, &mut matrix.csc.vals, positions.as_ref().unwrap());
} else {
let n = matrix.csc.n;
let mut coo = CooMatrix::new(n);
problem.calc_grad_hessian_sparse(params, grad, &mut coo);
let (csc, positions) = coo.to_csc_with_map();
*self.positions.borrow_mut() = Some(positions);
matrix.csc = csc;
}
}
// Copies the Hessian diagonal out of the CSC value array using the
// precomputed diagonal positions.
fn extract_diagonal(&self, matrix: &SparseMatrix<f64>, diagonal: &mut [f64]) {
for i in 0..diagonal.len() { diagonal[i] = matrix.csc.vals[matrix.csc.diag_pos[i]]; }
}
// Overwrites the diagonal with the Marquardt-damped value
// (1 + lambda) * d_i, then solves the damped system through the FFI.
fn solve_damped(&mut self, n: usize, matrix: &mut SparseMatrix<f64>, diagonal: &[f64], lambda: f64, grad: &[f64], delta: &mut [f64]) -> bool {
for i in 0..n { matrix.csc.vals[matrix.csc.diag_pos[i]] = (1.0 + lambda) * diagonal[i]; }
eigen_ffi_solve(eigen_llt_f64_solve, self.handle, &matrix.csc, grad, delta)
}
}
#[cfg(feature = "eigen")]
pub struct SparseEigenF32 {
/// Opaque handle to the native single-precision Eigen LLT solver object;
/// created by `eigen_llt_f32_create`, released in `Drop`.
handle: *mut std::ffi::c_void,
/// Cached COO-to-CSC scatter map (set by the first `compute` call).
positions: std::cell::RefCell<Option<Vec<usize>>>,
}
#[cfg(feature = "eigen")]
impl SparseEigenF32 {
    /// Allocates a native single-precision Eigen LLT solver object;
    /// it is released by `Drop`.
    pub fn new() -> Self {
        let handle = unsafe { eigen_llt_f32_create() };
        Self { handle, positions: std::cell::RefCell::new(None) }
    }
}
#[cfg(feature = "eigen")]
impl Drop for SparseEigenF32 {
// Releases the native solver object allocated in `new`.
fn drop(&mut self) { unsafe { eigen_llt_f32_destroy(self.handle); } }
}
#[cfg(feature = "eigen")]
impl LmSolver<f32> for SparseEigenF32 {
type Matrix = SparseMatrix<f32>;
// Allocates an empty n x n matrix; the sparsity pattern is filled in
// lazily by the first `compute` call.
fn new_matrix(&self, n: usize) -> SparseMatrix<f32> {
SparseMatrix { csc: CscMatrix::empty(n) }
}
// Same strategy as the f64 solver: COO assembly + pattern caching on the
// first call, indexed in-place fills afterwards.
fn compute(&self, problem: &mut dyn LmProblem<f32>, params: &[f32], grad: &mut [f32], matrix: &mut SparseMatrix<f32>) {
// Probed separately so the immutable borrow ends before `borrow_mut`.
let has_positions = self.positions.borrow().is_some();
if has_positions {
let positions = self.positions.borrow();
problem.calc_grad_hessian_sparse_indexed(params, grad, &mut matrix.csc.vals, positions.as_ref().unwrap());
} else {
let n = matrix.csc.n;
let mut coo = CooMatrix::new(n);
problem.calc_grad_hessian_sparse(params, grad, &mut coo);
let (csc, positions) = coo.to_csc_with_map();
*self.positions.borrow_mut() = Some(positions);
matrix.csc = csc;
}
}
// Copies the Hessian diagonal out of the CSC value array.
fn extract_diagonal(&self, matrix: &SparseMatrix<f32>, diagonal: &mut [f32]) {
for i in 0..diagonal.len() { diagonal[i] = matrix.csc.vals[matrix.csc.diag_pos[i]]; }
}
// Applies multiplicative damping (1 + lambda) to the stored diagonal and
// solves the damped system through the FFI.
fn solve_damped(&mut self, n: usize, matrix: &mut SparseMatrix<f32>, diagonal: &[f32], lambda: f32, grad: &[f32], delta: &mut [f32]) -> bool {
for i in 0..n { matrix.csc.vals[matrix.csc.diag_pos[i]] = (1.0 + lambda) * diagonal[i]; }
eigen_ffi_solve(eigen_llt_f32_solve, self.handle, &matrix.csc, grad, delta)
}
}
#[cfg(feature = "cholmod")]
pub struct SparseCholmod {
/// Opaque handle to the native CHOLMOD solver object; created by
/// `eigen_cholmod_f64_create`, released in `Drop`.
handle: *mut std::ffi::c_void,
/// Cached COO-to-CSC scatter map (set by the first `compute` call).
positions: std::cell::RefCell<Option<Vec<usize>>>,
}
#[cfg(feature = "cholmod")]
impl SparseCholmod {
    /// Allocates a native CHOLMOD solver object; it is released by `Drop`.
    pub fn new() -> Self {
        let handle = unsafe { eigen_cholmod_f64_create() };
        Self { handle, positions: std::cell::RefCell::new(None) }
    }
}
#[cfg(feature = "cholmod")]
impl Drop for SparseCholmod {
// Releases the native solver object allocated in `new`.
fn drop(&mut self) { unsafe { eigen_cholmod_f64_destroy(self.handle); } }
}
#[cfg(feature = "cholmod")]
impl LmSolver<f64> for SparseCholmod {
type Matrix = SparseMatrix<f64>;
// Allocates an empty n x n matrix; the sparsity pattern is filled in
// lazily by the first `compute` call.
fn new_matrix(&self, n: usize) -> SparseMatrix<f64> {
SparseMatrix { csc: CscMatrix::empty(n) }
}
// Same strategy as the Eigen solvers: COO assembly + pattern caching on
// the first call, indexed in-place fills afterwards.
fn compute(&self, problem: &mut dyn LmProblem<f64>, params: &[f64], grad: &mut [f64], matrix: &mut SparseMatrix<f64>) {
// Probed separately so the immutable borrow ends before `borrow_mut`.
let has_positions = self.positions.borrow().is_some();
if has_positions {
let positions = self.positions.borrow();
problem.calc_grad_hessian_sparse_indexed(params, grad, &mut matrix.csc.vals, positions.as_ref().unwrap());
} else {
let n = matrix.csc.n;
let mut coo = CooMatrix::new(n);
problem.calc_grad_hessian_sparse(params, grad, &mut coo);
let (csc, positions) = coo.to_csc_with_map();
*self.positions.borrow_mut() = Some(positions);
matrix.csc = csc;
}
}
// Copies the Hessian diagonal out of the CSC value array.
fn extract_diagonal(&self, matrix: &SparseMatrix<f64>, diagonal: &mut [f64]) {
for i in 0..diagonal.len() { diagonal[i] = matrix.csc.vals[matrix.csc.diag_pos[i]]; }
}
// Applies multiplicative damping (1 + lambda) to the stored diagonal and
// solves the damped system through the FFI.
fn solve_damped(&mut self, n: usize, matrix: &mut SparseMatrix<f64>, diagonal: &[f64], lambda: f64, grad: &[f64], delta: &mut [f64]) -> bool {
for i in 0..n { matrix.csc.vals[matrix.csc.diag_pos[i]] = (1.0 + lambda) * diagonal[i]; }
eigen_ffi_solve(eigen_cholmod_f64_solve, self.handle, &matrix.csc, grad, delta)
}
}
/// Convenience entry point: runs Levenberg-Marquardt with the Eigen LLT
/// FFI backend (double precision).
#[cfg(feature = "eigen")]
pub fn solve_sparse_eigen(x0: &[f64], problem: &mut impl LmProblem<f64>, config: &LmConfig<f64>) -> LmResult<f64> {
    let mut solver = SparseEigen::new();
    lm_solve(x0, &mut solver, problem, config)
}
/// Convenience entry point: runs Levenberg-Marquardt with the Eigen LLT
/// FFI backend (single precision).
#[cfg(feature = "eigen")]
pub fn solve_sparse_eigen_f32(x0: &[f32], problem: &mut impl LmProblem<f32>, config: &LmConfig<f32>) -> LmResult<f32> {
    let mut solver = SparseEigenF32::new();
    lm_solve(x0, &mut solver, problem, config)
}
/// Convenience entry point: runs Levenberg-Marquardt with the CHOLMOD
/// FFI backend.
#[cfg(feature = "cholmod")]
pub fn solve_sparse_cholmod(x0: &[f64], problem: &mut impl LmProblem<f64>, config: &LmConfig<f64>) -> LmResult<f64> {
    let mut solver = SparseCholmod::new();
    lm_solve(x0, &mut solver, problem, config)
}
/// Sparse matrix in coordinate (triplet) form, used for assembly before
/// conversion to CSC. Duplicate coordinates are permitted.
pub struct CooMatrix<T> {
/// Matrix dimension (the matrix is n x n).
pub n: usize,
/// Parallel triplet arrays: entry k is at (rows[k], cols[k]) with value vals[k].
pub rows: Vec<u32>,
pub cols: Vec<u32>,
pub vals: Vec<T>,
}
impl<T: Float> CooMatrix<T> {
    /// Creates an empty `n` x `n` COO matrix.
    pub fn new(n: usize) -> Self {
        CooMatrix { n, rows: Vec::new(), cols: Vec::new(), vals: Vec::new() }
    }
    /// Removes all triplets while keeping the allocated capacity.
    pub fn clear(&mut self) {
        self.rows.clear();
        self.cols.clear();
        self.vals.clear();
    }
    /// Appends one `(row, col, val)` triplet. Duplicate coordinates are
    /// allowed; they are summed during CSC conversion.
    pub fn push(&mut self, row: u32, col: u32, val: T) {
        self.rows.push(row);
        self.cols.push(col);
        self.vals.push(val);
    }
    /// Number of stored triplets (duplicates counted individually).
    pub fn nnz(&self) -> usize { self.rows.len() }
    /// Returns triplet indices ordered column-major (by column, then row).
    /// The sort is stable, so duplicate coordinates keep insertion order.
    /// Shared by `to_csc` and `build_scatter_map` so both see the identical
    /// ordering (a requirement for the scatter map to be valid).
    fn sorted_order(&self) -> Vec<usize> {
        let mut order: Vec<usize> = (0..self.nnz()).collect();
        order.sort_by(|&a, &b| {
            self.cols[a].cmp(&self.cols[b]).then(self.rows[a].cmp(&self.rows[b]))
        });
        order
    }
    /// For each column, finds the index into the value array of the diagonal
    /// entry. Columns without a stored diagonal keep position 0.
    fn locate_diagonal(n: usize, col_ptr: &[usize], row_idx: &[u32]) -> Vec<usize> {
        let mut diag_pos = vec![0usize; n];
        for j in 0..n {
            for k in col_ptr[j]..col_ptr[j + 1] {
                if row_idx[k] as usize == j {
                    diag_pos[j] = k;
                    break;
                }
            }
        }
        diag_pos
    }
    /// Converts to compressed sparse column format, summing duplicate
    /// coordinates into a single stored value.
    pub fn to_csc(&self) -> CscMatrix<T> {
        let n = self.n;
        let order = self.sorted_order();
        let mut col_ptr = vec![0usize; n + 1];
        let mut row_idx = Vec::with_capacity(order.len());
        let mut vals = Vec::with_capacity(order.len());
        let mut prev = (u32::MAX, u32::MAX);
        for &idx in &order {
            let (r, c, v) = (self.rows[idx], self.cols[idx], self.vals[idx]);
            if (c, r) == prev {
                // Duplicate coordinate: accumulate into the last stored value.
                let last = vals.last_mut().unwrap();
                *last = *last + v;
            } else {
                row_idx.push(r);
                vals.push(v);
                col_ptr[c as usize + 1] += 1;
                prev = (c, r);
            }
        }
        // Prefix-sum the per-column counts into column start offsets.
        for j in 1..=n {
            col_ptr[j] += col_ptr[j - 1];
        }
        let diag_pos = Self::locate_diagonal(n, &col_ptr, &row_idx);
        CscMatrix { n, col_ptr, row_idx, vals, diag_pos }
    }
    /// Accumulates this COO's values into `csc.vals` using a precomputed
    /// scatter map (`coo_to_csc[i]` is the CSC value slot of triplet `i`).
    /// Existing values are zeroed first; duplicates sum into one slot.
    pub fn scatter_into(&self, csc: &mut CscMatrix<T>, coo_to_csc: &[usize]) {
        csc.vals.iter_mut().for_each(|v| *v = T::zero());
        for (i, &csc_idx) in coo_to_csc.iter().enumerate() {
            csc.vals[csc_idx] = csc.vals[csc_idx] + self.vals[i];
        }
    }
    /// Builds the scatter map for `scatter_into`: for each triplet, the index
    /// of the CSC value slot it contributes to. Valid only for a `csc` with
    /// the same pattern `to_csc` would produce from this COO.
    pub fn build_scatter_map(&self, csc: &CscMatrix<T>) -> Vec<usize> {
        let mut map = vec![0usize; self.nnz()];
        let mut csc_idx = 0usize;
        let mut prev = (u32::MAX, u32::MAX);
        for &orig_idx in &self.sorted_order() {
            let key = (self.cols[orig_idx], self.rows[orig_idx]);
            if key == prev {
                // Duplicate of the previous coordinate: same CSC slot.
                map[orig_idx] = csc_idx - 1;
            } else {
                map[orig_idx] = csc_idx;
                csc_idx += 1;
                prev = key;
            }
        }
        debug_assert_eq!(csc_idx, csc.vals.len());
        map
    }
    /// Converts to CSC and builds the scatter map in one pass.
    ///
    /// Uses a counting sort by column followed by a stable per-column sort by
    /// row, which is cheaper than the full comparison sort in `to_csc` and
    /// yields the same ordering (stability keeps duplicates in insertion
    /// order in both variants).
    pub fn to_csc_with_map(&self) -> (CscMatrix<T>, Vec<usize>) {
        let n = self.n;
        let nnz_raw = self.nnz();
        // Counting pass: col_ptr[j] becomes the start offset of column j.
        let mut col_ptr = vec![0usize; n + 1];
        for i in 0..nnz_raw {
            col_ptr[self.cols[i] as usize + 1] += 1;
        }
        for j in 0..n {
            col_ptr[j + 1] += col_ptr[j];
        }
        // Bucket the triplets by column, preserving insertion order.
        let mut order = vec![0usize; nnz_raw];
        let mut col_pos = col_ptr[..n].to_vec();
        for i in 0..nnz_raw {
            let c = self.cols[i] as usize;
            order[col_pos[c]] = i;
            col_pos[c] += 1;
        }
        // Stable sort each column bucket by row.
        for j in 0..n {
            order[col_ptr[j]..col_ptr[j + 1]].sort_by_key(|&idx| self.rows[idx]);
        }
        // Merge duplicates while recording each triplet's CSC value slot.
        let mut row_idx = Vec::with_capacity(nnz_raw);
        let mut vals = Vec::with_capacity(nnz_raw);
        let mut map = vec![0usize; nnz_raw];
        let mut new_col_ptr = vec![0usize; n + 1];
        let mut prev = (u32::MAX, u32::MAX);
        let mut csc_idx = 0usize;
        for &orig_idx in &order {
            let (r, c, v) = (self.rows[orig_idx], self.cols[orig_idx], self.vals[orig_idx]);
            if (c, r) == prev {
                let last = vals.last_mut().unwrap();
                *last = *last + v;
                map[orig_idx] = csc_idx - 1;
            } else {
                row_idx.push(r);
                vals.push(v);
                new_col_ptr[c as usize + 1] += 1;
                map[orig_idx] = csc_idx;
                csc_idx += 1;
                prev = (c, r);
            }
        }
        for j in 1..=n {
            new_col_ptr[j] += new_col_ptr[j - 1];
        }
        let diag_pos = Self::locate_diagonal(n, &new_col_ptr, &row_idx);
        (CscMatrix { n, col_ptr: new_col_ptr, row_idx, vals, diag_pos }, map)
    }
}
/// Sparse matrix in compressed sparse column format.
pub struct CscMatrix<T> {
/// Matrix dimension (the matrix is n x n).
pub n: usize,
/// col_ptr[j]..col_ptr[j+1] is the range of stored entries of column j.
pub col_ptr: Vec<usize>,
/// Row index of each stored value; sorted ascending within each column.
pub row_idx: Vec<u32>,
/// Stored values, column-major, parallel to `row_idx`.
pub vals: Vec<T>,
/// Index into `vals` of each column's diagonal entry (0 if not stored).
pub diag_pos: Vec<usize>,
}
impl<T: Float> CscMatrix<T> {
    /// Creates an `n` x `n` CSC matrix with no stored entries and a
    /// zero-initialized diagonal-position table.
    pub fn empty(n: usize) -> Self {
        CscMatrix {
            n,
            col_ptr: vec![0; n + 1],
            row_idx: Vec::new(),
            vals: Vec::new(),
            diag_pos: vec![0; n],
        }
    }
    /// Number of explicitly stored values.
    pub fn nnz(&self) -> usize {
        self.vals.len()
    }
    /// Binary-searches column `col` for `row`. Returns the index into the
    /// value array, or `None` when the coordinate is not stored. Relies on
    /// row indices being sorted within each column.
    pub fn find_pos(&self, row: usize, col: usize) -> Option<usize> {
        let start = self.col_ptr[col];
        let end = self.col_ptr[col + 1];
        match self.row_idx[start..end].binary_search(&(row as u32)) {
            Ok(offset) => Some(start + offset),
            Err(_) => None,
        }
    }
    /// Symmetric matrix-vector product `y = A x`, where only one triangle of
    /// the symmetric matrix is stored: every off-diagonal entry `(i, j)`
    /// contributes to both `y[i]` and `y[j]`.
    pub fn symv(&self, x: &[T], y: &mut [T]) {
        y.iter_mut().for_each(|out| *out = T::zero());
        for j in 0..self.n {
            let (start, end) = (self.col_ptr[j], self.col_ptr[j + 1]);
            for (&i, &a) in self.row_idx[start..end].iter().zip(&self.vals[start..end]) {
                let i = i as usize;
                y[i] = y[i] + a * x[j];
                if i != j {
                    y[j] = y[j] + a * x[i];
                }
            }
        }
    }
}
/// Solves the SPD banded system via LAPACK `dpbsv` (upper band storage, one
/// right-hand side). `band` is overwritten with the Cholesky factor and `b`
/// with the solution; returns true iff LAPACK reports success (info == 0).
#[cfg(feature = "lapack")]
pub fn solve_spd_band_lapack(n: usize, kd: usize, band: &mut [f64], b: &mut [f64]) -> bool {
let mut info: i32 = 0;
let ldab = (kd + 1) as i32;
unsafe {
// dpbsv(uplo='U', n, kd, nrhs=1, ab, ldab, b, ldb=n, info)
lapack_sys::dpbsv_(
&b'U', &(n as i32),
&(kd as i32),
&1i32, band.as_mut_ptr(),
&ldab,
b.as_mut_ptr(),
&(n as i32),
&mut info,
);
}
info == 0
}
/// Single-precision variant of `solve_spd_band_lapack`, backed by LAPACK
/// `spbsv`. `band` is overwritten with the factor and `b` with the solution.
#[cfg(feature = "lapack")]
pub fn solve_spd_band_lapack_f32(n: usize, kd: usize, band: &mut [f32], b: &mut [f32]) -> bool {
let mut info: i32 = 0;
let ldab = (kd + 1) as i32;
unsafe {
// spbsv(uplo='U', n, kd, nrhs=1, ab, ldab, b, ldb=n, info)
lapack_sys::spbsv_(
&b'U',
&(n as i32),
&(kd as i32),
&1i32,
band.as_mut_ptr(),
&ldab,
b.as_mut_ptr(),
&(n as i32),
&mut info,
);
}
info == 0
}
/// Solves the symmetric positive definite banded system `A x = b` in place
/// with an unblocked band Cholesky factorization followed by forward and
/// backward substitution; `b` is overwritten with the solution.
///
/// `band` uses LAPACK-style upper band storage: entry `(i, j)` with
/// `j - kd <= i <= j` lives at `band[(kd + i - j) + j * (kd + 1)]`, and the
/// factor overwrites it. Returns `false` when a non-positive pivot is
/// encountered (matrix not positive definite).
pub fn solve_spd_band(n: usize, kd: usize, band: &mut [f64], b: &mut [f64]) -> bool {
    let ldab = kd + 1;
    // Factorization, column by column, staying inside the band.
    for j in 0..n {
        let lo = j.saturating_sub(kd);
        let mut diag = band[kd + j * ldab];
        for k in lo..j {
            let rkj = band[(kd + k - j) + j * ldab];
            diag -= rkj * rkj;
        }
        if diag <= 0.0 {
            return false; // not positive definite
        }
        let rjj = diag.sqrt();
        band[kd + j * ldab] = rjj;
        for i in (j + 1)..(j + kd + 1).min(n) {
            let mut acc = band[(kd + j - i) + i * ldab];
            for k in lo.max(i.saturating_sub(kd))..j {
                acc -= band[(kd + k - j) + j * ldab] * band[(kd + k - i) + i * ldab];
            }
            band[(kd + j - i) + i * ldab] = acc / rjj;
        }
    }
    // Forward substitution.
    for i in 0..n {
        let mut acc = b[i];
        for k in i.saturating_sub(kd)..i {
            acc -= band[(kd + k - i) + i * ldab] * b[k];
        }
        b[i] = acc / band[kd + i * ldab];
    }
    // Backward substitution.
    for i in (0..n).rev() {
        let mut acc = b[i];
        for k in (i + 1)..(i + kd + 1).min(n) {
            acc -= band[(kd + i - k) + k * ldab] * b[k];
        }
        b[i] = acc / band[kd + i * ldab];
    }
    true
}
/// Single-precision variant of `solve_spd_band`: in-place band Cholesky
/// factorization plus forward/backward substitution; `b` is overwritten
/// with the solution. Uses LAPACK-style upper band storage (entry `(i, j)`
/// at `band[(kd + i - j) + j * (kd + 1)]`). Returns `false` when a
/// non-positive pivot is encountered.
pub fn solve_spd_band_f32(n: usize, kd: usize, band: &mut [f32], b: &mut [f32]) -> bool {
    let ldab = kd + 1;
    // Factorization, column by column, staying inside the band.
    for j in 0..n {
        let lo = j.saturating_sub(kd);
        let mut diag = band[kd + j * ldab];
        for k in lo..j {
            let rkj = band[(kd + k - j) + j * ldab];
            diag -= rkj * rkj;
        }
        if diag <= 0.0 {
            return false; // not positive definite
        }
        let rjj = diag.sqrt();
        band[kd + j * ldab] = rjj;
        for i in (j + 1)..(j + kd + 1).min(n) {
            let mut acc = band[(kd + j - i) + i * ldab];
            for k in lo.max(i.saturating_sub(kd))..j {
                acc -= band[(kd + k - j) + j * ldab] * band[(kd + k - i) + i * ldab];
            }
            band[(kd + j - i) + i * ldab] = acc / rjj;
        }
    }
    // Forward substitution.
    for i in 0..n {
        let mut acc = b[i];
        for k in i.saturating_sub(kd)..i {
            acc -= band[(kd + k - i) + i * ldab] * b[k];
        }
        b[i] = acc / band[kd + i * ldab];
    }
    // Backward substitution.
    for i in (0..n).rev() {
        let mut acc = b[i];
        for k in (i + 1)..(i + kd + 1).min(n) {
            acc -= band[(kd + i - k) + k * ldab] * b[k];
        }
        b[i] = acc / band[kd + i * ldab];
    }
    true
}
#[cfg(test)]
mod tests {
use super::*;
// Dense SPD solve against a hand-checked solution of [[4,2],[2,3]] x = [1,2].
#[test]
fn cholesky_solve_2x2() {
let a = [4.0, 2.0, 2.0, 3.0];
let b = [1.0, 2.0];
let mut x = [0.0; 2];
assert!(solve_spd(2, &a, &b, &mut x));
assert!((x[0] - (-0.125)).abs() < 1e-10, "x[0]={}", x[0]);
assert!((x[1] - 0.75).abs() < 1e-10, "x[1]={}", x[1]);
}
// Builds an SPD matrix A = M^T M + 4I, solves, and verifies the residual A x = b.
#[test]
fn cholesky_solve_4x4() {
let m = [1.0, 0.5, 0.2, 0.1,
0.3, 1.2, 0.4, 0.6,
0.7, 0.1, 0.9, 0.3,
0.2, 0.8, 0.5, 1.1];
let mut a = [0.0f64; 16];
for i in 0..4 {
for j in 0..4 {
let mut sum = 0.0;
for k in 0..4 { sum += m[k * 4 + i] * m[k * 4 + j]; }
a[i * 4 + j] = sum;
}
a[i * 4 + i] += 4.0;
}
let b = [1.0, 2.0, 3.0, 4.0];
let mut x = [0.0f64; 4];
assert!(solve_spd(4, &a, &b, &mut x));
for i in 0..4 {
let mut ax_i = 0.0;
for j in 0..4 { ax_i += a[i * 4 + j] * x[j]; }
assert!((ax_i - b[i]).abs() < 1e-10, "row {}: Ax={}, b={}", i, ax_i, b[i]);
}
}
// Dense LM on a separable quadratic with minimum at (3, 7); should converge
// essentially exactly.
#[test]
fn solve_quadratic() {
let mut p = FnProblem {
cost: |x: &[f64]| (x[0] - 3.0) * (x[0] - 3.0) + (x[1] - 7.0) * (x[1] - 7.0),
grad_hessian: |x: &[f64], grad: &mut [f64], hess: &mut [f64]| {
grad[0] = 2.0 * (x[0] - 3.0);
grad[1] = 2.0 * (x[1] - 7.0);
hess[0] = 2.0; hess[1] = 0.0;
hess[2] = 0.0; hess[3] = 2.0;
},
};
let result = solve(
&[0.0, 0.0],
&mut p,
&LmConfig { abs_precision: 1e-10, max_iters: 100, initial_lambda: 0.001, ..Default::default() },
);
assert!((result.x[0] - 3.0).abs() < 1e-6, "x={}", result.x[0]);
assert!((result.x[1] - 7.0).abs() < 1e-6, "y={}", result.x[1]);
assert!(result.end_cost < 1e-10);
}
// Classic non-convex Rosenbrock benchmark with minimum at (1, 1); uses the
// analytic gradient and Gauss-Newton-style Hessian.
#[test]
fn solve_rosenbrock() {
let mut p = FnProblem {
cost: |p: &[f64]| {
let (x, y) = (p[0], p[1]);
(1.0 - x).powi(2) + 100.0 * (y - x * x).powi(2)
},
grad_hessian: |p: &[f64], grad: &mut [f64], hess: &mut [f64]| {
let (x, y) = (p[0], p[1]);
grad[0] = -2.0 * (1.0 - x) - 400.0 * x * (y - x * x);
grad[1] = 200.0 * (y - x * x);
hess[0] = 2.0 + 1200.0 * x * x - 400.0 * y;
hess[1] = -400.0 * x;
hess[2] = -400.0 * x;
hess[3] = 200.0;
},
};
let result = solve(
&[-1.0, 1.0],
&mut p,
&LmConfig { abs_precision: 1e-12, max_iters: 500, initial_lambda: 0.001, ..Default::default() },
);
assert!((result.x[0] - 1.0).abs() < 1e-4, "x={}", result.x[0]);
assert!((result.x[1] - 1.0).abs() < 1e-4, "y={}", result.x[1]);
}
// Same quadratic as above through the f32 solver path, with looser tolerances.
#[test]
fn solve_f32_quadratic() {
let mut p = FnProblem {
cost: |x: &[f32]| (x[0] - 3.0) * (x[0] - 3.0) + (x[1] - 7.0) * (x[1] - 7.0),
grad_hessian: |x: &[f32], grad: &mut [f32], hess: &mut [f32]| {
grad[0] = 2.0 * (x[0] - 3.0);
grad[1] = 2.0 * (x[1] - 7.0);
hess[0] = 2.0; hess[1] = 0.0;
hess[2] = 0.0; hess[3] = 2.0;
},
};
let result = solve_f32(
&[0.0f32, 0.0],
&mut p,
&LmConfig { abs_precision: 1e-4, max_iters: 100, initial_lambda: 0.001, ..Default::default() },
);
assert!((result.x[0] - 3.0).abs() < 1e-3, "x={}", result.x[0]);
assert!((result.x[1] - 7.0).abs() < 1e-3, "y={}", result.x[1]);
}
// Band Cholesky on the same 2x2 system as cholesky_solve_2x2 (kd = 1,
// upper band storage with a padding slot in column 0).
#[test]
fn band_cholesky_2x2() {
let mut band = vec![0.0, 4.0, 2.0, 3.0]; let mut b = vec![1.0, 2.0];
assert!(solve_spd_band(2, 1, &mut band, &mut b));
assert!((b[0] - (-0.125)).abs() < 1e-10, "x[0]={}", b[0]);
assert!((b[1] - 0.75).abs() < 1e-10, "x[1]={}", b[1]);
}
// Tridiagonal 4x4: band solver must agree with the dense solver.
#[test]
fn band_cholesky_tridiag_4x4() {
let mut band = vec![
0.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, ];
let mut b = vec![1.0, 2.0, 3.0, 4.0];
assert!(solve_spd_band(4, 1, &mut band, &mut b));
let a = [4.0, 1.0, 0.0, 0.0,
1.0, 4.0, 1.0, 0.0,
0.0, 1.0, 4.0, 1.0,
0.0, 0.0, 1.0, 4.0];
let rhs = [1.0, 2.0, 3.0, 4.0];
let mut x_dense = [0.0; 4];
assert!(solve_spd(4, &a, &rhs, &mut x_dense));
for i in 0..4 {
assert!((b[i] - x_dense[i]).abs() < 1e-10,
"i={}: band={}, dense={}", i, b[i], x_dense[i]);
}
}
// 6x6 diagonally dominant matrix with bandwidth 3: build dense and band
// representations of the same matrix and check the solvers agree.
#[test]
fn band_cholesky_block_tridiag_6x6() {
let n = 6;
let kd = 3;
let ldab = kd + 1;
let mut dense = vec![0.0f64; n * n];
for i in 0..n { dense[i * n + i] = 10.0; }
let offdiag = [
(0,1, 1.0), (0,2, 0.5), (0,3, 0.2),
(1,2, 1.5), (1,3, 0.8), (1,4, 0.3),
(2,3, 2.0), (2,4, 1.0), (2,5, 0.4),
(3,4, 1.2), (3,5, 0.6),
(4,5, 1.8),
];
for &(i, j, v) in &offdiag {
dense[i * n + j] += v;
dense[j * n + i] += v;
}
// Pack the upper triangle into LAPACK-style band storage.
let mut band = vec![0.0f64; ldab * n];
for j in 0..n {
let i_start = if j > kd { j - kd } else { 0 };
for i in i_start..=j {
band[(kd + i - j) + j * ldab] = dense[i * n + j];
}
}
let rhs = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
let mut band_copy = band.clone();
let mut b_band = rhs.clone();
assert!(solve_spd_band(n, kd, &mut band_copy, &mut b_band));
let mut x_dense = vec![0.0; n];
assert!(solve_spd(n, &dense, &rhs, &mut x_dense));
for i in 0..n {
assert!((b_band[i] - x_dense[i]).abs() < 1e-8,
"i={}: band={}, dense={}", i, b_band[i], x_dense[i]);
}
}
// Same 6x6 fixture as band_cholesky_block_tridiag_6x6: the pure-Rust band
// solver must match LAPACK dpbsv to near machine precision.
#[test]
#[cfg(feature = "lapack")]
fn band_cholesky_lapack_vs_rust() {
let n = 6;
let kd = 3;
let ldab = kd + 1;
let mut dense = vec![0.0f64; n * n];
for i in 0..n { dense[i * n + i] = 10.0; }
let offdiag = [
(0,1, 1.0), (0,2, 0.5), (0,3, 0.2),
(1,2, 1.5), (1,3, 0.8), (1,4, 0.3),
(2,3, 2.0), (2,4, 1.0), (2,5, 0.4),
(3,4, 1.2), (3,5, 0.6),
(4,5, 1.8),
];
for &(i, j, v) in &offdiag {
dense[i * n + j] += v;
dense[j * n + i] += v;
}
let mut band = vec![0.0f64; ldab * n];
for j in 0..n {
let i_start = if j > kd { j - kd } else { 0 };
for i in i_start..=j {
band[(kd + i - j) + j * ldab] = dense[i * n + j];
}
}
let rhs = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
let mut band_rust = band.clone();
let mut b_rust = rhs.clone();
assert!(solve_spd_band(n, kd, &mut band_rust, &mut b_rust));
let mut band_lapack = band.clone();
let mut b_lapack = rhs.clone();
assert!(solve_spd_band_lapack(n, kd, &mut band_lapack, &mut b_lapack));
for i in 0..n {
assert!((b_rust[i] - b_lapack[i]).abs() < 1e-12,
"i={}: rust={}, lapack={}", i, b_rust[i], b_lapack[i]);
}
}
/// Builds a random, seeded, symmetric banded test matrix of bandwidth `kd`
/// plus a random right-hand side. The matrix is formed from a banded
/// B^T B-style product restricted to the band, with `n` added to the
/// diagonal to keep it safely positive definite. Returns
/// `(dense row-major matrix, LAPACK-style band storage, rhs)`.
fn random_spd_band(n: usize, kd: usize, seed: u64) -> (Vec<f64>, Vec<f64>, Vec<f64>) {
use rand::prelude::*;
use rand::rngs::StdRng;
let mut rng = StdRng::seed_from_u64(seed);
let ldab = kd + 1;
let mut dense = vec![0.0f64; n * n];
let mut b_mat = vec![0.0f64; n * n];
// Random upper-banded factor with entries in [-1, 1).
for j in 0..n {
let i_start = if j > kd { j - kd } else { 0 };
for i in i_start..=j {
b_mat[i * n + j] = rng.random::<f64>() * 2.0 - 1.0;
}
}
// Symmetric product, keeping only entries inside the band.
for i in 0..n {
for j in i..n {
if j - i > kd { continue; }
let mut sum = 0.0;
for k in 0..n {
sum += b_mat[k * n + i] * b_mat[k * n + j];
}
dense[i * n + j] = sum;
dense[j * n + i] = sum;
}
}
// Diagonal shift to guarantee positive definiteness after truncation.
for i in 0..n {
dense[i * n + i] += n as f64;
}
// Pack the upper triangle into band storage.
let mut band = vec![0.0f64; ldab * n];
for j in 0..n {
let i_start = if j > kd { j - kd } else { 0 };
for i in i_start..=j {
band[(kd + i - j) + j * ldab] = dense[i * n + j];
}
}
let rhs: Vec<f64> = (0..n).map(|_| rng.random::<f64>() * 10.0 - 5.0).collect();
(dense, band, rhs)
}
// Residual check over a range of sizes/bandwidths: solve, then verify
// max_i |(A x - b)_i| / ||b|| is near machine precision.
#[test]
fn band_cholesky_residual_check() {
for &(n, kd) in &[(10, 3), (20, 5), (50, 11), (100, 11), (200, 15)] {
let (dense, band, rhs) = random_spd_band(n, kd, 42 + n as u64);
let mut band_copy = band.clone();
let mut x = rhs.clone();
assert!(solve_spd_band(n, kd, &mut band_copy, &mut x),
"band Cholesky failed for n={}, kd={}", n, kd);
let mut max_residual = 0.0f64;
let b_norm: f64 = rhs.iter().map(|v| v * v).sum::<f64>().sqrt();
for i in 0..n {
let mut ax_i = 0.0;
for j in 0..n {
ax_i += dense[i * n + j] * x[j];
}
let r = (ax_i - rhs[i]).abs();
max_residual = max_residual.max(r);
}
let rel_residual = max_residual / b_norm;
assert!(rel_residual < 1e-10,
"n={}, kd={}: relative residual {:.2e} too large", n, kd, rel_residual);
}
}
// Same residual check for the f32 solver; fixtures are the f64 ones cast
// down, and the tolerance is relaxed accordingly.
#[test]
fn band_cholesky_f32_residual_check() {
for &(n, kd) in &[(10, 3), (20, 5), (50, 11), (100, 11)] {
let (dense_f64, band_f64, rhs_f64) = random_spd_band(n, kd, 99 + n as u64);
let dense: Vec<f32> = dense_f64.iter().map(|&v| v as f32).collect();
let band: Vec<f32> = band_f64.iter().map(|&v| v as f32).collect();
let rhs: Vec<f32> = rhs_f64.iter().map(|&v| v as f32).collect();
let mut band_copy = band.clone();
let mut x = rhs.clone();
assert!(solve_spd_band_f32(n, kd, &mut band_copy, &mut x),
"f32 band Cholesky failed for n={}, kd={}", n, kd);
let mut max_residual = 0.0f32;
let b_norm: f32 = rhs.iter().map(|v| v * v).sum::<f32>().sqrt();
for i in 0..n {
let mut ax_i = 0.0f32;
for j in 0..n {
ax_i += dense[i * n + j] * x[j];
}
let r = (ax_i - rhs[i]).abs();
max_residual = max_residual.max(r);
}
let rel_residual = max_residual / b_norm;
assert!(rel_residual < 1e-4,
"f32 n={}, kd={}: relative residual {:.2e} too large", n, kd, rel_residual);
}
}
// Larger randomized cross-check: the Rust band solver and LAPACK must
// produce nearly identical solutions across sizes and bandwidths.
#[test]
#[cfg(feature = "lapack")]
fn band_cholesky_lapack_vs_rust_large() {
for &(n, kd) in &[(20, 5), (50, 11), (100, 11), (200, 15)] {
let (_dense, band, rhs) = random_spd_band(n, kd, 77 + n as u64);
let mut band_rust = band.clone();
let mut x_rust = rhs.clone();
assert!(solve_spd_band(n, kd, &mut band_rust, &mut x_rust));
let mut band_lapack = band.clone();
let mut x_lapack = rhs.clone();
assert!(solve_spd_band_lapack(n, kd, &mut band_lapack, &mut x_lapack));
let mut max_diff = 0.0f64;
let x_norm: f64 = x_rust.iter().map(|v| v * v).sum::<f64>().sqrt();
for i in 0..n {
max_diff = max_diff.max((x_rust[i] - x_lapack[i]).abs());
}
let rel_diff = max_diff / x_norm;
assert!(rel_diff < 1e-12,
"n={}, kd={}: Rust vs LAPACK relative diff {:.2e}", n, kd, rel_diff);
}
}
// COO -> CSC conversion of a small upper-triangular pattern: checks the
// column pointers, row indices, values, and diagonal positions exactly.
#[test]
fn coo_to_csc_basic() {
let mut coo = CooMatrix::<f64>::new(3);
coo.push(0, 0, 4.0);
coo.push(0, 1, 1.0);
coo.push(1, 1, 3.0);
coo.push(1, 2, 2.0);
coo.push(2, 2, 5.0);
let csc = coo.to_csc();
assert_eq!(csc.n, 3);
assert_eq!(csc.col_ptr, vec![0, 1, 3, 5]);
assert_eq!(csc.row_idx, vec![0, 0, 1, 1, 2]);
assert_eq!(csc.vals, vec![4.0, 1.0, 3.0, 2.0, 5.0]);
assert_eq!(csc.diag_pos, vec![0, 2, 4]);
}
// Duplicate coordinates (two pushes at (0,0)) must be summed into one slot.
#[test]
fn coo_to_csc_duplicates() {
let mut coo = CooMatrix::<f64>::new(2);
coo.push(0, 0, 3.0);
coo.push(0, 1, 1.0);
coo.push(0, 0, 2.0); coo.push(1, 1, 4.0);
let csc = coo.to_csc();
assert_eq!(csc.vals, vec![5.0, 1.0, 4.0]);
}
// Build a scatter map once, refill the COO with new values in the same
// pattern, and scatter into a CSC with the original structure.
#[test]
fn coo_scatter_roundtrip() {
let mut coo = CooMatrix::<f64>::new(3);
coo.push(0, 0, 4.0);
coo.push(0, 1, 1.0);
coo.push(1, 1, 3.0);
coo.push(1, 2, 2.0);
coo.push(2, 2, 5.0);
let csc = coo.to_csc();
let scatter_map = coo.build_scatter_map(&csc);
coo.clear();
coo.push(0, 0, 10.0);
coo.push(0, 1, 20.0);
coo.push(1, 1, 30.0);
coo.push(1, 2, 40.0);
coo.push(2, 2, 50.0);
let mut csc2 = CscMatrix::<f64>::empty(3);
csc2.col_ptr = csc.col_ptr.clone();
csc2.row_idx = csc.row_idx.clone();
csc2.vals = vec![0.0; csc.nnz()];
csc2.diag_pos = csc.diag_pos.clone();
coo.scatter_into(&mut csc2, &scatter_map);
assert_eq!(csc2.vals, vec![10.0, 20.0, 30.0, 40.0, 50.0]);
}
// End-to-end LM through the built-in sparse path on a separable quadratic
// with minimum at (3, 7). Only the COO assembly callback is implemented;
// the other problem hooks are unused by this solver and left unimplemented.
#[test]
fn sparse_solver_quadratic() {
struct QuadProblem;
impl LmProblem<f64> for QuadProblem {
fn calc_cost(&mut self, x: &[f64]) -> f64 {
(x[0] - 3.0) * (x[0] - 3.0) + (x[1] - 7.0) * (x[1] - 7.0)
}
fn calc_grad_hessian_dense(&mut self, x: &[f64], grad: &mut [f64], hess: &mut [f64]) {
grad[0] = 2.0 * (x[0] - 3.0); grad[1] = 2.0 * (x[1] - 7.0);
hess[0] = 2.0; hess[1] = 0.0; hess[2] = 0.0; hess[3] = 2.0;
}
fn calc_grad_hessian_band(&mut self, _: &[f64], _: &mut [f64], _: &mut [f64], _: usize) -> Result<(), BandError> {
unimplemented!()
}
fn calc_grad_hessian_sparse_direct(&mut self, _: &[f64], _: &mut [f64], _: &mut CscMatrix<f64>) { unimplemented!() }
fn calc_grad_hessian_sparse_indexed(&mut self, _: &[f64], _: &mut [f64], _: &mut [f64], _: &[usize]) { unimplemented!() }
fn calc_grad_hessian_sparse(&mut self, x: &[f64], grad: &mut [f64], coo: &mut CooMatrix<f64>) {
grad[0] = 2.0 * (x[0] - 3.0); grad[1] = 2.0 * (x[1] - 7.0);
coo.clear();
coo.push(0, 0, 2.0);
coo.push(1, 1, 2.0);
}
}
let result = solve_sparse(
&[0.0, 0.0],
&mut QuadProblem,
&LmConfig { abs_precision: 1e-10, max_iters: 100, initial_lambda: 0.001, ..Default::default() },
);
assert!((result.x[0] - 3.0).abs() < 1e-6, "x={}", result.x[0]);
assert!((result.x[1] - 7.0).abs() < 1e-6, "y={}", result.x[1]);
assert!(result.end_cost < 1e-10);
}
// Same quadratic through the faer-backed sparse path; this solver also
// exercises the indexed (pattern-cached) Hessian fill callback.
#[test]
fn sparse_faer_quadratic() {
struct QuadProblem;
impl LmProblem<f64> for QuadProblem {
fn calc_cost(&mut self, x: &[f64]) -> f64 {
(x[0] - 3.0) * (x[0] - 3.0) + (x[1] - 7.0) * (x[1] - 7.0)
}
fn calc_grad_hessian_dense(&mut self, x: &[f64], grad: &mut [f64], hess: &mut [f64]) {
grad[0] = 2.0 * (x[0] - 3.0); grad[1] = 2.0 * (x[1] - 7.0);
hess[0] = 2.0; hess[1] = 0.0; hess[2] = 0.0; hess[3] = 2.0;
}
fn calc_grad_hessian_band(&mut self, _: &[f64], _: &mut [f64], _: &mut [f64], _: usize) -> Result<(), BandError> { unimplemented!() }
fn calc_grad_hessian_sparse_direct(&mut self, _: &[f64], _: &mut [f64], _: &mut CscMatrix<f64>) { unimplemented!() }
fn calc_grad_hessian_sparse_indexed(&mut self, x: &[f64], grad: &mut [f64], vals: &mut [f64], positions: &[usize]) {
grad[0] = 2.0 * (x[0] - 3.0); grad[1] = 2.0 * (x[1] - 7.0);
vals.iter_mut().for_each(|v| *v = 0.0);
vals[positions[0]] += 2.0;
vals[positions[1]] += 2.0;
}
fn calc_grad_hessian_sparse(&mut self, x: &[f64], grad: &mut [f64], coo: &mut CooMatrix<f64>) {
grad[0] = 2.0 * (x[0] - 3.0); grad[1] = 2.0 * (x[1] - 7.0);
coo.clear();
coo.push(0, 0, 2.0);
coo.push(1, 1, 2.0);
}
}
let result = solve_sparse_faer(
&[0.0, 0.0],
&mut QuadProblem,
&LmConfig { abs_precision: 1e-10, max_iters: 100, initial_lambda: 0.001, ..Default::default() },
);
assert!((result.x[0] - 3.0).abs() < 1e-6, "x={}", result.x[0]);
assert!((result.x[1] - 7.0).abs() < 1e-6, "y={}", result.x[1]);
assert!(result.end_cost < 1e-10);
}
// Symmetric matvec with upper-triangle-only storage: A = [[4,1],[1,3]],
// x = [1,2] => y = [6,7] (the off-diagonal contributes to both rows).
#[test]
fn csc_symv() {
let mut coo = CooMatrix::<f64>::new(2);
coo.push(0, 0, 4.0);
coo.push(0, 1, 1.0);
coo.push(1, 1, 3.0);
let csc = coo.to_csc();
let mut y = vec![0.0; 2];
csc.symv(&[1.0, 2.0], &mut y);
assert!((y[0] - 6.0).abs() < 1e-12);
assert!((y[1] - 7.0).abs() < 1e-12);
}
// A 4-variable problem with cross-coupling terms: verifies that the dense
// and sparse (upper-triangle COO) Hessian formulations agree entry-by-entry
// and that both solver paths reach the same minimum.
#[test]
fn dense_vs_sparse_hessian_equal() {
struct CoupledProblem;
impl LmProblem<f64> for CoupledProblem {
fn calc_cost(&mut self, x: &[f64]) -> f64 {
(x[0]-1.0).powi(2) + (x[1]-2.0).powi(2) + (x[2]-3.0).powi(2) + (x[3]-4.0).powi(2)
+ (x[0]-x[2]).powi(2) + (x[1]-x[3]).powi(2)
}
fn calc_grad_hessian_dense(&mut self, x: &[f64], g: &mut [f64], h: &mut [f64]) {
g[0] = 2.0*(x[0]-1.0) + 2.0*(x[0]-x[2]);
g[1] = 2.0*(x[1]-2.0) + 2.0*(x[1]-x[3]);
g[2] = 2.0*(x[2]-3.0) - 2.0*(x[0]-x[2]);
g[3] = 2.0*(x[3]-4.0) - 2.0*(x[1]-x[3]);
h[0*4+0]=4.0; h[0*4+1]=0.0; h[0*4+2]=-2.0; h[0*4+3]=0.0;
h[1*4+0]=0.0; h[1*4+1]=4.0; h[1*4+2]=0.0; h[1*4+3]=-2.0;
h[2*4+0]=-2.0;h[2*4+1]=0.0; h[2*4+2]=4.0; h[2*4+3]=0.0;
h[3*4+0]=0.0; h[3*4+1]=-2.0;h[3*4+2]=0.0; h[3*4+3]=4.0;
}
fn calc_grad_hessian_band(&mut self, _: &[f64], _: &mut [f64], _: &mut [f64], _: usize) -> Result<(), BandError> { unimplemented!() }
fn calc_grad_hessian_sparse_direct(&mut self, _: &[f64], _: &mut [f64], _: &mut CscMatrix<f64>) { unimplemented!() }
fn calc_grad_hessian_sparse_indexed(&mut self, _: &[f64], _: &mut [f64], _: &mut [f64], _: &[usize]) { unimplemented!() }
fn calc_grad_hessian_sparse(&mut self, x: &[f64], g: &mut [f64], coo: &mut CooMatrix<f64>) {
g[0] = 2.0*(x[0]-1.0) + 2.0*(x[0]-x[2]);
g[1] = 2.0*(x[1]-2.0) + 2.0*(x[1]-x[3]);
g[2] = 2.0*(x[2]-3.0) - 2.0*(x[0]-x[2]);
g[3] = 2.0*(x[3]-4.0) - 2.0*(x[1]-x[3]);
coo.clear();
coo.push(0, 0, 4.0);
coo.push(0, 2, -2.0);
coo.push(1, 1, 4.0);
coo.push(1, 3, -2.0);
coo.push(2, 2, 4.0);
coo.push(3, 3, 4.0);
}
}
let x = [0.0, 0.0, 0.0, 0.0];
let n = 4;
let mut grad_dense = vec![0.0; n];
let mut hessian = vec![0.0; n * n];
CoupledProblem.calc_grad_hessian_dense(&x, &mut grad_dense, &mut hessian);
let mut grad_sparse = vec![0.0; n];
let mut coo = CooMatrix::new(n);
CoupledProblem.calc_grad_hessian_sparse(&x, &mut grad_sparse, &mut coo);
let csc = coo.to_csc();
for i in 0..n {
assert!((grad_dense[i] - grad_sparse[i]).abs() < 1e-12,
"grad[{}]: dense={}, sparse={}", i, grad_dense[i], grad_sparse[i]);
}
// Expand the upper-triangle CSC into a full dense matrix by mirroring.
let mut sparse_as_dense = vec![0.0; n * n];
for j in 0..n {
for k in csc.col_ptr[j]..csc.col_ptr[j + 1] {
let i = csc.row_idx[k] as usize;
let v = csc.vals[k];
sparse_as_dense[i * n + j] = v;
if i != j { sparse_as_dense[j * n + i] = v; }
}
}
for i in 0..n {
for j in 0..n {
assert!((hessian[i * n + j] - sparse_as_dense[i * n + j]).abs() < 1e-12,
"H[{},{}]: dense={}, sparse={}", i, j, hessian[i*n+j], sparse_as_dense[i*n+j]);
}
}
let config = LmConfig { abs_precision: 1e-10, max_iters: 100, initial_lambda: 0.001, ..Default::default() };
let r_dense = solve(&x, &mut CoupledProblem, &config);
let r_sparse = solve_sparse(&x, &mut CoupledProblem, &config);
for i in 0..n {
assert!((r_dense.x[i] - r_sparse.x[i]).abs() < 1e-6,
"x[{}]: dense={}, sparse={}", i, r_dense.x[i], r_sparse.x[i]);
}
}
// Quadratic with minimum at (3, 7) through the Eigen FFI solver path.
// Bug fix: the LmConfig literal previously listed only 4 of the struct's
// fields with no `..Default::default()`, which does not compile when the
// "eigen" feature is enabled; it now matches the other tests in this module.
#[test]
#[cfg(feature = "eigen")]
fn sparse_eigen_quadratic() {
struct QP;
impl LmProblem<f64> for QP {
fn calc_cost(&mut self, x: &[f64]) -> f64 {
(x[0]-3.0)*(x[0]-3.0) + (x[1]-7.0)*(x[1]-7.0)
}
fn calc_grad_hessian_dense(&mut self, x: &[f64], g: &mut [f64], h: &mut [f64]) {
g[0]=2.0*(x[0]-3.0); g[1]=2.0*(x[1]-7.0);
h[0]=2.0; h[1]=0.0; h[2]=0.0; h[3]=2.0;
}
fn calc_grad_hessian_band(&mut self,_:&[f64],_:&mut[f64],_:&mut[f64],_:usize)->Result<(),BandError>{unimplemented!()}
fn calc_grad_hessian_sparse_direct(&mut self,_:&[f64],_:&mut[f64],_:&mut CscMatrix<f64>){unimplemented!()}
fn calc_grad_hessian_sparse_indexed(&mut self, x: &[f64], g: &mut [f64], vals: &mut [f64], pos: &[usize]) {
g[0]=2.0*(x[0]-3.0); g[1]=2.0*(x[1]-7.0);
vals.iter_mut().for_each(|v| *v = 0.0);
vals[pos[0]] += 2.0; vals[pos[1]] += 2.0;
}
fn calc_grad_hessian_sparse(&mut self, x: &[f64], g: &mut [f64], coo: &mut CooMatrix<f64>) {
g[0]=2.0*(x[0]-3.0); g[1]=2.0*(x[1]-7.0);
coo.clear(); coo.push(0,0,2.0); coo.push(1,1,2.0);
}
}
let r = solve_sparse_eigen(&[0.0,0.0], &mut QP,
&LmConfig{abs_precision:1e-10, max_iters:100, initial_lambda:0.001, ..Default::default()});
assert!((r.x[0]-3.0).abs()<1e-6, "x={}", r.x[0]);
assert!((r.x[1]-7.0).abs()<1e-6, "y={}", r.x[1]);
assert!(r.end_cost < 1e-10);
}
// Coupled 4-variable problem: the Eigen-backed solver must agree with the
// dense solver. Bug fix: the LmConfig literal previously listed only 4 of
// the struct's fields with no `..Default::default()`, which does not
// compile when the "eigen" feature is enabled.
#[test]
#[cfg(feature = "eigen")]
fn sparse_eigen_coupled() {
struct CP;
impl LmProblem<f64> for CP {
fn calc_cost(&mut self, x: &[f64]) -> f64 {
(x[0]-1.0).powi(2)+(x[1]-2.0).powi(2)+(x[2]-3.0).powi(2)+(x[3]-4.0).powi(2)
+(x[0]-x[2]).powi(2)+(x[1]-x[3]).powi(2)
}
fn calc_grad_hessian_dense(&mut self, x: &[f64], g: &mut [f64], h: &mut [f64]) {
g[0]=2.0*(x[0]-1.0)+2.0*(x[0]-x[2]); g[1]=2.0*(x[1]-2.0)+2.0*(x[1]-x[3]);
g[2]=2.0*(x[2]-3.0)-2.0*(x[0]-x[2]); g[3]=2.0*(x[3]-4.0)-2.0*(x[1]-x[3]);
h[0]=4.0;h[1]=0.0;h[2]=-2.0;h[3]=0.0;
h[4]=0.0;h[5]=4.0;h[6]=0.0;h[7]=-2.0;
h[8]=-2.0;h[9]=0.0;h[10]=4.0;h[11]=0.0;
h[12]=0.0;h[13]=-2.0;h[14]=0.0;h[15]=4.0;
}
fn calc_grad_hessian_band(&mut self,_:&[f64],_:&mut[f64],_:&mut[f64],_:usize)->Result<(),BandError>{unimplemented!()}
fn calc_grad_hessian_sparse_direct(&mut self,_:&[f64],_:&mut[f64],_:&mut CscMatrix<f64>){unimplemented!()}
fn calc_grad_hessian_sparse_indexed(&mut self, x: &[f64], g: &mut [f64], vals: &mut [f64], pos: &[usize]) {
g[0]=2.0*(x[0]-1.0)+2.0*(x[0]-x[2]); g[1]=2.0*(x[1]-2.0)+2.0*(x[1]-x[3]);
g[2]=2.0*(x[2]-3.0)-2.0*(x[0]-x[2]); g[3]=2.0*(x[3]-4.0)-2.0*(x[1]-x[3]);
vals.iter_mut().for_each(|v| *v = 0.0);
vals[pos[0]] += 4.0; vals[pos[1]] += -2.0;
vals[pos[2]] += 4.0; vals[pos[3]] += -2.0;
vals[pos[4]] += 4.0; vals[pos[5]] += 4.0;
}
fn calc_grad_hessian_sparse(&mut self, x: &[f64], g: &mut [f64], coo: &mut CooMatrix<f64>) {
g[0]=2.0*(x[0]-1.0)+2.0*(x[0]-x[2]); g[1]=2.0*(x[1]-2.0)+2.0*(x[1]-x[3]);
g[2]=2.0*(x[2]-3.0)-2.0*(x[0]-x[2]); g[3]=2.0*(x[3]-4.0)-2.0*(x[1]-x[3]);
coo.clear();
coo.push(0,0,4.0); coo.push(0,2,-2.0);
coo.push(1,1,4.0); coo.push(1,3,-2.0);
coo.push(2,2,4.0); coo.push(3,3,4.0);
}
}
let cfg = LmConfig{abs_precision:1e-10, max_iters:100, initial_lambda:0.001, ..Default::default()};
let rd = solve(&[0.0;4], &mut CP, &cfg);
let re = solve_sparse_eigen(&[0.0;4], &mut CP, &cfg);
for i in 0..4 {
assert!((rd.x[i]-re.x[i]).abs()<1e-6, "x[{}]: dense={}, eigen={}", i, rd.x[i], re.x[i]);
}
}
#[test]
#[cfg(feature = "cholmod")]
fn sparse_cholmod_quadratic() {
    // Decoupled quadratic (x0 - 3)^2 + (x1 - 7)^2: the CHOLMOD sparse
    // solver must converge to (3, 7) with (near-)zero cost.
    struct QP;
    impl LmProblem<f64> for QP {
        fn calc_cost(&mut self, x: &[f64]) -> f64 {
            (x[0] - 3.0) * (x[0] - 3.0) + (x[1] - 7.0) * (x[1] - 7.0)
        }
        fn calc_grad_hessian_dense(&mut self, x: &[f64], g: &mut [f64], h: &mut [f64]) {
            g[0] = 2.0 * (x[0] - 3.0);
            g[1] = 2.0 * (x[1] - 7.0);
            // Constant row-major 2x2 Hessian: diag(2, 2).
            h[0] = 2.0; h[1] = 0.0; h[2] = 0.0; h[3] = 2.0;
        }
        // Unused solver paths for this test.
        fn calc_grad_hessian_band(&mut self, _: &[f64], _: &mut [f64], _: &mut [f64], _: usize) -> Result<(), BandError> {
            unimplemented!()
        }
        fn calc_grad_hessian_sparse_direct(&mut self, _: &[f64], _: &mut [f64], _: &mut CscMatrix<f64>) {
            unimplemented!()
        }
        fn calc_grad_hessian_sparse_indexed(&mut self, x: &[f64], g: &mut [f64], vals: &mut [f64], pos: &[usize]) {
            g[0] = 2.0 * (x[0] - 3.0);
            g[1] = 2.0 * (x[1] - 7.0);
            // Zero the value buffer, then scatter the two diagonal
            // entries via the index map.
            vals.iter_mut().for_each(|v| *v = 0.0);
            vals[pos[0]] += 2.0;
            vals[pos[1]] += 2.0;
        }
        fn calc_grad_hessian_sparse(&mut self, x: &[f64], g: &mut [f64], coo: &mut CooMatrix<f64>) {
            g[0] = 2.0 * (x[0] - 3.0);
            g[1] = 2.0 * (x[1] - 7.0);
            // Rebuild the sparse Hessian from scratch: diag(2, 2).
            coo.clear();
            coo.push(0, 0, 2.0);
            coo.push(1, 1, 2.0);
        }
    }
    // BUG FIX: the original literal listed only four of LmConfig's
    // eight fields, which is a compile error (E0063); the rest are
    // now filled from Default.
    let r = solve_sparse_cholmod(
        &[0.0, 0.0],
        &mut QP,
        &LmConfig {
            abs_precision: 1e-10,
            max_iters: 100,
            initial_lambda: 0.001,
            verbose: false,
            ..Default::default()
        },
    );
    assert!((r.x[0] - 3.0).abs() < 1e-6, "x={}", r.x[0]);
    assert!((r.x[1] - 7.0).abs() < 1e-6, "y={}", r.x[1]);
    assert!(r.end_cost < 1e-10);
}
}