use scirs2_core::ndarray::{Array2, ArrayView2};
use scirs2_core::numeric::{Float, One, Zero};
use std::fmt::Debug;
use crate::error::{LinalgError, LinalgResult};
/// Tuning knobs shared by the iterative matrix optimizers in this module.
#[derive(Debug, Clone)]
pub struct OptimizationConfig<F: Float> {
    /// Maximum number of outer iterations before the optimizer gives up.
    pub max_iterations: usize,
    /// Convergence threshold on the (projected) gradient norm.
    pub gradient_tolerance: F,
    /// Step size tried first on every iteration.
    pub initial_stepsize: F,
    /// Multiplier applied to the step size after each failed backtracking
    /// trial (expected in (0, 1); the default is 0.5).
    pub backtrack_factor: F,
    /// Maximum number of backtracking trials per iteration.
    pub max_backtrack_steps: usize,
}
impl<F: Float> Default for OptimizationConfig<F> {
    /// Sensible defaults: 1000 iterations, 1e-6 gradient tolerance, unit
    /// initial step, halving backtracking with at most 20 trials.
    ///
    /// # Panics
    /// Panics if 1e-6 or 0.5 cannot be represented in `F` (they are
    /// representable in all standard float types).
    fn default() -> Self {
        Self {
            max_iterations: 1000,
            gradient_tolerance: F::from(1e-6).expect("1e-6 must be representable in F"),
            // `F::one()` is infallible, unlike a runtime conversion from 1.0.
            initial_stepsize: F::one(),
            backtrack_factor: F::from(0.5).expect("0.5 must be representable in F"),
            max_backtrack_steps: 20,
        }
    }
}
/// Outcome of an optimization run.
#[derive(Debug, Clone)]
pub struct OptimizationResult<F: Float> {
    /// Final iterate.
    pub x: Array2<F>,
    /// Objective value at `x`.
    pub f_val: F,
    /// Gradient norm (or projected-gradient norm) used for the
    /// convergence decision.
    pub gradient_norm: F,
    /// Number of iterations performed before termination.
    pub iterations: usize,
    /// Whether `gradient_norm` fell below the configured tolerance.
    pub converged: bool,
    /// Objective value recorded at the start of each iteration.
    pub f_history: Vec<F>,
}
/// Minimizes a scalar function of a matrix via gradient descent with
/// Armijo backtracking line search.
///
/// Gradients are approximated by forward finite differences
/// (`matrix_finite_difference_gradient`), so `f` is evaluated once per
/// matrix entry on every iteration.
///
/// # Arguments
/// * `f` - objective; maps a matrix to a scalar.
/// * `x0` - starting point; iterates keep this shape.
/// * `config` - iteration limit, tolerance, and line-search parameters.
///
/// # Errors
/// Propagates any `LinalgError` produced by `f`.
#[allow(dead_code)]
pub fn matrix_gradient_descent<F>(
    f: &impl Fn(&ArrayView2<F>) -> LinalgResult<F>,
    x0: &ArrayView2<F>,
    config: &OptimizationConfig<F>,
) -> LinalgResult<OptimizationResult<F>>
where
    F: Float + Zero + One + Copy + Debug + scirs2_core::ndarray::ScalarOperand,
{
    // Armijo sufficient-decrease constant (classic c1 = 1e-4), converted
    // once instead of on every backtracking trial.
    let c1 = F::from(1e-4).expect("1e-4 must be representable in F");
    let mut x = x0.to_owned();
    let mut f_history = Vec::with_capacity(config.max_iterations);

    for iteration in 0..config.max_iterations {
        let f_val = f(&x.view())?;
        f_history.push(f_val);

        let grad = matrix_finite_difference_gradient(&f, &x.view())?;
        let grad_norm = grad.iter().fold(F::zero(), |acc, &g| acc + g * g).sqrt();

        // First-order stationarity test.
        if grad_norm < config.gradient_tolerance {
            return Ok(OptimizationResult {
                x,
                f_val,
                gradient_norm: grad_norm,
                iterations: iteration,
                converged: true,
                f_history,
            });
        }

        // Backtracking line search along the steepest-descent direction.
        // If no trial satisfies the Armijo condition within the budget,
        // the smallest trial step is accepted anyway.
        let mut stepsize = config.initial_stepsize;
        let mut x_new = x.clone();
        for _ in 0..config.max_backtrack_steps {
            x_new = &x - &(&grad * stepsize);
            let f_new = f(&x_new.view())?;
            // Armijo: f(x) - f(x - t*g) >= c1 * t * ||g||^2.
            if f_val - f_new >= c1 * stepsize * grad_norm * grad_norm {
                break;
            }
            stepsize = stepsize * config.backtrack_factor;
        }
        x = x_new;
    }

    // Iteration budget exhausted: report the final, non-converged state.
    let f_val = f(&x.view())?;
    let grad = matrix_finite_difference_gradient(&f, &x.view())?;
    let grad_norm = grad.iter().fold(F::zero(), |acc, &g| acc + g * g).sqrt();
    Ok(OptimizationResult {
        x,
        f_val,
        gradient_norm: grad_norm,
        iterations: config.max_iterations,
        converged: false,
        f_history,
    })
}
/// Minimizes `f` by gradient descent with a heuristic adaptive step size.
///
/// NOTE(review): despite the name, this is NOT Newton's method — no
/// Hessian (or quasi-Newton approximation) is ever formed. The update is
/// `x -= step * grad`, where `step` shrinks in proportion to how much the
/// objective moved on the previous iteration. The name is kept for API
/// compatibility; consider renaming in a breaking release.
///
/// # Errors
/// Propagates any `LinalgError` produced by `f`.
#[allow(dead_code)]
pub fn matrix_newton_method<F>(
    f: &impl Fn(&ArrayView2<F>) -> LinalgResult<F>,
    x0: &ArrayView2<F>,
    config: &OptimizationConfig<F>,
) -> LinalgResult<OptimizationResult<F>>
where
    F: Float + Zero + One + Copy + Debug + scirs2_core::ndarray::ScalarOperand,
{
    // Damping factor for the adaptive step heuristic, converted once
    // instead of on every iteration.
    let damping = F::from(0.1).expect("0.1 must be representable in F");
    let mut x = x0.to_owned();
    let mut f_history = Vec::with_capacity(config.max_iterations);

    for iteration in 0..config.max_iterations {
        let f_val = f(&x.view())?;
        f_history.push(f_val);

        let grad = matrix_finite_difference_gradient(&f, &x.view())?;
        let grad_norm = grad.iter().fold(F::zero(), |acc, &g| acc + g * g).sqrt();

        // First-order stationarity test.
        if grad_norm < config.gradient_tolerance {
            return Ok(OptimizationResult {
                x,
                f_val,
                gradient_norm: grad_norm,
                iterations: iteration,
                converged: true,
                f_history,
            });
        }

        // First iteration uses the configured step; afterwards the step
        // shrinks when f changed a lot, capped at the configured step.
        let stepsize = if iteration == 0 {
            config.initial_stepsize
        } else {
            // Safe: f_history holds iteration + 1 >= 2 entries here.
            let f_prev = f_history[f_history.len() - 2];
            let f_change = (f_val - f_prev).abs();
            let adaptive_step = if f_change > F::epsilon() {
                config.initial_stepsize * damping / f_change
            } else {
                config.initial_stepsize
            };
            adaptive_step.min(config.initial_stepsize)
        };

        x = &x - &(&grad * stepsize);
    }

    // Iteration budget exhausted: report the final, non-converged state.
    let f_val = f(&x.view())?;
    let grad = matrix_finite_difference_gradient(&f, &x.view())?;
    let grad_norm = grad.iter().fold(F::zero(), |acc, &g| acc + g * g).sqrt();
    Ok(OptimizationResult {
        x,
        f_val,
        gradient_norm: grad_norm,
        iterations: config.max_iterations,
        converged: false,
        f_history,
    })
}
/// Projected gradient descent for minimizing `f` over a feasible set
/// defined implicitly by the `project` operator.
///
/// Each iteration takes a gradient step, projects the result back onto
/// the feasible set, and backtracks the step size until the projected
/// candidate strictly decreases the objective. Convergence is measured
/// by the norm of the projected gradient mapping `x - P(x - grad)`,
/// evaluated at the iterate *before* the update.
///
/// # Errors
/// Propagates any `LinalgError` produced by `f` or `project`.
#[allow(dead_code)]
pub fn projected_gradient_descent<F>(
    f: &impl Fn(&ArrayView2<F>) -> LinalgResult<F>,
    project: impl Fn(&ArrayView2<F>) -> LinalgResult<Array2<F>>,
    x0: &ArrayView2<F>,
    config: &OptimizationConfig<F>,
) -> LinalgResult<OptimizationResult<F>>
where
    F: Float + Zero + One + Copy + Debug + scirs2_core::ndarray::ScalarOperand,
{
    let mut current = x0.to_owned();
    let mut history = Vec::new();

    for iter_count in 0..config.max_iterations {
        let current_f = f(&current.view())?;
        history.push(current_f);

        let gradient = matrix_finite_difference_gradient(&f, &current.view())?;

        // Backtracking: shrink the step until the projected point improves f.
        let mut step = config.initial_stepsize;
        let mut accepted: Option<Array2<F>> = None;
        for _ in 0..config.max_backtrack_steps {
            let trial = &current - &(&gradient * step);
            let projected = project(&trial.view())?;
            if f(&projected.view())? < current_f {
                accepted = Some(projected);
                break;
            }
            step = step * config.backtrack_factor;
        }

        // No improving step found within the budget: take the projection
        // of the smallest step anyway.
        let next = match accepted {
            Some(point) => point,
            None => {
                let trial = &current - &(&gradient * step);
                project(&trial.view())?
            }
        };

        // Projected gradient mapping with unit step, measured at the
        // previous iterate.
        let unit_trial = &current - &gradient;
        let mapped = project(&unit_trial.view())?;
        let residual = &current - &mapped;
        let residual_norm = residual
            .iter()
            .fold(F::zero(), |acc, &v| acc + v * v)
            .sqrt();

        current = next;
        if residual_norm < config.gradient_tolerance {
            let final_f = f(&current.view())?;
            return Ok(OptimizationResult {
                x: current,
                f_val: final_f,
                gradient_norm: residual_norm,
                iterations: iter_count,
                converged: true,
                f_history: history,
            });
        }
    }

    // Iteration budget exhausted without meeting the tolerance.
    let final_f = f(&current.view())?;
    let final_grad = matrix_finite_difference_gradient(&f, &current.view())?;
    let final_norm = final_grad
        .iter()
        .fold(F::zero(), |acc, &v| acc + v * v)
        .sqrt();
    Ok(OptimizationResult {
        x: current,
        f_val: final_f,
        gradient_norm: final_norm,
        iterations: config.max_iterations,
        converged: false,
        f_history: history,
    })
}
/// Forward-difference approximation of the gradient of a scalar function
/// of a matrix.
///
/// Perturbs each entry of `x` by `sqrt(machine epsilon)` and records the
/// induced change in `f`. Costs one evaluation of `f` per matrix entry,
/// plus one at the base point.
///
/// # Errors
/// Propagates any `LinalgError` produced by `f`.
#[allow(dead_code)]
fn matrix_finite_difference_gradient<F>(
    f: &impl Fn(&ArrayView2<F>) -> LinalgResult<F>,
    x: &ArrayView2<F>,
) -> LinalgResult<Array2<F>>
where
    F: Float + Zero + One + Copy + Debug,
{
    let step = F::epsilon().sqrt();
    let base_value = f(x)?;
    let mut gradient = Array2::zeros(x.dim());
    for ((row, col), slot) in gradient.indexed_iter_mut() {
        // Perturb a single entry and measure the resulting slope.
        let mut perturbed = x.to_owned();
        perturbed[[row, col]] = perturbed[[row, col]] + step;
        *slot = (f(&perturbed.view())? - base_value) / step;
    }
    Ok(gradient)
}
/// Ready-made objective/projection pairs for common matrix optimization
/// problems, solved via `projected_gradient_descent`.
///
/// NOTE(review): both "projections" below are cheap approximations, not
/// true metric projections onto their constraint sets — see the
/// per-function notes.
pub mod common_problems {
    use super::*;

    /// Approximately solves the Procrustes-style problem
    /// `min_X ||A - X B||_F^2` with (approximately) orthogonal `X`.
    ///
    /// NOTE(review): the projection used here only normalizes each column
    /// of `X` to unit length; it does NOT enforce mutual orthogonality of
    /// the columns, so the result is generally not a true orthogonal
    /// matrix. A faithful projection would use the orthogonal polar
    /// factor from an SVD — confirm whether this approximation is
    /// intentional.
    ///
    /// # Errors
    /// Returns `LinalgError::ShapeError` when `a` and `b` differ in
    /// shape; propagates errors from the optimizer.
    pub fn orthogonal_procrustes<F>(
        a: &ArrayView2<F>,
        b: &ArrayView2<F>,
        config: &OptimizationConfig<F>,
    ) -> LinalgResult<Array2<F>>
    where
        F: Float + Zero + One + Copy + Debug + scirs2_core::ndarray::ScalarOperand,
    {
        if a.shape() != b.shape() {
            return Err(LinalgError::ShapeError(format!(
                "Matrices must have same shape: {:?} vs {:?}",
                a.shape(),
                b.shape()
            )));
        }
        // Objective: squared Frobenius norm of the residual A - X B.
        let f = |x: &ArrayView2<F>| -> LinalgResult<F> {
            let diff = a - &x.dot(b);
            let norm_sq = diff.iter().fold(F::zero(), |acc, &val| acc + val * val);
            Ok(norm_sq)
        };
        // Column-wise normalization standing in for an orthogonality
        // projection (see NOTE above). Near-zero columns are left as-is
        // to avoid dividing by zero.
        let project_orthogonal = |x: &ArrayView2<F>| -> LinalgResult<Array2<F>> {
            let mut result = x.to_owned();
            for j in 0..result.ncols() {
                let mut col = result.column_mut(j);
                let norm = col
                    .iter()
                    .fold(F::zero(), |acc, &val| acc + val * val)
                    .sqrt();
                if norm > F::epsilon() {
                    for elem in col.iter_mut() {
                        *elem = *elem / norm;
                    }
                }
            }
            Ok(result)
        };
        // Start from the identity — a feasible (orthogonal) point.
        let x0 = Array2::eye(a.nrows());
        let result = projected_gradient_descent(&f, project_orthogonal, &x0.view(), config)?;
        Ok(result.x)
    }

    /// Fills in a `size`-by-`size` matrix so that it matches the given
    /// `(value, row, col)` observations in a least-squares sense, while
    /// nudging the iterate towards symmetric positive definiteness.
    ///
    /// NOTE(review): the "projection" only symmetrizes the iterate and
    /// adds 1e-6 to the diagonal; it does not clip negative eigenvalues,
    /// so positive definiteness is not actually guaranteed. Also, the
    /// diagonal regularizer is re-added on every projection call, so the
    /// diagonal drifts upward with the iteration count — confirm this is
    /// acceptable for the intended use.
    ///
    /// # Errors
    /// Propagates errors from the optimizer. Panics (via indexing) if an
    /// observation's indices fall outside the `size`-by-`size` matrix.
    pub fn positive_definite_completion<F>(
        observations: &[(F, usize, usize)],
        size: usize,
        config: &OptimizationConfig<F>,
    ) -> LinalgResult<Array2<F>>
    where
        F: Float + Zero + One + Copy + Debug + scirs2_core::ndarray::ScalarOperand,
    {
        // Objective: squared error over the observed entries only.
        let f = |x: &ArrayView2<F>| -> LinalgResult<F> {
            let mut error = F::zero();
            for &(obs_val, i, j) in observations {
                let diff = x[[i, j]] - obs_val;
                error = error + diff * diff;
            }
            Ok(error)
        };
        // Symmetrize and regularize the diagonal (see NOTE above).
        let project_pd = |x: &ArrayView2<F>| -> LinalgResult<Array2<F>> {
            let sym = (x + &x.t()) / F::from(2.0).expect("Operation failed");
            let mut result = sym;
            let reg = F::from(1e-6).expect("Operation failed");
            for i in 0..size {
                result[[i, i]] = result[[i, i]] + reg;
            }
            Ok(result)
        };
        // The identity is symmetric positive definite — a sane start.
        let x0 = Array2::eye(size);
        let result = projected_gradient_descent(&f, project_pd, &x0.view(), config)?;
        Ok(result.x)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_abs_diff_eq;
    use scirs2_core::ndarray::array;

    /// Minimizing ||X||_F^2 from an all-ones start should converge and
    /// drive every entry towards zero.
    #[test]
    fn test_matrix_gradient_descent() {
        fn objective(x: &ArrayView2<f64>) -> LinalgResult<f64> {
            let frobenius_sq = x.iter().fold(0.0, |acc, &val| acc + val * val);
            Ok(frobenius_sq)
        }
        let x0 = array![[1.0, 1.0], [1.0, 1.0]];
        let config = OptimizationConfig {
            max_iterations: 100,
            gradient_tolerance: 1e-4,
            ..Default::default()
        };
        let result = matrix_gradient_descent(&objective, &x0.view(), &config)
            .expect("gradient descent should succeed on a smooth quadratic");
        assert!(result.converged);
        assert_abs_diff_eq!(result.x[[0, 0]], 0.0, epsilon = 1e-2);
        assert_abs_diff_eq!(result.x[[0, 1]], 0.0, epsilon = 1e-2);
        assert_abs_diff_eq!(result.x[[1, 0]], 0.0, epsilon = 1e-2);
        assert_abs_diff_eq!(result.x[[1, 1]], 0.0, epsilon = 1e-2);
    }

    /// Projecting onto diagonal matrices while minimizing ||X||_F^2:
    /// off-diagonal entries are zeroed exactly by the projection, and the
    /// diagonal entries are driven towards zero by the descent.
    #[test]
    fn test_projected_gradient_descent() {
        fn objective(x: &ArrayView2<f64>) -> LinalgResult<f64> {
            Ok(x.iter().fold(0.0, |acc, &val| acc + val * val))
        }
        // Projection onto the set of diagonal matrices: keep the diagonal,
        // zero everything else.
        fn project(x: &ArrayView2<f64>) -> LinalgResult<Array2<f64>> {
            let mut result = Array2::zeros(x.dim());
            for i in 0..x.nrows() {
                result[[i, i]] = x[[i, i]];
            }
            Ok(result)
        }
        let x0 = array![[1.0, 0.5], [0.5, 1.0]];
        let config = OptimizationConfig {
            max_iterations: 200,
            gradient_tolerance: 1e-4,
            initial_stepsize: 0.1,
            ..Default::default()
        };
        let result = projected_gradient_descent(&objective, project, &x0.view(), &config)
            .expect("projected gradient descent should succeed on a smooth quadratic");
        assert!(result.converged);
        assert_abs_diff_eq!(result.x[[0, 0]], 0.0, epsilon = 1e-2);
        assert_abs_diff_eq!(result.x[[1, 1]], 0.0, epsilon = 1e-2);
        // Off-diagonals are zeroed exactly by the projection.
        assert_abs_diff_eq!(result.x[[0, 1]], 0.0, epsilon = 1e-10);
        assert_abs_diff_eq!(result.x[[1, 0]], 0.0, epsilon = 1e-10);
    }
}