use crate::constrained::{Constraint, ConstraintFn, ConstraintKind, Options};
use crate::error::{OptimizeError, OptimizeResult};
use crate::result::OptimizeResults;
use scirs2_core::ndarray::{Array1, ArrayBase, Data, Ix1};
pub struct SQPSolver {
pub max_iter: usize,
pub tol: f64,
pub constraint_tol: f64,
pub eps: f64,
pub lambda_init: f64,
}
impl Default for SQPSolver {
fn default() -> Self {
SQPSolver {
max_iter: 200,
tol: 1e-8,
constraint_tol: 1e-8,
eps: 1e-7,
lambda_init: 0.0,
}
}
}
impl SQPSolver {
pub fn new() -> Self {
SQPSolver::default()
}
pub fn solve<F, GF, E, G>(
&self,
f: F,
grad_f: Option<GF>,
eq_cons: &[E],
ineq_cons: &[G],
x0: &[f64],
) -> OptimizeResult<OptimizeResults<f64>>
where
F: Fn(&[f64]) -> f64 + Clone,
GF: Fn(&[f64]) -> Vec<f64>,
E: Fn(&[f64]) -> f64 + Clone,
G: Fn(&[f64]) -> f64 + Clone,
{
let n = x0.len();
if n == 0 {
return Err(OptimizeError::InvalidInput("x0 must be non-empty".to_string()));
}
let mut x = x0.to_vec();
let mut nfev = 0usize;
let mut njev = 0usize;
let mut nit = 0usize;
let mut hess: Vec<Vec<f64>> = (0..n)
.map(|i| {
let mut row = vec![0.0; n];
row[i] = 1.0;
row
})
.collect();
let n_eq = eq_cons.len();
let n_ineq = ineq_cons.len();
let n_lambda = n_eq + n_ineq;
let mut lambda = vec![self.lambda_init; n_lambda];
let compute_grad = |xv: &[f64], nfev: &mut usize, njev: &mut usize| -> Vec<f64> {
*njev += 1;
if let Some(ref gf) = grad_f {
gf(xv)
} else {
let h = self.eps;
let mut g = vec![0.0; n];
let mut xp = xv.to_vec();
let mut xm = xv.to_vec();
for i in 0..n {
xp[i] = xv[i] + h;
xm[i] = xv[i] - h;
*nfev += 2;
g[i] = (f(&xp) - f(&xm)) / (2.0 * h);
xp[i] = xv[i];
xm[i] = xv[i];
}
g
}
};
let compute_constraint_jac = |xv: &[f64], nfev: &mut usize| -> (Vec<f64>, Vec<Vec<f64>>) {
let h = self.eps;
let mut c_vals = Vec::with_capacity(n_lambda);
let mut c_jac: Vec<Vec<f64>> = Vec::with_capacity(n_lambda);
for e in eq_cons {
let cv = e(xv);
c_vals.push(cv);
*nfev += 1;
let mut jrow = vec![0.0; n];
let mut xp = xv.to_vec();
for j in 0..n {
xp[j] = xv[j] + h;
*nfev += 1;
jrow[j] = (e(&xp) - cv) / h;
xp[j] = xv[j];
}
c_jac.push(jrow);
}
for g in ineq_cons {
let cv = g(xv);
c_vals.push(cv);
*nfev += 1;
let mut jrow = vec![0.0; n];
let mut xp = xv.to_vec();
for j in 0..n {
xp[j] = xv[j] + h;
*nfev += 1;
jrow[j] = (g(&xp) - cv) / h;
xp[j] = xv[j];
}
c_jac.push(jrow);
}
(c_vals, c_jac)
};
let mut prev_grad_lag: Option<Vec<f64>> = None;
let mut prev_x: Option<Vec<f64>> = None;
for _iter in 0..self.max_iter {
nit += 1;
nfev += 1;
let fx = f(&x);
let grad = compute_grad(&x, &mut nfev, &mut njev);
let (c_vals, c_jac) = compute_constraint_jac(&x, &mut nfev);
let mut grad_lag: Vec<f64> = grad.clone();
for j in 0..n_lambda {
for i in 0..n {
grad_lag[i] += lambda[j] * c_jac[j][i];
}
}
let grad_lag_norm = grad_lag.iter().map(|v| v * v).sum::<f64>().sqrt();
let eq_viol: f64 = c_vals[..n_eq].iter().map(|v| v.abs()).sum();
let ineq_viol: f64 = c_vals[n_eq..].iter().map(|v| v.max(0.0)).sum();
let cv = eq_viol + ineq_viol;
if grad_lag_norm <= self.tol && cv <= self.constraint_tol {
return Ok(OptimizeResults {
x: Array1::from_vec(x),
fun: fx,
jac: Some(grad),
hess: None,
constr: Some(Array1::from_vec(c_vals)),
nit,
nfev,
njev,
nhev: 0,
maxcv: 0,
message: "KKT conditions satisfied".to_string(),
success: true,
status: 0,
});
}
if let (Some(ref px), Some(ref pg)) = (&prev_x, &prev_grad_lag) {
let s: Vec<f64> = x.iter().zip(px.iter()).map(|(&xi, &pxi)| xi - pxi).collect();
let y: Vec<f64> = grad_lag.iter().zip(pg.iter()).map(|(&gi, &pgi)| gi - pgi).collect();
let sy: f64 = s.iter().zip(y.iter()).map(|(&si, &yi)| si * yi).sum();
let hs: Vec<f64> = (0..n)
.map(|i| hess[i].iter().zip(s.iter()).map(|(&h, &si)| h * si).sum::<f64>())
.collect();
let sths: f64 = s.iter().zip(hs.iter()).map(|(&si, &hsi)| si * hsi).sum();
let sy_damp = sy.max(0.2 * sths);
if sy_damp.abs() > 1e-10 && sths.abs() > 1e-10 {
for i in 0..n {
for j in 0..n {
hess[i][j] += y[i] * y[j] / sy_damp - hs[i] * hs[j] / sths;
}
}
}
}
prev_x = Some(x.clone());
prev_grad_lag = Some(grad_lag.clone());
let d = solve_newton_step(&hess, &grad_lag, n);
let mu_merit = lambda.iter().map(|v| v.abs()).fold(1.0_f64, f64::max) + 1.0;
let mut merit = |xv: &[f64]| -> f64 {
let fv = f(xv);
nfev += 1;
let cv_eq: f64 = eq_cons.iter().map(|e| e(xv).abs()).sum::<f64>();
let cv_ineq: f64 = ineq_cons.iter().map(|g| g(xv).max(0.0)).sum::<f64>();
fv + mu_merit * (cv_eq + cv_ineq)
};
let merit0 = merit(&x);
let d_merit: f64 = grad_lag.iter().zip(d.iter()).map(|(&gi, &di)| gi * di).sum::<f64>();
let mut alpha = 1.0_f64;
let armijo_c = 1e-4;
let backtrack = 0.5;
let max_ls = 20;
for _ls in 0..max_ls {
let xnew: Vec<f64> = x.iter().zip(d.iter()).map(|(&xi, &di)| xi + alpha * di).collect();
let m_new = merit(&xnew);
if m_new <= merit0 + armijo_c * alpha * d_merit.min(0.0) {
break;
}
alpha *= backtrack;
}
let xnew: Vec<f64> = x.iter().zip(d.iter()).map(|(&xi, &di)| xi + alpha * di).collect();
x = xnew;
let (new_c_vals, new_c_jac) = compute_constraint_jac(&x, &mut nfev);
let new_grad = compute_grad(&x, &mut nfev, &mut njev);
update_lagrange_multipliers(
&new_grad,
&new_c_jac,
&new_c_vals,
&mut lambda,
n_eq,
n_ineq,
);
}
nfev += 1;
let fx = f(&x);
let (c_vals, _) = compute_constraint_jac(&x, &mut nfev);
let eq_viol: f64 = c_vals[..n_eq].iter().map(|v| v.abs()).sum();
let ineq_viol: f64 = c_vals[n_eq..].iter().map(|v| v.max(0.0)).sum();
let cv = eq_viol + ineq_viol;
Ok(OptimizeResults {
x: Array1::from_vec(x),
fun: fx,
jac: None,
hess: None,
constr: Some(Array1::from_vec(c_vals)),
nit,
nfev,
njev,
nhev: 0,
maxcv: 0,
message: format!(
"Maximum iterations reached (cv={:.2e})",
cv
),
success: cv <= self.constraint_tol,
status: if cv <= self.constraint_tol { 0 } else { 1 },
})
}
}
fn solve_newton_step(hess: &Vec<Vec<f64>>, grad: &[f64], n: usize) -> Vec<f64> {
let mut a: Vec<Vec<f64>> = hess.iter().map(|row| row.clone()).collect();
let mut b: Vec<f64> = grad.iter().map(|&gi| -gi).collect();
for col in 0..n {
let mut max_row = col;
let mut max_val = a[col][col].abs();
for row in (col + 1)..n {
if a[row][col].abs() > max_val {
max_val = a[row][col].abs();
max_row = row;
}
}
if max_val < 1e-12 {
return grad.iter().map(|&gi| -gi * 0.01).collect();
}
a.swap(col, max_row);
b.swap(col, max_row);
let pivot = a[col][col];
for row in (col + 1)..n {
let factor = a[row][col] / pivot;
for k in col..n {
let val = a[col][k] * factor;
a[row][k] -= val;
}
let bv = b[col] * factor;
b[row] -= bv;
}
}
let mut d = vec![0.0; n];
for i in (0..n).rev() {
let mut sum = b[i];
for j in (i + 1)..n {
sum -= a[i][j] * d[j];
}
if a[i][i].abs() < 1e-12 {
d[i] = 0.0;
} else {
d[i] = sum / a[i][i];
}
}
let dn = d.iter().map(|v| v * v).sum::<f64>().sqrt();
if dn > 10.0 {
let scale = 10.0 / dn;
d.iter_mut().for_each(|v| *v *= scale);
}
d
}
fn update_lagrange_multipliers(
grad: &[f64],
c_jac: &[Vec<f64>],
c_vals: &[f64],
lambda: &mut Vec<f64>,
n_eq: usize,
n_ineq: usize,
) {
let n = grad.len();
let m = lambda.len();
if m == 0 {
return;
}
let mut jjt = vec![vec![0.0_f64; m]; m];
let mut jg = vec![0.0_f64; m];
for i in 0..m {
for j in 0..m {
let dot: f64 = c_jac[i].iter().zip(c_jac[j].iter()).map(|(&a, &b)| a * b).sum();
jjt[i][j] = dot;
}
jg[i] = c_jac[i].iter().zip(grad.iter()).map(|(&a, &b)| a * b).sum::<f64>();
}
for i in 0..m {
jjt[i][i] += 1e-8;
}
if let Some(new_lambda) = solve_small_system_sqp(&jjt, &jg) {
for i in 0..m {
lambda[i] = -new_lambda[i];
}
for i in n_eq..n_eq + n_ineq {
if c_vals[i] > 0.0 {
lambda[i] = lambda[i].max(0.0);
} else if c_vals[i] < -1e-6 {
lambda[i] = 0.0;
}
}
}
}
fn solve_small_system_sqp(a: &Vec<Vec<f64>>, b: &[f64]) -> Option<Vec<f64>> {
let n = b.len();
if n == 0 {
return Some(vec![]);
}
let mut m: Vec<Vec<f64>> = a.iter().map(|row| row.clone()).collect();
let mut r: Vec<f64> = b.to_vec();
for col in 0..n {
let mut max_row = col;
let mut max_val = m[col][col].abs();
for row in (col + 1)..n {
if m[row][col].abs() > max_val {
max_val = m[row][col].abs();
max_row = row;
}
}
if max_val < 1e-12 {
return None;
}
m.swap(col, max_row);
r.swap(col, max_row);
let pivot = m[col][col];
for row in (col + 1)..n {
let factor = m[row][col] / pivot;
for k in col..n {
let val = m[col][k] * factor;
m[row][k] -= val;
}
let rv = r[col] * factor;
r[row] -= rv;
}
}
let mut x = vec![0.0; n];
for i in (0..n).rev() {
let mut sum = r[i];
for j in (i + 1)..n {
sum -= m[i][j] * x[j];
}
if m[i][i].abs() < 1e-12 {
return None;
}
x[i] = sum / m[i][i];
}
Some(x)
}
#[cfg(test)]
mod tests {
use super::*;
use approx::assert_abs_diff_eq;
#[test]
fn test_sqp_equality_constrained() {
let f = |x: &[f64]| x[0].powi(2) + x[1].powi(2);
let h = |x: &[f64]| x[0] + x[1] - 1.0;
let solver = SQPSolver::default();
let result = solver
.solve(
f,
None::<fn(&[f64]) -> Vec<f64>>,
&[h],
&[] as &[fn(&[f64]) -> f64],
&[0.0, 0.0],
)
.expect("solve failed");
assert_abs_diff_eq!(result.x[0], 0.5, epsilon = 1e-3);
assert_abs_diff_eq!(result.x[1], 0.5, epsilon = 1e-3);
assert_abs_diff_eq!(result.fun, 0.5, epsilon = 1e-3);
}
#[test]
fn test_sqp_inequality_constrained() {
let f = |x: &[f64]| x[0].powi(2) + x[1].powi(2);
let g = |x: &[f64]| 1.0 - x[0] - x[1];
let solver = SQPSolver::default();
let result = solver
.solve(
f,
None::<fn(&[f64]) -> Vec<f64>>,
&[] as &[fn(&[f64]) -> f64],
&[g],
&[2.0, 2.0],
)
.expect("solve failed");
assert_abs_diff_eq!(result.fun, 0.5, epsilon = 1e-2);
}
#[test]
fn test_sqp_unconstrained() {
let f = |x: &[f64]| (x[0] - 1.0).powi(2) + (x[1] - 2.0).powi(2);
let solver = SQPSolver::default();
let result = solver
.solve(
f,
None::<fn(&[f64]) -> Vec<f64>>,
&[] as &[fn(&[f64]) -> f64],
&[] as &[fn(&[f64]) -> f64],
&[0.0, 0.0],
)
.expect("solve failed");
assert_abs_diff_eq!(result.x[0], 1.0, epsilon = 1e-3);
assert_abs_diff_eq!(result.x[1], 2.0, epsilon = 1e-3);
assert_abs_diff_eq!(result.fun, 0.0, epsilon = 1e-4);
}
#[test]
fn test_sqp_with_gradient() {
let f = |x: &[f64]| x[0].powi(2) + x[1].powi(2);
let gf = |x: &[f64]| vec![2.0 * x[0], 2.0 * x[1]];
let h = |x: &[f64]| x[0] + x[1] - 2.0;
let solver = SQPSolver::default();
let result = solver
.solve(
f,
Some(gf),
&[h],
&[] as &[fn(&[f64]) -> f64],
&[0.0, 0.0],
)
.expect("solve failed");
assert_abs_diff_eq!(result.x[0], 1.0, epsilon = 1e-3);
assert_abs_diff_eq!(result.x[1], 1.0, epsilon = 1e-3);
assert_abs_diff_eq!(result.fun, 2.0, epsilon = 1e-3);
}
#[test]
fn test_sqp_mixed_constraints() {
let f = |x: &[f64]| (x[0] - 2.0).powi(2) + (x[1] - 2.0).powi(2);
let h = |x: &[f64]| x[0] + x[1] - 3.0;
let g = |x: &[f64]| x[0] - 2.0;
let solver = SQPSolver {
max_iter: 300,
tol: 1e-6,
constraint_tol: 1e-5,
..Default::default()
};
let result = solver
.solve(
f,
None::<fn(&[f64]) -> Vec<f64>>,
&[h],
&[g],
&[1.0, 2.0],
)
.expect("solve failed");
assert!(result.fun <= 1.0 + 1e-3, "fun={}", result.fun);
}
#[test]
fn test_sqp_3d_equality() {
let f = |x: &[f64]| x[0].powi(2) + x[1].powi(2) + x[2].powi(2);
let h = |x: &[f64]| x[0] + 2.0 * x[1] + 3.0 * x[2] - 6.0;
let solver = SQPSolver {
max_iter: 500,
..Default::default()
};
let result = solver
.solve(
f,
None::<fn(&[f64]) -> Vec<f64>>,
&[h],
&[] as &[fn(&[f64]) -> f64],
&[1.0, 1.0, 1.0],
)
.expect("solve failed");
assert!(result.fun <= 3.0, "fun={}", result.fun);
}
}