use crate::error::{OptimizeError, OptimizeResult};
/// Configuration for a sequential quadratic programming (SQP) solver that uses
/// a damped BFGS approximation of the Lagrangian Hessian, an active-set QP
/// subproblem for the search direction, and a backtracking line search on an
/// l1 merit function.
#[derive(Debug, Clone)]
pub struct SqpSolver {
/// Maximum number of outer SQP iterations.
pub max_iter: usize,
/// Convergence tolerance applied to both the KKT stationarity residual and
/// the maximum constraint violation.
pub tol: f64,
/// Initial penalty weight of the l1 merit function (doubled, capped at 1e8,
/// when the iterate is strongly infeasible).
pub merit_rho: f64,
/// Step size for central finite-difference gradients and Jacobians.
pub fd_eps: f64,
/// Maximum number of backtracking trials in the merit line search.
pub max_ls_iter: usize,
/// Armijo sufficient-decrease coefficient for the merit line search.
pub armijo_c: f64,
/// Multiplicative step-size reduction factor used when backtracking.
pub backtrack_factor: f64,
/// Damping threshold used by the damped BFGS update.
pub bfgs_damping: f64,
}
impl Default for SqpSolver {
fn default() -> Self {
Self {
max_iter: 200,
tol: 1e-8,
merit_rho: 10.0,
fd_eps: 1e-7,
max_ls_iter: 40,
armijo_c: 1e-4,
backtrack_factor: 0.5,
bfgs_damping: 0.2,
}
}
}
impl SqpSolver {
    /// Build a solver with a custom iteration cap and tolerance; every other
    /// knob keeps its `Default` value.
    pub fn new(max_iter: usize, tol: f64) -> Self {
        let mut solver = Self::default();
        solver.max_iter = max_iter;
        solver.tol = tol;
        solver
    }
}
/// Outcome of an [`SqpSolver::minimize`] run.
#[derive(Debug, Clone)]
pub struct SqpResult {
/// Final iterate.
pub x: Vec<f64>,
/// Objective value at the final iterate.
pub f_val: f64,
/// Maximum constraint violation at the final iterate (0.0 when feasible).
pub constraint_violation: f64,
/// Number of outer SQP iterations performed.
pub n_iter: usize,
/// True when both the KKT residual and the constraint violation fell below
/// the solver tolerance before the iteration cap.
pub converged: bool,
/// Last equality-constraint multiplier estimates.
pub multipliers_eq: Vec<f64>,
/// Last inequality-constraint multiplier estimates.
pub multipliers_ineq: Vec<f64>,
}
impl SqpSolver {
    /// Minimize `f(x)` subject to optional inequality constraints
    /// `g_ineq(x) <= 0`, equality constraints `h_eq(x) = 0`, and simple
    /// variable bounds, using an SQP iteration built from:
    /// - a damped-BFGS approximation of the Lagrangian Hessian,
    /// - an active-set QP subproblem for the search direction,
    /// - a backtracking Armijo line search on an l1 merit function.
    ///
    /// `grad_f`, `jac_g`, and `jac_h` are optional analytic derivatives; when
    /// absent, central finite differences with step `self.fd_eps` are used.
    /// Constraint dimensions are probed once at `x0` and assumed fixed.
    ///
    /// Fix vs. the previous revision: a per-iteration `f(&x)` call whose
    /// result was bound to `_fx` and discarded has been removed — it wasted
    /// one objective evaluation per outer iteration.
    #[allow(clippy::too_many_arguments)]
    pub fn minimize<F, GF, G, JG, H, JH>(
        &self,
        f: F,
        grad_f: Option<GF>,
        g_ineq: Option<G>,
        jac_g: Option<JG>,
        h_eq: Option<H>,
        jac_h: Option<JH>,
        x0: &[f64],
        bounds: Option<&[(f64, f64)]>,
    ) -> SqpResult
    where
        F: Fn(&[f64]) -> f64,
        GF: Fn(&[f64]) -> Vec<f64>,
        G: Fn(&[f64]) -> Vec<f64>,
        JG: Fn(&[f64]) -> Vec<Vec<f64>>,
        H: Fn(&[f64]) -> Vec<f64>,
        JH: Fn(&[f64]) -> Vec<Vec<f64>>,
    {
        let n = x0.len();
        let mut x = x0.to_vec();
        // Constraint counts are probed once at x0.
        let n_ineq = g_ineq.as_ref().map(|g| g(&x).len()).unwrap_or(0);
        let n_eq = h_eq.as_ref().map(|h| h(&x).len()).unwrap_or(0);
        // BFGS model of the Lagrangian Hessian, initialized to the identity.
        let mut bfgs_h = identity_matrix(n);
        let mut lam_eq = vec![0.0f64; n_eq];
        let mut lam_ineq = vec![0.0f64; n_ineq];
        let mut rho = self.merit_rho;
        let mut n_iter = 0usize;
        let mut converged = false;
        for iter in 0..self.max_iter {
            n_iter = iter + 1;
            // Objective gradient at the current iterate (analytic or FD).
            let gf = match &grad_f {
                Some(gf) => gf(&x),
                None => finite_diff_gradient(&f, &x, self.fd_eps),
            };
            // Inequality constraint values and Jacobian (analytic or FD).
            let g_vals: Vec<f64> = match &g_ineq {
                Some(g) => g(&x),
                None => vec![],
            };
            let g_jac: Vec<Vec<f64>> = if n_ineq > 0 {
                match &jac_g {
                    Some(jg) => jg(&x),
                    None => {
                        // Coerce the closure to a trait object so the FD
                        // helper (which takes `F` by value) can borrow it.
                        let gf_ineq = g_ineq.as_ref().map(|g| g as &dyn Fn(&[f64]) -> Vec<f64>);
                        if let Some(gf_ref) = gf_ineq {
                            finite_diff_jacobian(gf_ref, &x, n_ineq, self.fd_eps)
                        } else {
                            vec![]
                        }
                    }
                }
            } else {
                vec![]
            };
            // Equality constraint values and Jacobian (analytic or FD).
            let h_vals: Vec<f64> = match &h_eq {
                Some(h) => h(&x),
                None => vec![],
            };
            let h_jac: Vec<Vec<f64>> = if n_eq > 0 {
                match &jac_h {
                    Some(jh) => jh(&x),
                    None => {
                        let hf_eq = h_eq.as_ref().map(|h| h as &dyn Fn(&[f64]) -> Vec<f64>);
                        if let Some(hf_ref) = hf_eq {
                            finite_diff_jacobian(hf_ref, &x, n_eq, self.fd_eps)
                        } else {
                            vec![]
                        }
                    }
                }
            } else {
                vec![]
            };
            // Converged when both stationarity (KKT residual) and feasibility
            // (max constraint violation) are below tolerance.
            let cv = constraint_violation(&g_vals, &h_vals);
            let kkt = kkt_residual(&gf, &g_jac, &lam_ineq, &h_jac, &lam_eq, n);
            if kkt < self.tol && cv < self.tol {
                converged = true;
                break;
            }
            // Strongly infeasible: raise the merit penalty weight (capped) so
            // the line search prioritizes feasibility.
            if cv > 1e-3 {
                rho = (rho * 2.0).min(1e8);
            }
            // Search direction from the linearized (QP) subproblem.
            let step = solve_qp_subproblem(
                &bfgs_h,
                &gf,
                &h_jac,
                &h_vals,
                &g_jac,
                &g_vals,
                n,
            );
            // Backtracking Armijo line search on the l1 merit function.
            let mut alpha = 1.0;
            let merit0 = l1_merit(&f, &g_ineq, &h_eq, &x, rho);
            let d_merit = directional_derivative_l1(&gf, &g_jac, &lam_ineq, &h_jac, &lam_eq, &step, rho);
            let mut x_new = x.clone();
            let mut ls_ok = false;
            for _ls in 0..self.max_ls_iter {
                x_new = x.iter().zip(&step).map(|(&xi, &di)| xi + alpha * di).collect();
                // Project the trial point onto the simple bounds, if any.
                if let Some(bds) = bounds {
                    for (xi, &(lb, ub)) in x_new.iter_mut().zip(bds.iter()) {
                        *xi = xi.clamp(lb, ub);
                    }
                }
                let merit_new = l1_merit(&f, &g_ineq, &h_eq, &x_new, rho);
                if merit_new <= merit0 + self.armijo_c * alpha * d_merit {
                    ls_ok = true;
                    break;
                }
                alpha *= self.backtrack_factor;
            }
            if !ls_ok {
                // Line search failed: still take the (smallest) damped step so
                // the iteration cannot stall in place.
                x_new = x.iter().zip(&step).map(|(&xi, &di)| xi + alpha * di).collect();
                if let Some(bds) = bounds {
                    for (xi, &(lb, ub)) in x_new.iter_mut().zip(bds.iter()) {
                        *xi = xi.clamp(lb, ub);
                    }
                }
            }
            // Damped BFGS update from step `s` and the change `y` in the
            // Lagrangian gradient between the two iterates.
            let s: Vec<f64> = x_new.iter().zip(&x).map(|(xn, xo)| xn - xo).collect();
            let gf_new = match &grad_f {
                Some(gf_fn) => gf_fn(&x_new),
                None => finite_diff_gradient(&f, &x_new, self.fd_eps),
            };
            let y = lagrangian_grad_diff(
                &gf,
                &gf_new,
                &g_jac,
                &g_vals,
                &g_ineq,
                &x_new,
                &lam_ineq,
                &h_jac,
                &h_vals,
                &h_eq,
                &x_new,
                &lam_eq,
                n,
                self.fd_eps,
            );
            bfgs_update_damped(&mut bfgs_h, &s, &y, self.bfgs_damping);
            // Refresh multiplier estimates by a least-squares fit.
            // NOTE(review): this pairs the NEW gradient with the Jacobians
            // from the OLD point — looks intentional as a cheap estimate, but
            // confirm; recomputing the Jacobians at x_new would be exact.
            update_multipliers(
                &gf_new,
                &g_jac,
                &mut lam_ineq,
                &g_vals,
                &h_jac,
                &mut lam_eq,
                n,
            );
            x = x_new;
        }
        // Final diagnostics at the last iterate.
        let fx_final = f(&x);
        let g_final: Vec<f64> = g_ineq.as_ref().map(|g| g(&x)).unwrap_or_default();
        let h_final: Vec<f64> = h_eq.as_ref().map(|h| h(&x)).unwrap_or_default();
        let cv_final = constraint_violation(&g_final, &h_final);
        SqpResult {
            x,
            f_val: fx_final,
            constraint_violation: cv_final,
            n_iter,
            converged,
            multipliers_eq: lam_eq,
            multipliers_ineq: lam_ineq,
        }
    }
}
/// Solve the QP subproblem
///   min 0.5 d'Hd + g'd
///   s.t. a_eq[i]·d = -b_eq[i]      (linearized equalities)
///        a_ineq[i]·d <= -b_ineq[i] (linearized inequalities)
/// with a primal active-set loop: inequality rows whose linearization is
/// violated are added to the working set, and active rows whose estimated
/// multiplier is negative are released again. Returns the step `d` (the last
/// computed one if the iteration cap is reached).
pub fn solve_qp_subproblem(
h: &[Vec<f64>],
g: &[f64],
a_eq: &[Vec<f64>],
b_eq: &[f64],
a_ineq: &[Vec<f64>],
b_ineq: &[f64],
n: usize,
) -> Vec<f64> {
let n_ineq = a_ineq.len();
// Working set: which inequality rows are currently treated as equalities.
let mut active: Vec<bool> = vec![false; n_ineq];
// Hard cap on active-set iterations to guarantee termination.
let max_as_iter = 50 * (n + n_ineq + 1);
let mut d = vec![0.0f64; n];
for _as_iter in 0..max_as_iter {
// Equality system for this working set: true equalities first, then the
// active inequalities in ascending index order (the multiplier indexing
// below relies on this ordering).
let mut eq_rows: Vec<Vec<f64>> = a_eq.to_vec();
let mut eq_rhs: Vec<f64> = b_eq.iter().map(|&bi| -bi).collect();
for (i, &act) in active.iter().enumerate() {
if act {
eq_rows.push(a_ineq[i].clone());
eq_rhs.push(-b_ineq[i]);
}
}
d = solve_eq_qp(h, g, &eq_rows, &eq_rhs, n);
// Find the most violated inactive inequality (violated when a·d + b > 0,
// with a small positive threshold to ignore round-off).
let mut most_violated = None;
let mut max_viol = 1e-10;
for (i, &act) in active.iter().enumerate() {
if !act {
let viol = dot(&a_ineq[i], &d) + b_ineq[i];
if viol > max_viol {
max_viol = viol;
most_violated = Some(i);
}
}
}
if let Some(idx) = most_violated {
active[idx] = true;
continue;
}
// All linearized inequalities hold: inspect the multipliers of the active
// rows; drop the most negative one, or stop when none is negative.
let n_eq_orig = a_eq.len();
let mut n_active = 0usize;
let lagr = compute_qp_multipliers(h, g, &eq_rows, &d, n);
let mut drop_idx = None;
let mut min_lam = -1e-12;
for (i, &act) in active.iter().enumerate() {
if act {
// Active-row multipliers follow the original equality rows in `lagr`,
// in the same ascending-index order used when building `eq_rows`.
let lam_i = lagr.get(n_eq_orig + n_active).copied().unwrap_or(0.0);
if lam_i < min_lam {
min_lam = lam_i;
drop_idx = Some(i);
}
n_active += 1;
}
}
if let Some(idx) = drop_idx {
active[idx] = false;
} else {
break;
}
}
d
}
/// Solve the equality-constrained QP  min 0.5 d'Hd + g'd  s.t.  A d = rhs
/// through its (lightly regularized) KKT system
/// `[H A'; A 0] [d; lambda] = [-g; rhs]`, returning only the primal part `d`.
fn solve_eq_qp(
    h: &[Vec<f64>],
    g: &[f64],
    a: &[Vec<f64>],
    rhs: &[f64],
    n: usize,
) -> Vec<f64> {
    let m = a.len();
    let sz = n + m;
    let mut kkt = vec![vec![0.0f64; sz]; sz];
    let mut b = vec![0.0f64; sz];
    // Hessian block, with a tiny diagonal shift for numerical robustness.
    for i in 0..n {
        kkt[i][..n].copy_from_slice(&h[i][..n]);
        kkt[i][i] += 1e-8;
        b[i] = -g[i];
    }
    // Constraint rows A and their transpose A'.
    for (ci, row) in a.iter().enumerate() {
        for (j, &aij) in row.iter().enumerate() {
            kkt[n + ci][j] = aij;
            kkt[j][n + ci] = aij;
        }
        b[n + ci] = rhs[ci];
    }
    // A singular KKT matrix falls back to a zero step.
    let sol = gaussian_eliminate(&kkt, &b).unwrap_or_else(|_| vec![0.0; sz]);
    sol[..n].to_vec()
}
/// Recover Lagrange multipliers for the working-set equality rows of the QP.
///
/// At the subproblem solution, stationarity of 0.5 d'Hd + g'd with equality
/// rows A requires  H d + g + A' lambda = 0  — the same sign convention as
/// the KKT system assembled in `solve_eq_qp` (whose (1,2) block is +A').
/// Projecting onto the rows of A gives the normal equations
/// (A A') lambda = -A (H d + g), solved here.
///
/// Fix: the previous revision solved (A A') lambda = +A (H d + g), flipping
/// the sign of every multiplier; the drop rule in `solve_qp_subproblem`
/// (`lam_i < -1e-12`) then released exactly the constraints that should have
/// stayed active, causing add/drop cycling until the iteration cap.
fn compute_qp_multipliers(
    h: &[Vec<f64>],
    g: &[f64],
    a: &[Vec<f64>],
    d: &[f64],
    n: usize,
) -> Vec<f64> {
    let m = a.len();
    if m == 0 {
        return vec![];
    }
    // Residual r = H d + g of the unconstrained stationarity condition.
    let r: Vec<f64> = (0..n)
        .map(|i| dot(&h[i], d) + g[i])
        .collect();
    let aat = mat_mul_t(a, a);
    // Right-hand side -A r, so that A' lambda = -(H d + g).
    let neg_ar: Vec<f64> = (0..m).map(|i| -dot(&a[i], &r)).collect();
    gaussian_eliminate(&aat, &neg_ar).unwrap_or_else(|_| vec![0.0; m])
}
/// Solve the dense linear system `mat * x = rhs` by Gaussian elimination with
/// partial pivoting. Returns an error when a pivot is numerically zero
/// (singular matrix).
fn gaussian_eliminate(mat: &[Vec<f64>], rhs: &[f64]) -> OptimizeResult<Vec<f64>> {
    let n = rhs.len();
    let mut a = mat.to_vec();
    let mut b = rhs.to_vec();
    for col in 0..n {
        // Partial pivoting: bring the largest |entry| of this column (at or
        // below the diagonal) onto the diagonal. `>=` keeps the last maximum
        // on ties, matching the previous max-search behavior.
        let mut pivot_row = col;
        for r in (col + 1)..n {
            if a[r][col].abs() >= a[pivot_row][col].abs() {
                pivot_row = r;
            }
        }
        a.swap(col, pivot_row);
        b.swap(col, pivot_row);
        let piv = a[col][col];
        if piv.abs() < 1e-14 {
            return Err(OptimizeError::ComputationError(
                "Singular matrix in QP subproblem".to_string(),
            ));
        }
        // Eliminate the column below the pivot.
        for row in (col + 1)..n {
            let factor = a[row][col] / piv;
            for k in col..n {
                let above = a[col][k];
                a[row][k] -= factor * above;
            }
            b[row] -= factor * b[col];
        }
    }
    // Back substitution (the near-zero diagonal guard is defensive; pivots
    // were already checked above).
    let mut x = vec![0.0f64; n];
    for i in (0..n).rev() {
        let tail: f64 = ((i + 1)..n).map(|j| a[i][j] * x[j]).sum();
        let s = b[i] - tail;
        x[i] = if a[i][i].abs() < 1e-14 { 0.0 } else { s / a[i][i] };
    }
    Ok(x)
}
/// Build the n-by-n identity matrix as nested `Vec`s.
fn identity_matrix(n: usize) -> Vec<Vec<f64>> {
    let mut eye = vec![vec![0.0f64; n]; n];
    for (i, row) in eye.iter_mut().enumerate() {
        row[i] = 1.0;
    }
    eye
}
/// Inner product of two slices; extra elements of the longer slice are
/// ignored (zip truncates to the shorter length).
fn dot(a: &[f64], b: &[f64]) -> f64 {
    let mut acc = 0.0f64;
    for (x, y) in a.iter().zip(b) {
        acc += x * y;
    }
    acc
}
/// Compute A * B' for row-major matrices: entry (i, j) is the inner product
/// of row i of `a` with row j of `b`.
fn mat_mul_t(a: &[Vec<f64>], b: &[Vec<f64>]) -> Vec<Vec<f64>> {
    a.iter()
        .map(|ra| {
            b.iter()
                .map(|rb| ra.iter().zip(rb.iter()).map(|(&x, &y)| x * y).sum())
                .collect()
        })
        .collect()
}
/// Central-difference gradient of `f` at `x`, perturbing one coordinate at a
/// time by `±eps`.
fn finite_diff_gradient<F: Fn(&[f64]) -> f64>(f: &F, x: &[f64], eps: f64) -> Vec<f64> {
    let n = x.len();
    let mut plus = x.to_vec();
    let mut minus = x.to_vec();
    let mut grad = Vec::with_capacity(n);
    for i in 0..n {
        plus[i] = x[i] + eps;
        minus[i] = x[i] - eps;
        grad.push((f(&plus) - f(&minus)) / (2.0 * eps));
        // Restore the perturbed coordinate before moving on.
        plus[i] = x[i];
        minus[i] = x[i];
    }
    grad
}
/// Central-difference Jacobian of a vector-valued `f` at `x`: `m` rows (one
/// per output component), `x.len()` columns (one per input coordinate).
fn finite_diff_jacobian<F: Fn(&[f64]) -> Vec<f64>>(
    f: F,
    x: &[f64],
    m: usize,
    eps: f64,
) -> Vec<Vec<f64>> {
    let n = x.len();
    let mut jac = vec![vec![0.0f64; n]; m];
    let mut plus = x.to_vec();
    let mut minus = x.to_vec();
    for j in 0..n {
        plus[j] = x[j] + eps;
        minus[j] = x[j] - eps;
        let fp = f(&plus);
        let fm = f(&minus);
        // Column j of the Jacobian comes from this coordinate's perturbation.
        for (i, row) in jac.iter_mut().enumerate() {
            row[j] = (fp[i] - fm[i]) / (2.0 * eps);
        }
        plus[j] = x[j];
        minus[j] = x[j];
    }
    jac
}
/// Maximum constraint violation: the largest positive g_i (inequalities are
/// g <= 0) or the largest |h_i| (equalities are h = 0); 0.0 when feasible.
fn constraint_violation(g_vals: &[f64], h_vals: &[f64]) -> f64 {
    let mut worst = 0.0f64;
    for &gi in g_vals {
        worst = worst.max(gi.max(0.0));
    }
    for &hi in h_vals {
        worst = worst.max(hi.abs());
    }
    worst
}
/// Infinity norm of the Lagrangian gradient  grad_f + Jg'·lam + Jh'·mu,
/// used as the stationarity half of the convergence test. Missing Jacobian
/// entries or multipliers contribute zero.
fn kkt_residual(
    gf: &[f64],
    g_jac: &[Vec<f64>],
    lam_ineq: &[f64],
    h_jac: &[Vec<f64>],
    lam_eq: &[f64],
    n: usize,
) -> f64 {
    let mut worst = 0.0f64;
    for i in 0..n {
        let mut component = gf[i];
        // Rows beyond the multiplier vectors would be scaled by 0, so the
        // zip truncation is equivalent to the padded indexing it replaces.
        for (row, &lam) in g_jac.iter().zip(lam_ineq) {
            component += lam * row.get(i).copied().unwrap_or(0.0);
        }
        for (row, &lam) in h_jac.iter().zip(lam_eq) {
            component += lam * row.get(i).copied().unwrap_or(0.0);
        }
        worst = worst.max(component.abs());
    }
    worst
}
/// l1 (exact) penalty merit function:
/// f(x) + rho * (sum of positive g_i(x) + sum of |h_i(x)|).
fn l1_merit<F, G, H>(
    f: &F,
    g_ineq: &Option<G>,
    h_eq: &Option<H>,
    x: &[f64],
    rho: f64,
) -> f64
where
    F: Fn(&[f64]) -> f64,
    G: Fn(&[f64]) -> Vec<f64>,
    H: Fn(&[f64]) -> Vec<f64>,
{
    // Evaluate the objective first, then the constraints, preserving the
    // original call order.
    let fx = f(x);
    let mut penalty = 0.0f64;
    if let Some(g) = g_ineq {
        penalty += g(x).into_iter().map(|gi| gi.max(0.0)).sum::<f64>();
    }
    if let Some(h) = h_eq {
        penalty += h(x).into_iter().map(|hi| hi.abs()).sum::<f64>();
    }
    fx + rho * penalty
}
/// Directional-derivative estimate of the l1 merit function along `d`:
/// grad_f·d minus rho times the linearized constraint activity, with a tiny
/// 1e-10 offset subtracted.
fn directional_derivative_l1(
    gf: &[f64],
    g_jac: &[Vec<f64>],
    lam_ineq: &[f64],
    h_jac: &[Vec<f64>],
    lam_eq: &[f64],
    d: &[f64],
    rho: f64,
) -> f64 {
    // The multipliers are part of the signature but unused by this estimate.
    let _ = (lam_ineq, lam_eq);
    // Inner product of a row with the step direction.
    let along = |row: &[f64]| -> f64 { row.iter().zip(d).map(|(&r, &di)| r * di).sum() };
    let grad_dot = along(gf);
    let mut penalty_d = 0.0f64;
    for row in g_jac {
        penalty_d += along(row).max(0.0);
    }
    for row in h_jac {
        penalty_d += along(row).abs();
    }
    grad_dot - rho * penalty_d.abs() - 1e-10
}
/// Difference of Lagrangian gradients between the previous and new iterates —
/// the `y` vector for the damped BFGS update. Constraint Jacobians at the new
/// point are re-estimated by central finite differences with a fixed 1e-7
/// step; several parameters are accepted only for interface stability.
#[allow(clippy::too_many_arguments)]
fn lagrangian_grad_diff<G, H>(
    gf_old: &[f64],
    gf_new: &[f64],
    g_jac_old: &[Vec<f64>],
    g_vals_old: &[f64],
    g_ineq: &Option<G>,
    x_new: &[f64],
    lam_ineq: &[f64],
    h_jac_old: &[Vec<f64>],
    h_vals_old: &[f64],
    h_eq: &Option<H>,
    _x_new2: &[f64],
    lam_eq: &[f64],
    n: usize,
    fd_eps: f64,
) -> Vec<f64>
where
    G: Fn(&[f64]) -> Vec<f64>,
    H: Fn(&[f64]) -> Vec<f64>,
{
    let _ = (g_vals_old, h_vals_old, fd_eps);
    let n_ineq = lam_ineq.len();
    let n_eq = lam_eq.len();
    // Fresh Jacobians at x_new; empty when there are no such constraints.
    let g_jac_new: Vec<Vec<f64>> = match g_ineq {
        Some(g) if n_ineq > 0 => finite_diff_jacobian(|x| g(x), x_new, n_ineq, 1e-7),
        _ => vec![],
    };
    let h_jac_new: Vec<Vec<f64>> = match h_eq {
        Some(h) if n_eq > 0 => finite_diff_jacobian(|x| h(x), x_new, n_eq, 1e-7),
        _ => vec![],
    };
    // Jacobian entry accessor that treats missing rows/columns as zero.
    let entry = |jac: &[Vec<f64>], j: usize, i: usize| -> f64 {
        jac.get(j).and_then(|row| row.get(i)).copied().unwrap_or(0.0)
    };
    let mut y = Vec::with_capacity(n);
    for i in 0..n {
        let mut lag_new = gf_new[i];
        for (j, &l) in lam_ineq.iter().enumerate() {
            lag_new += l * entry(&g_jac_new, j, i);
        }
        for (j, &l) in lam_eq.iter().enumerate() {
            lag_new += l * entry(&h_jac_new, j, i);
        }
        let mut lag_old = gf_old[i];
        for (j, &l) in lam_ineq.iter().enumerate() {
            lag_old += l * entry(g_jac_old, j, i);
        }
        for (j, &l) in lam_eq.iter().enumerate() {
            lag_old += l * entry(h_jac_old, j, i);
        }
        y.push(lag_new - lag_old);
    }
    y
}
/// In-place damped BFGS update of the Hessian approximation `h` from the step
/// `s = x_new - x_old` and the Lagrangian-gradient change `y`. When the
/// measured curvature s'y is too small relative to s'Hs, `y` is blended with
/// `Hs` (Powell-style damping — TODO confirm against the intended reference)
/// so the update tends to keep `h` positive definite.
fn bfgs_update_damped(h: &mut Vec<Vec<f64>>, s: &[f64], y: &[f64], theta: f64) {
let n = s.len();
let sy = dot(s, y);
// s' H s under the current approximation.
let shs: f64 = {
let hs: Vec<f64> = (0..n).map(|i| dot(&h[i], s)).collect();
dot(s, &hs)
};
// Insufficient curvature (sy < theta * s'Hs): replace y by a convex
// combination of y and Hs with blending factor (1-theta)s'Hs/(s'Hs - s'y).
let (s_use, y_use): (Vec<f64>, Vec<f64>) = if sy < theta * shs {
let factor = if shs.abs() > 1e-14 {
(1.0 - theta) * shs / (shs - sy)
} else {
0.0
};
let hs: Vec<f64> = (0..n).map(|i| dot(&h[i], s)).collect();
let y_damp: Vec<f64> = y.iter().zip(&hs).map(|(&yi, &hsi)| factor * hsi + (1.0 - factor) * yi).collect();
(s.to_vec(), y_damp)
} else {
(s.to_vec(), y.to_vec())
};
// Skip the update entirely when either denominator is numerically zero.
let sy2 = dot(&s_use, &y_use);
if sy2.abs() < 1e-14 {
return;
}
let hs: Vec<f64> = (0..n).map(|i| dot(&h[i], &s_use)).collect();
let shs2 = dot(&s_use, &hs);
if shs2.abs() < 1e-14 {
return;
}
// Rank-two BFGS update: H += y y'/(s'y) - (Hs)(Hs)'/(s'Hs).
for i in 0..n {
for j in 0..n {
h[i][j] += y_use[i] * y_use[j] / sy2 - hs[i] * hs[j] / shs2;
}
}
}
/// Least-squares multiplier estimate from the stationarity condition
/// J'·lambda = -grad_f, solved via the normal equations
/// (J J') lambda = -J grad_f. Inequality multipliers are clamped to be
/// nonnegative and zeroed for constraints that are not near-active
/// (g <= -1e-6). On a singular system the multipliers are left unchanged.
fn update_multipliers(
    gf: &[f64],
    g_jac: &[Vec<f64>],
    lam_ineq: &mut Vec<f64>,
    g_vals: &[f64],
    h_jac: &[Vec<f64>],
    lam_eq: &mut Vec<f64>,
    n: usize,
) {
    let _ = n;
    let n_ineq = lam_ineq.len();
    let n_eq = lam_eq.len();
    if n_ineq + n_eq == 0 {
        return;
    }
    // Stack inequality rows above equality rows, matching multiplier order.
    let mut jac: Vec<Vec<f64>> = Vec::with_capacity(n_ineq + n_eq);
    jac.extend(g_jac.iter().cloned());
    jac.extend(h_jac.iter().cloned());
    let jtj = mat_mul_t(&jac, &jac);
    let rhs: Vec<f64> = jac.iter().map(|row| -dot(row, gf)).collect();
    if let Ok(lam_all) = gaussian_eliminate(&jtj, &rhs) {
        for (i, lam) in lam_ineq.iter_mut().enumerate() {
            let raw = lam_all.get(i).copied().unwrap_or(0.0);
            // Only near-active inequalities keep a (nonnegative) multiplier.
            let near_active = g_vals.get(i).map_or(false, |&gv| gv > -1e-6);
            *lam = if near_active { raw.max(0.0) } else { 0.0 };
        }
        for (i, lam) in lam_eq.iter_mut().enumerate() {
            *lam = lam_all.get(n_ineq + i).copied().unwrap_or(0.0);
        }
    }
}
#[cfg(test)]
mod tests {
use super::*;
use approx::assert_abs_diff_eq;
// Unconstrained convex quadratic: the solver should converge to the origin.
#[test]
fn test_sqp_unconstrained_quadratic() {
let solver = SqpSolver::new(100, 1e-6);
let res = solver.minimize(
|x: &[f64]| x[0].powi(2) + x[1].powi(2),
None::<fn(&[f64]) -> Vec<f64>>,
None::<fn(&[f64]) -> Vec<f64>>,
None::<fn(&[f64]) -> Vec<Vec<f64>>>,
None::<fn(&[f64]) -> Vec<f64>>,
None::<fn(&[f64]) -> Vec<Vec<f64>>>,
&[1.0, 1.0],
None,
);
assert!(res.converged, "should converge");
assert_abs_diff_eq!(res.x[0], 0.0, epsilon = 1e-4);
assert_abs_diff_eq!(res.x[1], 0.0, epsilon = 1e-4);
}
// Minimize x0^2 + x1^2 on the line x0 + x1 = 1: optimum (0.5, 0.5), f* = 0.5.
#[test]
fn test_sqp_equality_constrained() {
let solver = SqpSolver::new(200, 1e-6);
let res = solver.minimize(
|x: &[f64]| x[0].powi(2) + x[1].powi(2),
None::<fn(&[f64]) -> Vec<f64>>,
None::<fn(&[f64]) -> Vec<f64>>,
None::<fn(&[f64]) -> Vec<Vec<f64>>>,
Some(|x: &[f64]| vec![x[0] + x[1] - 1.0]),
None::<fn(&[f64]) -> Vec<Vec<f64>>>,
&[0.0, 0.0],
None,
);
assert!(
res.constraint_violation < 1e-3,
"constraint should be satisfied, cv = {}",
res.constraint_violation
);
assert!(
(res.f_val - 0.5).abs() < 0.05,
"f* ≈ 0.5, got {}",
res.f_val
);
}
// Inequality x0 + x1 <= 3 active at the optimum (1.5, 1.5), f* = 0.5;
// started from a feasible point (0.5, 0.5).
#[test]
fn test_sqp_inequality_constrained() {
let solver = SqpSolver::new(200, 1e-5);
let res = solver.minimize(
|x: &[f64]| (x[0] - 2.0).powi(2) + (x[1] - 2.0).powi(2),
None::<fn(&[f64]) -> Vec<f64>>,
Some(|x: &[f64]| vec![x[0] + x[1] - 3.0]),
None::<fn(&[f64]) -> Vec<Vec<f64>>>,
None::<fn(&[f64]) -> Vec<f64>>,
None::<fn(&[f64]) -> Vec<Vec<f64>>>,
&[0.5, 0.5],
None,
);
assert!(
res.constraint_violation < 1e-3,
"constraint violated: cv={}",
res.constraint_violation
);
assert!(res.f_val < 1.0, "objective should be small, got {}", res.f_val);
}
// QP subproblem with no constraints reduces to solving H d = -g.
#[test]
fn test_qp_subproblem_unconstrained() {
let h = vec![vec![1.0, 0.0], vec![0.0, 1.0]];
let g = vec![1.0, 0.0];
let d = solve_qp_subproblem(&h, &g, &[], &[], &[], &[], 2);
assert_abs_diff_eq!(d[0], -1.0, epsilon = 1e-6);
assert_abs_diff_eq!(d[1], 0.0, epsilon = 1e-6);
}
// Sanity checks on the result struct fields for a trivial 1-D problem.
#[test]
fn test_sqp_result_fields() {
let solver = SqpSolver::default();
let res = solver.minimize(
|x: &[f64]| x[0].powi(2),
None::<fn(&[f64]) -> Vec<f64>>,
None::<fn(&[f64]) -> Vec<f64>>,
None::<fn(&[f64]) -> Vec<Vec<f64>>>,
None::<fn(&[f64]) -> Vec<f64>>,
None::<fn(&[f64]) -> Vec<Vec<f64>>>,
&[3.0],
None,
);
assert!(res.n_iter >= 1);
assert!(res.constraint_violation >= 0.0);
assert!(res.multipliers_eq.is_empty());
assert!(res.multipliers_ineq.is_empty());
}
}