numra_optim/
sqp.rs

1//! Sequential Quadratic Programming (SQP) for constrained nonlinear optimization.
2//!
3//! At each iteration: (1) form a QP subproblem using a BFGS Hessian approximation,
4//! (2) solve the QP for a search direction, (3) line search with L1 merit function.
5//!
6//! Author: Moussa Leblouba
7//! Date: 5 March 2026
8//! Modified: 2 May 2026
9
10use numra_core::Scalar;
11use numra_linalg::{DenseMatrix, Matrix};
12
13use crate::error::OptimError;
14use crate::problem::{finite_diff_gradient, Constraint, ConstraintKind};
15use crate::types::{IterationRecord, OptimResult, OptimStatus};
16
17/// Options for the SQP solver.
18#[derive(Clone, Debug)]
19pub struct SqpOptions<S: Scalar> {
20    pub max_iter: usize,
21    pub tol: S,
22    pub verbose: bool,
23}
24
25impl<S: Scalar> Default for SqpOptions<S> {
26    fn default() -> Self {
27        Self {
28            max_iter: 500,
29            tol: S::from_f64(1e-6),
30            verbose: false,
31        }
32    }
33}
34
35/// SQP solver for nonlinearly constrained optimization.
36///
37/// Minimizes `f(x)` subject to equality and inequality constraints.
38pub fn sqp_minimize<S, F, G>(
39    f: F,
40    grad_f: G,
41    constraints: &[Constraint<S>],
42    x0: &[S],
43    opts: &SqpOptions<S>,
44) -> Result<OptimResult<S>, OptimError>
45where
46    S: Scalar + faer::SimpleEntity + faer::Conjugate<Canonical = S> + faer::ComplexField,
47    F: Fn(&[S]) -> S,
48    G: Fn(&[S], &mut [S]),
49{
50    let start = std::time::Instant::now();
51    let n = x0.len();
52
53    let eq_indices: Vec<usize> = constraints
54        .iter()
55        .enumerate()
56        .filter(|(_, c)| c.kind == ConstraintKind::Equality)
57        .map(|(i, _)| i)
58        .collect();
59    let ineq_indices: Vec<usize> = constraints
60        .iter()
61        .enumerate()
62        .filter(|(_, c)| c.kind == ConstraintKind::Inequality)
63        .map(|(i, _)| i)
64        .collect();
65    let n_eq = eq_indices.len();
66    let n_ineq = ineq_indices.len();
67
68    let mut x = x0.to_vec();
69    let mut grad = vec![S::ZERO; n];
70    grad_f(&x, &mut grad);
71
72    // BFGS Hessian approximation (starts at identity)
73    let mut hess = DenseMatrix::<S>::zeros(n, n);
74    for i in 0..n {
75        hess.set(i, i, S::ONE);
76    }
77
78    let mut n_feval = 1_usize;
79    let mut n_geval = 1_usize;
80    let mut history: Vec<IterationRecord<S>> = Vec::new();
81    let mut converged = false;
82    let mut iterations = 0;
83
84    // L1 merit penalty parameter — start with a moderate value and increase as needed
85    let mut mu = S::from_f64(10.0);
86    let mut prev_step_norm = S::INFINITY;
87    let mut prev_violation = S::INFINITY;
88    let mut stagnation_count = 0_usize;
89
90    for iter in 0..opts.max_iter {
91        iterations = iter + 1;
92        let f_val = f(&x);
93        n_feval += 1;
94
95        // Evaluate constraints
96        let c_vals: Vec<S> = constraints.iter().map(|c| (c.func)(&x)).collect();
97
98        // Constraint violation
99        let max_violation = constraint_violation(&c_vals, &eq_indices, &ineq_indices);
100
101        // Compute constraint Jacobians
102        let mut c_grads: Vec<Vec<S>> = Vec::with_capacity(constraints.len());
103        for c in constraints {
104            let mut cg = vec![S::ZERO; n];
105            if let Some(ref g) = c.grad {
106                g(&x, &mut cg);
107            } else {
108                finite_diff_gradient(&*c.func, &x, &mut cg);
109            }
110            c_grads.push(cg);
111        }
112
113        // Compute Lagrangian gradient for KKT check
114        let grad_norm = grad.iter().map(|&g| g * g).sum::<S>().sqrt();
115        let kkt_norm = if !eq_indices.is_empty() || !ineq_indices.is_empty() {
116            lagrangian_gradient_norm(&grad, &c_vals, &c_grads, &eq_indices, &ineq_indices, n)
117        } else {
118            grad_norm
119        };
120
121        if opts.verbose {
122            eprintln!(
123                "SQP iter {}: f={:.6e}, ||kkt||={:.2e}, cv={:.2e}, mu={:.1e}, step={:.2e}",
124                iter,
125                f_val.to_f64(),
126                kkt_norm.to_f64(),
127                max_violation.to_f64(),
128                mu.to_f64(),
129                prev_step_norm.to_f64()
130            );
131        }
132
133        history.push(IterationRecord {
134            iteration: iter,
135            objective: f_val,
136            gradient_norm: kkt_norm,
137            step_size: prev_step_norm,
138            constraint_violation: max_violation,
139        });
140
141        // Check convergence: KKT optimality + feasibility
142        if kkt_norm < opts.tol && max_violation < opts.tol {
143            converged = true;
144            break;
145        }
146        // Also converge if the actual step is tiny and we're feasible
147        if prev_step_norm < opts.tol && max_violation < opts.tol {
148            converged = true;
149            break;
150        }
151
152        // Feasibility restoration: if constraint violation stagnates, take a Newton step
153        // toward the constraint surface (ignoring the objective temporarily)
154        if max_violation > opts.tol {
155            let cv_decrease = prev_violation - max_violation;
156            if cv_decrease < S::from_f64(1e-8) * max_violation {
157                stagnation_count += 1;
158            } else {
159                stagnation_count = 0;
160            }
161
162            if stagnation_count >= 5 {
163                // Feasibility restoration: minimize sum c_i^2 via one Gauss-Newton step
164                // J^T J d = -J^T c  (normal equations)
165                let m = constraints.len();
166                let mut jtj = vec![S::ZERO; n * n];
167                let mut jtc = vec![S::ZERO; n];
168                for i in 0..m {
169                    let ci = c_vals[i];
170                    // Skip inactive inequalities
171                    if constraints[i].kind == ConstraintKind::Inequality && ci < S::ZERO {
172                        continue;
173                    }
174                    for j in 0..n {
175                        jtc[j] += c_grads[i][j] * ci;
176                        for k in 0..n {
177                            jtj[j * n + k] += c_grads[i][j] * c_grads[i][k];
178                        }
179                    }
180                }
181                // Regularize and solve
182                for j in 0..n {
183                    jtj[j * n + j] += S::from_f64(1e-6);
184                }
185                let jtj_mat = DenseMatrix::<S>::from_row_major(n, n, &jtj);
186                let neg_jtc: Vec<S> = jtc.iter().map(|&v| -v).collect();
187                if let Ok(d_feas) = jtj_mat.solve(&neg_jtc) {
188                    // Take a damped feasibility step
189                    let mut alpha_f = S::ONE;
190                    for _ in 0..10 {
191                        let x_trial: Vec<S> = (0..n).map(|j| x[j] + alpha_f * d_feas[j]).collect();
192                        let c_trial: Vec<S> =
193                            constraints.iter().map(|c| (c.func)(&x_trial)).collect();
194                        let cv_trial = constraint_violation(&c_trial, &eq_indices, &ineq_indices);
195                        if cv_trial < max_violation {
196                            x = x_trial;
197                            grad_f(&x, &mut grad);
198                            n_geval += 1;
199                            n_feval += 1;
200                            // Reset Hessian to identity after restoration
201                            for i in 0..n {
202                                for j in 0..n {
203                                    hess.set(i, j, if i == j { S::ONE } else { S::ZERO });
204                                }
205                            }
206                            stagnation_count = 0;
207                            break;
208                        }
209                        alpha_f *= S::HALF;
210                    }
211                }
212                prev_violation = max_violation;
213                continue;
214            }
215        } else {
216            stagnation_count = 0;
217        }
218        prev_violation = max_violation;
219
220        // Solve QP subproblem for search direction and multiplier estimates
221        let (d, max_mult) = solve_qp_subproblem(
222            &hess,
223            &grad,
224            &c_vals,
225            &c_grads,
226            &eq_indices,
227            &ineq_indices,
228            n,
229        );
230
231        let d_norm = d.iter().map(|&di| di * di).sum::<S>().sqrt();
232        if d_norm < S::from_f64(1e-15) {
233            if max_violation < opts.tol {
234                converged = true;
235            }
236            break;
237        }
238
239        // Update mu: must be > max |λ_i| for L1 merit to be exact
240        let mu_min = max_mult + S::from_f64(0.1);
241        if mu < mu_min {
242            mu = mu_min;
243        }
244
245        // Compute directional derivative of L1 merit function
246        let df_dd: S = grad.iter().zip(d.iter()).map(|(&gi, &di)| gi * di).sum();
247        let merit_deriv = df_dd - mu * l1_penalty(&c_vals, &eq_indices, &ineq_indices);
248
249        // Further increase mu if merit is not descending
250        if merit_deriv > S::ZERO {
251            mu *= S::TWO;
252        }
253
254        let merit_x = f_val + mu * l1_penalty(&c_vals, &eq_indices, &ineq_indices);
255
256        // Backtracking line search on L1 merit function
257        let mut alpha = S::ONE;
258        let eta = S::from_f64(1e-4);
259        let mut step_evals = 0;
260
261        for _ in 0..30 {
262            let x_trial: Vec<S> = (0..n).map(|j| x[j] + alpha * d[j]).collect();
263            let f_trial = f(&x_trial);
264            let c_trial: Vec<S> = constraints.iter().map(|c| (c.func)(&x_trial)).collect();
265            step_evals += 1;
266
267            let merit_trial = f_trial + mu * l1_penalty(&c_trial, &eq_indices, &ineq_indices);
268
269            // Accept if sufficient decrease in merit
270            if merit_trial <= merit_x + eta * alpha * merit_deriv {
271                break;
272            }
273            // Also accept if merit decreased at all and alpha is small enough
274            if merit_trial < merit_x && alpha < S::from_f64(0.1) {
275                break;
276            }
277
278            alpha *= S::HALF;
279        }
280        n_feval += step_evals;
281
282        // Update x
283        let old_x = x.clone();
284        let old_grad = grad.clone();
285        for j in 0..n {
286            x[j] += alpha * d[j];
287        }
288        prev_step_norm = alpha * d_norm;
289
290        // Update gradient
291        grad_f(&x, &mut grad);
292        n_geval += 1;
293
294        // BFGS Hessian update (Powell's damped variant)
295        let s: Vec<S> = (0..n).map(|j| x[j] - old_x[j]).collect();
296        let y: Vec<S> = (0..n).map(|j| grad[j] - old_grad[j]).collect();
297
298        let ss: S = s.iter().map(|&si| si * si).sum();
299        if ss > S::from_f64(1e-20) {
300            let sy: S = s.iter().zip(y.iter()).map(|(&si, &yi)| si * yi).sum();
301            let mut hs = vec![S::ZERO; n];
302            hess.mul_vec(&s, &mut hs);
303            let shs: S = s.iter().zip(hs.iter()).map(|(&si, &hi)| si * hi).sum();
304
305            if shs > S::from_f64(1e-20) {
306                // Powell's damping
307                let theta = if sy >= S::from_f64(0.2) * shs {
308                    S::ONE
309                } else if (shs - sy).to_f64().abs() > 1e-20 {
310                    S::from_f64(0.8) * shs / (shs - sy)
311                } else {
312                    S::ONE
313                };
314
315                let r: Vec<S> = (0..n)
316                    .map(|j| theta * y[j] + (S::ONE - theta) * hs[j])
317                    .collect();
318                let sr: S = s.iter().zip(r.iter()).map(|(&si, &ri)| si * ri).sum();
319
320                if sr > S::from_f64(1e-20) {
321                    for i in 0..n {
322                        for j in 0..n {
323                            let val = hess.get(i, j) - hs[i] * hs[j] / shs + r[i] * r[j] / sr;
324                            hess.set(i, j, val);
325                        }
326                    }
327                }
328            }
329        }
330    }
331
332    let f_final = f(&x);
333    let c_final: Vec<S> = constraints.iter().map(|c| (c.func)(&x)).collect();
334    let final_violation = constraint_violation(&c_final, &eq_indices, &ineq_indices);
335
336    let (status, message) = if converged {
337        (
338            OptimStatus::GradientConverged,
339            format!("SQP converged after {} iterations", iterations),
340        )
341    } else {
342        (
343            OptimStatus::MaxIterations,
344            format!("SQP: max iterations ({}) reached", opts.max_iter),
345        )
346    };
347
348    Ok(OptimResult {
349        x,
350        f: f_final,
351        grad,
352        iterations,
353        n_feval,
354        n_geval,
355        converged,
356        message,
357        status,
358        history,
359        lambda_eq: vec![S::ZERO; n_eq],
360        lambda_ineq: vec![S::ZERO; n_ineq],
361        active_bounds: Vec::new(),
362        constraint_violation: final_violation,
363        wall_time_secs: 0.0,
364        pareto: None,
365        sensitivity: None,
366    }
367    .with_wall_time(start))
368}
369
370/// Compute the Lagrangian gradient norm as a KKT optimality measure.
371///
372/// For equality constraints: solve min ||∇f + A_eq^T λ||^2 for λ.
373/// For a practical approximation: compute the component of ∇f orthogonal to the
374/// constraint gradient space. If ∇f is in span(∇c_i), KKT norm ≈ 0.
375fn lagrangian_gradient_norm<S: Scalar>(
376    grad: &[S],
377    c_vals: &[S],
378    c_grads: &[Vec<S>],
379    eq_indices: &[usize],
380    ineq_indices: &[usize],
381    n: usize,
382) -> S {
383    // Collect active constraint gradients (equality + active inequality)
384    let mut active_grads: Vec<&Vec<S>> = Vec::new();
385    for &i in eq_indices {
386        active_grads.push(&c_grads[i]);
387    }
388    for &i in ineq_indices {
389        if c_vals[i] > S::from_f64(-1e-6) {
390            active_grads.push(&c_grads[i]);
391        }
392    }
393
394    if active_grads.is_empty() {
395        return grad.iter().map(|&g| g * g).sum::<S>().sqrt();
396    }
397
398    // Compute lambda via normal equations: A * A^T * lambda = -A * grad
399    let m = active_grads.len();
400    // For small m, direct computation
401    // Project grad onto the space spanned by active constraint gradients
402    // Residual = grad - A^T * (A * A^T)^{-1} * A * grad
403    // For m=1: residual = grad - (a . grad)/(a . a) * a
404    if m == 1 {
405        let a = &active_grads[0];
406        let ag: S = a.iter().zip(grad.iter()).map(|(&ai, &gi)| ai * gi).sum();
407        let aa: S = a.iter().map(|&ai| ai * ai).sum();
408        if aa < S::from_f64(1e-20) {
409            return grad.iter().map(|&g| g * g).sum::<S>().sqrt();
410        }
411        // KKT: ∇f + λ∇c = 0  =>  λ = -(a·g)/(a·a)
412        let lambda = -ag / aa;
413        let mut residual_sq = S::ZERO;
414        for j in 0..n {
415            let r = grad[j] + lambda * a[j];
416            residual_sq += r * r;
417        }
418        return residual_sq.sqrt();
419    }
420
421    // General case: form A (m x n), compute A*A^T (m x m), solve for lambda
422    // Then residual = grad + A^T * lambda
423    let mut aat = vec![S::ZERO; m * m];
424    let mut ag = vec![S::ZERO; m];
425    for i in 0..m {
426        for j in 0..m {
427            let mut dot = S::ZERO;
428            for (ak_i, ak_j) in active_grads[i].iter().zip(active_grads[j].iter()).take(n) {
429                dot += *ak_i * *ak_j;
430            }
431            aat[i * m + j] = dot;
432        }
433        let mut dot = S::ZERO;
434        for k in 0..n {
435            dot += active_grads[i][k] * grad[k];
436        }
437        ag[i] = -dot;
438    }
439
440    // Solve A*A^T * lambda = -A * grad via Gaussian elimination (small system)
441    // Augmented matrix [aat | ag]
442    let mut aug = vec![S::ZERO; m * (m + 1)];
443    for i in 0..m {
444        for j in 0..m {
445            aug[i * (m + 1) + j] = aat[i * m + j];
446        }
447        aug[i * (m + 1) + m] = ag[i];
448    }
449
450    for col in 0..m {
451        // Find pivot
452        let mut max_abs = S::ZERO;
453        let mut pivot_row = col;
454        for row in col..m {
455            let v = aug[row * (m + 1) + col].abs();
456            if v > max_abs {
457                max_abs = v;
458                pivot_row = row;
459            }
460        }
461        if max_abs < S::from_f64(1e-20) {
462            // Singular — just return gradient norm
463            return grad.iter().map(|&g| g * g).sum::<S>().sqrt();
464        }
465        // Swap rows
466        if pivot_row != col {
467            for j in 0..=m {
468                aug.swap(col * (m + 1) + j, pivot_row * (m + 1) + j);
469            }
470        }
471        // Eliminate
472        let pivot = aug[col * (m + 1) + col];
473        for row in (col + 1)..m {
474            let factor = aug[row * (m + 1) + col] / pivot;
475            for j in col..=m {
476                let val = aug[col * (m + 1) + j];
477                aug[row * (m + 1) + j] -= factor * val;
478            }
479        }
480    }
481
482    // Back-substitute
483    let mut lambda = vec![S::ZERO; m];
484    for i in (0..m).rev() {
485        let mut s = aug[i * (m + 1) + m];
486        for j in (i + 1)..m {
487            s -= aug[i * (m + 1) + j] * lambda[j];
488        }
489        lambda[i] = s / aug[i * (m + 1) + i];
490    }
491
492    // Compute Lagrangian gradient: ∇f + Σ λ_i ∇c_i
493    let mut residual_sq = S::ZERO;
494    for j in 0..n {
495        let mut r = grad[j];
496        for (i, ag_i) in active_grads.iter().enumerate() {
497            r += lambda[i] * ag_i[j];
498        }
499        residual_sq += r * r;
500    }
501    residual_sq.sqrt()
502}
503
504/// Compute max constraint violation.
505fn constraint_violation<S: Scalar>(
506    c_vals: &[S],
507    eq_indices: &[usize],
508    ineq_indices: &[usize],
509) -> S {
510    let mut v = S::ZERO;
511    for &i in eq_indices {
512        let cv = c_vals[i].abs();
513        if cv > v {
514            v = cv;
515        }
516    }
517    for &i in ineq_indices {
518        if c_vals[i] > v {
519            v = c_vals[i];
520        }
521    }
522    v
523}
524
525/// Compute L1 penalty: sum|h_eq| + sum max(0, g_ineq).
526fn l1_penalty<S: Scalar>(c_vals: &[S], eq_indices: &[usize], ineq_indices: &[usize]) -> S {
527    let mut p = S::ZERO;
528    for &i in eq_indices {
529        p += c_vals[i].abs();
530    }
531    for &i in ineq_indices {
532        if c_vals[i] > S::ZERO {
533            p += c_vals[i];
534        }
535    }
536    p
537}
538
539/// Solve the QP subproblem via KKT system.
540/// Returns (search_direction, max_abs_multiplier).
541fn solve_qp_subproblem<S>(
542    hess: &DenseMatrix<S>,
543    grad: &[S],
544    c_vals: &[S],
545    c_grads: &[Vec<S>],
546    eq_indices: &[usize],
547    ineq_indices: &[usize],
548    n: usize,
549) -> (Vec<S>, S)
550where
551    S: Scalar + faer::SimpleEntity + faer::Conjugate<Canonical = S> + faer::ComplexField,
552{
553    // Active inequalities: violated or nearly active (tighter threshold)
554    let active_ineq: Vec<usize> = ineq_indices
555        .iter()
556        .copied()
557        .filter(|&i| c_vals[i] > S::from_f64(-1e-6))
558        .collect();
559
560    // Filter linearly dependent constraints via incremental independence check.
561    // Accept a constraint gradient only if it adds significant new direction.
562    let mut independent_eq: Vec<usize> = Vec::new();
563    let mut independent_ineq: Vec<usize> = Vec::new();
564    let mut accepted_grads: Vec<&Vec<S>> = Vec::new();
565
566    for &ci in eq_indices {
567        if is_independent(&c_grads[ci], &accepted_grads, n) {
568            accepted_grads.push(&c_grads[ci]);
569            independent_eq.push(ci);
570        }
571    }
572    for &ci in &active_ineq {
573        if is_independent(&c_grads[ci], &accepted_grads, n) {
574            accepted_grads.push(&c_grads[ci]);
575            independent_ineq.push(ci);
576        }
577    }
578
579    let n_ind_eq = independent_eq.len();
580    let n_ind_ineq = independent_ineq.len();
581    let n_total = n_ind_eq + n_ind_ineq;
582
583    if n_total == 0 {
584        // Unconstrained QP: H * d = -grad
585        let neg_grad: Vec<S> = grad.iter().map(|&g| -g).collect();
586        let d = hess.solve(&neg_grad).unwrap_or(neg_grad);
587        return (d, S::ZERO);
588    }
589
590    // Build KKT system
591    let kkt_n = n + n_total;
592    let mut kkt = DenseMatrix::<S>::zeros(kkt_n, kkt_n);
593    let mut rhs = vec![S::ZERO; kkt_n];
594
595    // H block with scale-dependent regularization
596    let diag_max = (0..n)
597        .map(|i| hess.get(i, i).abs())
598        .fold(S::ZERO, |a, b| if b > a { b } else { a });
599    let reg = S::from_f64(1e-8) * (S::ONE + diag_max);
600    for i in 0..n {
601        for j in 0..n {
602            kkt.set(i, j, hess.get(i, j));
603        }
604        kkt.set(i, i, kkt.get(i, i) + reg);
605        rhs[i] = -grad[i];
606    }
607
608    // Equality constraints
609    for (row, &ci) in independent_eq.iter().enumerate() {
610        let cg = &c_grads[ci];
611        for (j, &cgj) in cg.iter().enumerate().take(n) {
612            kkt.set(n + row, j, cgj);
613            kkt.set(j, n + row, cgj);
614        }
615        rhs[n + row] = -c_vals[ci];
616    }
617
618    // Active inequality constraints
619    for (row, &ci) in independent_ineq.iter().enumerate() {
620        let kkt_row = n + n_ind_eq + row;
621        let cg = &c_grads[ci];
622        for (j, &cgj) in cg.iter().enumerate().take(n) {
623            kkt.set(kkt_row, j, cgj);
624            kkt.set(j, kkt_row, cgj);
625        }
626        rhs[kkt_row] = -c_vals[ci];
627    }
628
629    // Small negative diagonal on constraint block for numerical regularization
630    for i in 0..n_total {
631        kkt.set(n + i, n + i, kkt.get(n + i, n + i) - S::from_f64(1e-10));
632    }
633
634    match kkt.solve(&rhs) {
635        Ok(sol) => {
636            let d = sol[..n].to_vec();
637            // Extract max absolute multiplier
638            let max_mult =
639                sol[n..]
640                    .iter()
641                    .map(|&v| v.abs())
642                    .fold(S::ZERO, |a, b| if b > a { b } else { a });
643            (d, max_mult)
644        }
645        Err(_) => {
646            // Fallback: steepest descent toward feasibility
647            let mut d: Vec<S> = grad.iter().map(|&g| -g).collect();
648            for &i in eq_indices {
649                let cv = c_vals[i];
650                for j in 0..n {
651                    d[j] -= cv * c_grads[i][j];
652                }
653            }
654            for &i in ineq_indices {
655                if c_vals[i] > S::ZERO {
656                    for j in 0..n {
657                        d[j] -= c_vals[i] * c_grads[i][j];
658                    }
659                }
660            }
661            (d, S::from_f64(100.0))
662        }
663    }
664}
665
666/// Check if a constraint gradient is linearly independent from already-accepted ones.
667/// Uses a Gram-Schmidt-like residual check: project `g` onto the span of `accepted`,
668/// and reject if the residual is too small relative to `g`.
669fn is_independent<S: Scalar>(g: &[S], accepted: &[&Vec<S>], n: usize) -> bool {
670    let g_norm_sq: S = g.iter().map(|&v| v * v).sum();
671    if g_norm_sq < S::from_f64(1e-20) {
672        return false; // zero gradient
673    }
674
675    if accepted.is_empty() {
676        return true;
677    }
678
679    // Compute residual after projecting out accepted directions
680    let mut residual: Vec<S> = g.to_vec();
681    for &a in accepted {
682        let a_norm_sq: S = a.iter().map(|&v| v * v).sum();
683        if a_norm_sq < S::from_f64(1e-20) {
684            continue;
685        }
686        let dot: S = residual
687            .iter()
688            .zip(a.iter())
689            .take(n)
690            .map(|(&ri, &ai)| ri * ai)
691            .sum();
692        let coeff = dot / a_norm_sq;
693        for j in 0..n.min(residual.len()) {
694            residual[j] -= coeff * a[j];
695        }
696    }
697
698    let res_norm_sq: S = residual.iter().map(|&v| v * v).sum();
699    // Independent if residual retains at least 1% of original norm
700    res_norm_sq > S::from_f64(1e-4) * g_norm_sq
701}
702
703#[cfg(test)]
704mod tests {
705    use super::*;
706    use crate::problem::Constraint;
707
708    #[test]
709    fn test_sqp_equality_circle() {
710        // minimize x0 + x1  s.t.  x0^2 + x1^2 = 1
711        // Optimal: (-1/sqrt(2), -1/sqrt(2))
712        let constraints = vec![Constraint {
713            func: Box::new(|x: &[f64]| x[0] * x[0] + x[1] * x[1] - 1.0),
714            grad: Some(Box::new(|x: &[f64], g: &mut [f64]| {
715                g[0] = 2.0 * x[0];
716                g[1] = 2.0 * x[1];
717            })),
718            kind: ConstraintKind::Equality,
719        }];
720
721        let result = sqp_minimize(
722            |x: &[f64]| x[0] + x[1],
723            |_x: &[f64], g: &mut [f64]| {
724                g[0] = 1.0;
725                g[1] = 1.0;
726            },
727            &constraints,
728            &[1.0, 0.0],
729            &SqpOptions::default(),
730        )
731        .unwrap();
732
733        assert!(result.converged, "SQP did not converge: {}", result.message);
734        let expected = -1.0 / 2.0_f64.sqrt();
735        assert!(
736            (result.x[0] - expected).abs() < 0.05,
737            "x0={}, expected {}",
738            result.x[0],
739            expected
740        );
741        assert!(result.constraint_violation < 1e-3);
742    }
743
744    #[test]
745    fn test_sqp_inequality() {
746        // minimize (x0-2)^2 + (x1-2)^2  s.t.  x0 + x1 <= 2
747        // Optimal at (1, 1)
748        let constraints = vec![Constraint {
749            func: Box::new(|x: &[f64]| x[0] + x[1] - 2.0),
750            grad: Some(Box::new(|_x: &[f64], g: &mut [f64]| {
751                g[0] = 1.0;
752                g[1] = 1.0;
753            })),
754            kind: ConstraintKind::Inequality,
755        }];
756
757        let result = sqp_minimize(
758            |x: &[f64]| (x[0] - 2.0).powi(2) + (x[1] - 2.0).powi(2),
759            |x: &[f64], g: &mut [f64]| {
760                g[0] = 2.0 * (x[0] - 2.0);
761                g[1] = 2.0 * (x[1] - 2.0);
762            },
763            &constraints,
764            &[0.0, 0.0],
765            &SqpOptions::default(),
766        )
767        .unwrap();
768
769        assert!(result.converged, "SQP did not converge: {}", result.message);
770        assert!(
771            (result.x[0] - 1.0).abs() < 0.1,
772            "x0={}, expected ~1.0",
773            result.x[0]
774        );
775    }
776
777    #[test]
778    fn test_sqp_mixed_constraints() {
779        // minimize x0^2 + x1^2
780        // s.t. x0 + x1 = 1, 0.6 - x0 <= 0
781        // Optimal: x0=0.6, x1=0.4
782        let constraints = vec![
783            Constraint {
784                func: Box::new(|x: &[f64]| x[0] + x[1] - 1.0),
785                grad: Some(Box::new(|_x: &[f64], g: &mut [f64]| {
786                    g[0] = 1.0;
787                    g[1] = 1.0;
788                })),
789                kind: ConstraintKind::Equality,
790            },
791            Constraint {
792                func: Box::new(|x: &[f64]| 0.6 - x[0]),
793                grad: Some(Box::new(|_x: &[f64], g: &mut [f64]| {
794                    g[0] = -1.0;
795                    g[1] = 0.0;
796                })),
797                kind: ConstraintKind::Inequality,
798            },
799        ];
800
801        let result = sqp_minimize(
802            |x: &[f64]| x[0] * x[0] + x[1] * x[1],
803            |x: &[f64], g: &mut [f64]| {
804                g[0] = 2.0 * x[0];
805                g[1] = 2.0 * x[1];
806            },
807            &constraints,
808            &[1.0, 1.0],
809            &SqpOptions::default(),
810        )
811        .unwrap();
812
813        assert!(result.converged, "SQP did not converge: {}", result.message);
814        assert!(
815            (result.x[0] - 0.6).abs() < 0.1,
816            "x0={}, expected ~0.6",
817            result.x[0]
818        );
819    }
820
821    #[test]
822    fn test_sqp_multiple_inequality_constraints() {
823        // minimize x0^2 + x1^2 s.t. x0 >= 1, x1 >= 1, x0 + x1 <= 4
824        // 3 active or near-active inequality constraints
825        // Optimal at (1, 1)
826        let constraints = vec![
827            Constraint {
828                func: Box::new(|x: &[f64]| -x[0] + 1.0), // 1 - x0 <= 0
829                grad: Some(Box::new(|_x: &[f64], g: &mut [f64]| {
830                    g[0] = -1.0;
831                    g[1] = 0.0;
832                })),
833                kind: ConstraintKind::Inequality,
834            },
835            Constraint {
836                func: Box::new(|x: &[f64]| -x[1] + 1.0), // 1 - x1 <= 0
837                grad: Some(Box::new(|_x: &[f64], g: &mut [f64]| {
838                    g[0] = 0.0;
839                    g[1] = -1.0;
840                })),
841                kind: ConstraintKind::Inequality,
842            },
843            Constraint {
844                func: Box::new(|x: &[f64]| x[0] + x[1] - 4.0), // x0+x1 <= 4
845                grad: Some(Box::new(|_x: &[f64], g: &mut [f64]| {
846                    g[0] = 1.0;
847                    g[1] = 1.0;
848                })),
849                kind: ConstraintKind::Inequality,
850            },
851        ];
852
853        let result = sqp_minimize(
854            |x: &[f64]| x[0] * x[0] + x[1] * x[1],
855            |x: &[f64], g: &mut [f64]| {
856                g[0] = 2.0 * x[0];
857                g[1] = 2.0 * x[1];
858            },
859            &constraints,
860            &[0.5, 0.5],
861            &SqpOptions::default(),
862        )
863        .unwrap();
864
865        assert!(result.converged, "SQP did not converge: {}", result.message);
866        assert!(
867            (result.x[0] - 1.0).abs() < 0.15,
868            "x0={}, expected ~1.0",
869            result.x[0]
870        );
871        assert!(
872            (result.x[1] - 1.0).abs() < 0.15,
873            "x1={}, expected ~1.0",
874            result.x[1]
875        );
876    }
877
878    #[test]
879    fn test_sqp_dependent_constraints() {
880        // Nearly parallel constraints: x0 + x1 <= 2 and x0 + 1.001*x1 <= 2.001
881        // minimize (x0-3)^2 + (x1-3)^2 -> optimal at (1, 1)
882        let constraints = vec![
883            Constraint {
884                func: Box::new(|x: &[f64]| x[0] + x[1] - 2.0),
885                grad: Some(Box::new(|_x: &[f64], g: &mut [f64]| {
886                    g[0] = 1.0;
887                    g[1] = 1.0;
888                })),
889                kind: ConstraintKind::Inequality,
890            },
891            Constraint {
892                func: Box::new(|x: &[f64]| x[0] + 1.001 * x[1] - 2.001),
893                grad: Some(Box::new(|_x: &[f64], g: &mut [f64]| {
894                    g[0] = 1.0;
895                    g[1] = 1.001;
896                })),
897                kind: ConstraintKind::Inequality,
898            },
899        ];
900
901        let result = sqp_minimize(
902            |x: &[f64]| (x[0] - 3.0).powi(2) + (x[1] - 3.0).powi(2),
903            |x: &[f64], g: &mut [f64]| {
904                g[0] = 2.0 * (x[0] - 3.0);
905                g[1] = 2.0 * (x[1] - 3.0);
906            },
907            &constraints,
908            &[0.0, 0.0],
909            &SqpOptions::default(),
910        )
911        .unwrap();
912
913        // Should not crash, and should find a feasible point near (1, 1)
914        assert!(result.converged, "SQP did not converge: {}", result.message);
915        assert!(
916            result.x[0] + result.x[1] <= 2.0 + 0.01,
917            "constraint violated: x0+x1={}",
918            result.x[0] + result.x[1]
919        );
920    }
921
922    #[test]
923    fn test_sqp_many_constraints() {
924        // 5 constraints (2 eq + 3 ineq): min x0^2 + x1^2 + x2^2
925        // s.t. x0+x1+x2 = 3, x0-x1 = 0, x0>=0, x1>=0, x2>=0
926        // Optimal: (1, 1, 1)
927        let constraints = vec![
928            Constraint {
929                func: Box::new(|x: &[f64]| x[0] + x[1] + x[2] - 3.0),
930                grad: Some(Box::new(|_x: &[f64], g: &mut [f64]| {
931                    g[0] = 1.0;
932                    g[1] = 1.0;
933                    g[2] = 1.0;
934                })),
935                kind: ConstraintKind::Equality,
936            },
937            Constraint {
938                func: Box::new(|x: &[f64]| x[0] - x[1]),
939                grad: Some(Box::new(|_x: &[f64], g: &mut [f64]| {
940                    g[0] = 1.0;
941                    g[1] = -1.0;
942                    g[2] = 0.0;
943                })),
944                kind: ConstraintKind::Equality,
945            },
946            Constraint {
947                func: Box::new(|x: &[f64]| -x[0]),
948                grad: Some(Box::new(|_x: &[f64], g: &mut [f64]| {
949                    g[0] = -1.0;
950                    g[1] = 0.0;
951                    g[2] = 0.0;
952                })),
953                kind: ConstraintKind::Inequality,
954            },
955            Constraint {
956                func: Box::new(|x: &[f64]| -x[1]),
957                grad: Some(Box::new(|_x: &[f64], g: &mut [f64]| {
958                    g[0] = 0.0;
959                    g[1] = -1.0;
960                    g[2] = 0.0;
961                })),
962                kind: ConstraintKind::Inequality,
963            },
964            Constraint {
965                func: Box::new(|x: &[f64]| -x[2]),
966                grad: Some(Box::new(|_x: &[f64], g: &mut [f64]| {
967                    g[0] = 0.0;
968                    g[1] = 0.0;
969                    g[2] = -1.0;
970                })),
971                kind: ConstraintKind::Inequality,
972            },
973        ];
974
975        let result = sqp_minimize(
976            |x: &[f64]| x[0] * x[0] + x[1] * x[1] + x[2] * x[2],
977            |x: &[f64], g: &mut [f64]| {
978                g[0] = 2.0 * x[0];
979                g[1] = 2.0 * x[1];
980                g[2] = 2.0 * x[2];
981            },
982            &constraints,
983            &[2.0, 1.0, 0.5],
984            &SqpOptions::default(),
985        )
986        .unwrap();
987
988        assert!(result.converged, "SQP did not converge: {}", result.message);
989        assert!(
990            (result.x[0] - 1.0).abs() < 0.15,
991            "x0={}, expected ~1.0",
992            result.x[0]
993        );
994        assert!(
995            (result.x[1] - 1.0).abs() < 0.15,
996            "x1={}, expected ~1.0",
997            result.x[1]
998        );
999        assert!(
1000            (result.x[2] - 1.0).abs() < 0.15,
1001            "x2={}, expected ~1.0",
1002            result.x[2]
1003        );
1004    }
1005
1006    #[test]
1007    fn test_sqp_unconstrained() {
1008        let result = sqp_minimize(
1009            |x: &[f64]| x[0] * x[0] + 4.0 * x[1] * x[1],
1010            |x: &[f64], g: &mut [f64]| {
1011                g[0] = 2.0 * x[0];
1012                g[1] = 8.0 * x[1];
1013            },
1014            &[],
1015            &[5.0, 3.0],
1016            &SqpOptions::default(),
1017        )
1018        .unwrap();
1019
1020        assert!(result.converged, "did not converge: {}", result.message);
1021        assert!(result.x[0].abs() < 1e-3, "x0={}", result.x[0]);
1022        assert!(result.x[1].abs() < 1e-3, "x1={}", result.x[1]);
1023    }
1024}
numra_optim/sqp.rs

numra_optim/
sqp.rs