// scirs2_optimize/constrained/trust_constr.rs

//! Trust-region algorithm for constrained optimization

use crate::constrained::{Constraint, ConstraintFn, ConstraintKind, Options};
use crate::error::OptimizeResult;
use crate::result::OptimizeResults;
use ndarray::{Array1, Array2, ArrayBase, Axis, Data, Ix1};
/// Minimize a scalar function subject to general equality and inequality
/// constraints using a trust-region SQP-style method.
///
/// The objective gradient and the constraint Jacobian are approximated with
/// forward finite differences of step `eps`. Steps come from an approximate
/// constrained quadratic subproblem; acceptance is decided by an L1
/// penalty merit function, the trust-region radius adapts to the ratio of
/// actual to predicted reduction, Lagrange multipliers are updated with a
/// simple projected-gradient rule, and the Lagrangian Hessian is kept as a
/// quasi-Newton approximation.
///
/// # Arguments
/// * `func` - objective function, called on the variable vector as a slice
/// * `x0` - starting point
/// * `constraints` - constraint set; entries flagged as bounds are skipped here
/// * `options` - tolerances (`ftol`, `gtol`, `ctol`), `maxiter`, and the
///   finite-difference step `eps`; unset fields fall back to defaults below
///
/// # Errors
/// As written this always returns `Ok`; failure to converge is reported via
/// `OptimizeResults::success` and `message` instead.
#[allow(clippy::many_single_char_names)]
#[allow(dead_code)]
pub fn minimize_trust_constr<F, S>(
    func: F,
    x0: &ArrayBase<S, Ix1>,
    constraints: &[Constraint<ConstraintFn>],
    options: &Options,
) -> OptimizeResult<OptimizeResults<f64>>
where
    F: Fn(&[f64]) -> f64,
    S: Data<Elem = f64>,
{
    // Tolerances and limits, with defaults when not provided.
    let ftol = options.ftol.unwrap_or(1e-8);
    let gtol = options.gtol.unwrap_or(1e-8);
    let ctol = options.ctol.unwrap_or(1e-8);
    let maxiter = options.maxiter.unwrap_or(100 * x0.len());
    let eps = options.eps.unwrap_or(1e-8);

    // Current iterate, objective value, and function-evaluation counter.
    let n = x0.len();
    let mut x = x0.to_owned();
    let mut f = func(x.as_slice().unwrap());
    let mut nfev = 1;

    // Lagrange multiplier estimates, one per constraint, starting at zero.
    let mut lambda = Array1::zeros(constraints.len());

    // Initial objective gradient via forward finite differences.
    let mut g = Array1::zeros(n);
    for i in 0..n {
        let mut x_h = x.clone();
        x_h[i] += eps;
        let f_h = func(x_h.as_slice().unwrap());
        g[i] = (f_h - f) / eps;
        nfev += 1;
    }

    // Initial constraint values. Bounds constraints are skipped, so their
    // slots in `c` stay zero and are effectively inert below.
    let mut c = Array1::zeros(constraints.len());
    for (i, constraint) in constraints.iter().enumerate() {
        if !constraint.is_bounds() {
            let val = (constraint.fun)(x.as_slice().unwrap());

            match constraint.kind {
                ConstraintKind::Inequality => {
                    c[i] = val; // g(x) >= 0 constraint
                }
                ConstraintKind::Equality => {
                    c[i] = val; // h(x) = 0 constraint
                }
            }
        }
    }

    // Constraint Jacobian by forward differences: row i holds grad c_i.
    let mut a = Array2::zeros((constraints.len(), n));
    for (i, constraint) in constraints.iter().enumerate() {
        if !constraint.is_bounds() {
            for j in 0..n {
                let mut x_h = x.clone();
                x_h[j] += eps;
                let c_h = (constraint.fun)(x_h.as_slice().unwrap());
                a[[i, j]] = (c_h - c[i]) / eps;
                nfev += 1;
            }
        }
    }

    // Initial trust region radius.
    let mut delta = 1.0;

    // Quasi-Newton approximation of the Hessian of the Lagrangian,
    // initialized to the identity.
    let mut b = Array2::eye(n);

    // Main optimization loop
    let mut iter = 0;

    while iter < maxiter {
        // Measure the worst constraint violation at the current iterate.
        let mut max_constraint_violation = 0.0;
        for (i, &ci) in c.iter().enumerate() {
            match constraints[i].kind {
                ConstraintKind::Inequality => {
                    // Violated only when g(x) < -ctol (feasible means g(x) >= 0).
                    if ci < -ctol {
                        max_constraint_violation = f64::max(max_constraint_violation, -ci);
                    }
                }
                ConstraintKind::Equality => {
                    // For equality constraints, violation is |h(x)|
                    let violation = ci.abs();
                    if violation > ctol {
                        max_constraint_violation = f64::max(max_constraint_violation, violation);
                    }
                }
            }
        }

        // Converged when the (objective) gradient is small and the iterate
        // is feasible to within ctol.
        // NOTE(review): this tests the raw gradient `g`, not the Lagrangian
        // gradient computed below — confirm that is the intended KKT check.
        if g.iter().all(|&gi| gi.abs() < gtol) && max_constraint_violation < ctol {
            break;
        }

        // Gradient of the Lagrangian: grad f minus the multiplier-weighted
        // constraint gradients of the constraints considered active.
        let mut lag_grad = g.clone();
        for (i, &li) in lambda.iter().enumerate() {
            let include_constraint = match constraints[i].kind {
                ConstraintKind::Inequality => {
                    // For inequality constraints: include if active (lambda > 0) or violated (c < 0)
                    li > 0.0 || c[i] < -ctol
                }
                ConstraintKind::Equality => {
                    // For equality constraints: always include (always active)
                    true
                }
            };

            if include_constraint {
                for j in 0..n {
                    // L = f - sum(lambda_i * c_i) for constraints
                    // So gradient of L is grad(f) - sum(lambda_i * grad(c_i))
                    lag_grad[j] -= li * a[[i, j]];
                }
            }
        }

        // Compute the step using a trust-region approach
        // Solve the constrained quadratic subproblem:
        // min 0.5 * p^T B p + g^T p  s.t. ||p|| <= delta and linearized constraints
        let (p, predicted_reduction) =
            compute_trust_region_step_constrained(&lag_grad, &b, &a, &c, delta, constraints, ctol);

        // Trial point.
        let x_new = &x + &p;

        // Evaluate function and constraints at new point
        let f_new = func(x_new.as_slice().unwrap());
        nfev += 1;

        let mut c_new = Array1::zeros(constraints.len());
        for (i, constraint) in constraints.iter().enumerate() {
            if !constraint.is_bounds() {
                c_new[i] = (constraint.fun)(x_new.as_slice().unwrap());
                nfev += 1;
            }
        }

        // L1 penalty merit function: objective plus penalized violations,
        // evaluated at the current and the trial point.
        let mut merit = f;
        let mut merit_new = f_new;

        // NOTE(review): a fixed penalty parameter can be too small for badly
        // scaled constraints; adaptive penalties are common here.
        let penalty = 10.0; // Simple fixed penalty parameter
        for (i, &ci) in c.iter().enumerate() {
            match constraints[i].kind {
                ConstraintKind::Inequality => {
                    // For inequality constraints: penalize only violations (g(x) < 0)
                    merit += penalty * f64::max(0.0, -ci);
                    merit_new += penalty * f64::max(0.0, -c_new[i]);
                }
                ConstraintKind::Equality => {
                    // For equality constraints: penalize any deviation from zero
                    merit += penalty * ci.abs();
                    merit_new += penalty * c_new[i].abs();
                }
            }
        }

        // Compute actual reduction in merit function
        let actual_reduction = merit - merit_new;

        // Ratio of actual to predicted reduction; rho <= 0 means the model
        // predicted no progress, so treat the step as poor.
        let rho = if predicted_reduction > 0.0 {
            actual_reduction / predicted_reduction
        } else {
            0.0
        };

        // Shrink the radius on a poor step; grow it when the step was good
        // and ran close to the trust-region boundary.
        if rho < 0.25 {
            delta *= 0.5;
        } else if rho > 0.75 && p.iter().map(|&pi| pi * pi).sum::<f64>().sqrt() >= 0.9 * delta {
            delta *= 2.0;
        }

        // Accept or reject the step
        if rho > 0.1 {
            // Accept the step
            x = x_new;
            f = f_new;
            c = c_new;

            // Stop when the merit function has essentially stalled.
            if (merit - merit_new).abs() < ftol * (1.0 + merit.abs()) {
                break;
            }

            // New gradient at the accepted point (forward differences;
            // `f` already holds the value at the new `x`).
            let mut g_new = Array1::zeros(n);
            for i in 0..n {
                let mut x_h = x.clone();
                x_h[i] += eps;
                let f_h = func(x_h.as_slice().unwrap());
                g_new[i] = (f_h - f) / eps;
                nfev += 1;
            }

            // New constraint Jacobian (`c` already holds the values at the
            // accepted point, so the difference quotient is consistent).
            let mut a_new = Array2::zeros((constraints.len(), n));
            for (i, constraint) in constraints.iter().enumerate() {
                if !constraint.is_bounds() {
                    for j in 0..n {
                        let mut x_h = x.clone();
                        x_h[j] += eps;
                        let c_h = (constraint.fun)(x_h.as_slice().unwrap());
                        a_new[[i, j]] = (c_h - c[i]) / eps;
                        nfev += 1;
                    }
                }
            }

            // Update Lagrange multipliers using projected gradient method
            for (i, constraint) in constraints.iter().enumerate() {
                match constraint.kind {
                    ConstraintKind::Inequality => {
                        if c[i] < -ctol {
                            // Active or violated constraint
                            // Increase multiplier if constraint is violated
                            lambda[i] = f64::max(0.0, lambda[i] - c[i] * penalty);
                        } else {
                            // Inactive: decay the multiplier towards zero,
                            // keeping it non-negative.
                            lambda[i] = f64::max(0.0, lambda[i] - 0.1 * lambda[i]);
                        }
                    }
                    ConstraintKind::Equality => {
                        // For equality constraints, multipliers can be positive or negative
                        // Update based on constraint violation to drive h(x) -> 0
                        let step_size = 0.1;
                        lambda[i] -= step_size * c[i] * penalty;

                        // Damping to prevent oscillation of the estimate.
                        lambda[i] *= 0.9;
                    }
                }
            }

            // Quasi-Newton update of the Lagrangian Hessian approximation.
            let s = &p;
            let y = &g_new - &g;

            // Skip the update unless the curvature condition s^T y > 0 holds
            // (with a small safeguard), which keeps B positive definite.
            // NOTE(review): the formula below is the Sherman–Morrison-style
            // update normally applied to the INVERSE Hessian, but written
            // here with `s` where `y` would appear — verify this is the
            // intended BFGS variant rather than a role mix-up.
            let s_dot_y = s.dot(&y);
            if s_dot_y > 1e-10 {
                let s_col = s.clone().insert_axis(Axis(1));
                let s_row = s.clone().insert_axis(Axis(0));

                let bs = b.dot(s);
                let bs_col = bs.clone().insert_axis(Axis(1));
                let bs_row = bs.clone().insert_axis(Axis(0));

                let term1 = s_dot_y + s.dot(&bs);
                let term2 = &s_col.dot(&s_row) * (term1 / (s_dot_y * s_dot_y));

                let term3 = &bs_col.dot(&s_row) + &s_col.dot(&bs_row);

                b = &b + &term2 - &(&term3 / s_dot_y);
            }

            // Update variables for next iteration
            g = g_new;
            a = a_new;
        }

        iter += 1;
    }

    // Copy final constraint values into the result vector (bounds slots
    // remain zero, mirroring how `c` was maintained above).
    let mut c_result = Array1::zeros(constraints.len());
    for (i, constraint) in constraints.iter().enumerate() {
        if !constraint.is_bounds() {
            c_result[i] = c[i];
        }
    }

    // Assemble the result record.
    let mut result = OptimizeResults::default();
    result.x = x;
    result.fun = f;
    result.jac = Some(g.into_raw_vec_and_offset().0);
    result.constr = Some(c_result);
    result.nfev = nfev;
    result.nit = iter;
    // Success means the loop broke before exhausting the iteration budget.
    result.success = iter < maxiter;

    if result.success {
        result.message = "Optimization terminated successfully.".to_string();
    } else {
        result.message = "Maximum iterations reached.".to_string();
    }

    Ok(result)
}
311
/// Compute a trust-region step for constrained optimization.
///
/// Approximately solves the subproblem
/// `min 0.5 * p^T B p + g^T p` subject to `||p|| <= delta` and the
/// linearized constraints `c + A p >= 0` (inequalities) / `c + A p = 0`
/// (equalities).
///
/// Strategy: first try the unconstrained Cauchy point; if it already
/// satisfies every linearized constraint, use it. Otherwise fall back to a
/// steepest-descent step scaled to the trust region, repeatedly projected
/// onto the single most-violated linearized constraint (at most 5 passes —
/// a heuristic, not an exact CQP solve).
///
/// # Arguments
/// * `g` - gradient of the (Lagrangian) objective at the current point
/// * `b` - quasi-Newton Hessian approximation
/// * `a` - constraint Jacobian, one row per constraint
/// * `c` - constraint values at the current point
/// * `delta` - trust-region radius
/// * `constraints` - constraint descriptors (only their kinds are used)
/// * `ctol` - feasibility tolerance for the linearized constraints
///
/// # Returns
/// The step `p` and the model-predicted reduction `-g^T p - 0.5 p^T B p`
/// (positive when the quadratic model predicts a decrease).
#[allow(clippy::many_single_char_names)]
#[allow(dead_code)]
fn compute_trust_region_step_constrained(
    g: &Array1<f64>,
    b: &Array2<f64>,
    a: &Array2<f64>,
    c: &Array1<f64>,
    delta: f64,
    constraints: &[Constraint<ConstraintFn>],
    ctol: f64,
) -> (Array1<f64>, f64) {
    let n = g.len();
    let n_constr = constraints.len();

    // Compute the unconstrained Cauchy point (steepest descent direction)
    let p_unconstrained = compute_unconstrained_cauchy_point(g, b, delta);

    // Check if unconstrained Cauchy point satisfies linearized constraints
    let mut constraint_violated = false;
    for i in 0..n_constr {
        // grad c_i · p for this candidate step.
        let grad_c_dot_p = (0..n).map(|j| a[[i, j]] * p_unconstrained[j]).sum::<f64>();

        match constraints[i].kind {
            ConstraintKind::Inequality => {
                // For inequality constraints: check if g(x) + grad_g^T p >= -ctol
                if c[i] + grad_c_dot_p < -ctol {
                    constraint_violated = true;
                    break;
                }
            }
            ConstraintKind::Equality => {
                // For equality constraints: check if |h(x) + grad_h^T p| <= ctol
                if (c[i] + grad_c_dot_p).abs() > ctol {
                    constraint_violated = true;
                    break;
                }
            }
        }
    }

    // If unconstrained point is feasible, use it
    if !constraint_violated {
        // Predicted reduction of the quadratic model at p.
        let g_dot_p = g.dot(&p_unconstrained);
        let bp = b.dot(&p_unconstrained);
        let p_dot_bp = p_unconstrained.dot(&bp);
        let predicted_reduction = -g_dot_p - 0.5 * p_dot_bp;

        return (p_unconstrained, predicted_reduction);
    }

    // Otherwise, project onto the linearized feasible region
    // This is a simplified approach - in practice, you would solve a CQP

    // Start with the steepest descent direction
    let mut p = Array1::zeros(n);
    for i in 0..n {
        p[i] = -g[i];
    }

    // Scale the direction to have length delta (unless the gradient is
    // effectively zero, in which case p stays zero).
    let p_norm = p.iter().map(|&pi| pi * pi).sum::<f64>().sqrt();
    if p_norm > 1e-10 {
        p = &p * (delta / p_norm);
    }

    // Cyclic projection: repeatedly fix the single most-violated linearized
    // constraint, then rescale into the trust region.
    for _iter in 0..5 {
        // Limited iterations for projection
        let mut max_viol = 0.0;
        let mut most_violated = 0;

        // Find most violated constraint
        for i in 0..n_constr {
            let grad_c_dot_p = (0..n).map(|j| a[[i, j]] * p[j]).sum::<f64>();

            let viol = match constraints[i].kind {
                ConstraintKind::Inequality => {
                    // For inequality constraints: violation is max(0, -(g + grad_g^T p))
                    f64::max(0.0, -(c[i] + grad_c_dot_p))
                }
                ConstraintKind::Equality => {
                    // For equality constraints: violation is |h + grad_h^T p|
                    (c[i] + grad_c_dot_p).abs()
                }
            };

            if viol > max_viol {
                max_viol = viol;
                most_violated = i;
            }
        }

        // All linearized constraints satisfied to tolerance — done.
        if max_viol < ctol {
            break;
        }

        // Orthogonal projection of p onto the violated constraint's
        // hyperplane; needs ||grad c||^2 for the normal direction.
        let mut a_norm_sq = 0.0;
        for j in 0..n {
            a_norm_sq += a[[most_violated, j]] * a[[most_violated, j]];
        }

        // Skip degenerate constraints with a (near-)zero gradient row.
        if a_norm_sq > 1e-10 {
            let grad_c_dot_p = (0..n).map(|j| a[[most_violated, j]] * p[j]).sum::<f64>();

            let proj_dist = match constraints[most_violated].kind {
                ConstraintKind::Inequality => {
                    // For inequality constraints: project to boundary when violated
                    if c[most_violated] + grad_c_dot_p < 0.0 {
                        -(c[most_violated] + grad_c_dot_p) / a_norm_sq
                    } else {
                        0.0
                    }
                }
                ConstraintKind::Equality => {
                    // For equality constraints: always project to satisfy h + grad_h^T p = 0
                    -(c[most_violated] + grad_c_dot_p) / a_norm_sq
                }
            };

            // Move p along the constraint gradient by the projection distance.
            for j in 0..n {
                p[j] += a[[most_violated, j]] * proj_dist;
            }

            // Rescale to trust region (projection may have pushed p outside).
            let p_norm = p.iter().map(|&pi| pi * pi).sum::<f64>().sqrt();
            if p_norm > delta {
                p = &p * (delta / p_norm);
            }
        }
    }

    // Predicted reduction of the quadratic model at the projected step.
    let g_dot_p = g.dot(&p);
    let bp = b.dot(&p);
    let p_dot_bp = p.dot(&bp);
    let predicted_reduction = -g_dot_p - 0.5 * p_dot_bp;

    (p, predicted_reduction)
}
455
456/// Compute the unconstrained Cauchy point (steepest descent to trust region boundary)
457#[allow(dead_code)]
458fn compute_unconstrained_cauchy_point(g: &Array1<f64>, b: &Array2<f64>, delta: f64) -> Array1<f64> {
459    let n = g.len();
460
461    // Compute the steepest descent direction: -g
462    let mut p = Array1::zeros(n);
463    for i in 0..n {
464        p[i] = -g[i];
465    }
466
467    // Compute g^T B g and ||g||^2
468    let bg = b.dot(g);
469    let g_dot_bg = g.dot(&bg);
470    let g_norm_sq = g.dot(g);
471
472    // Check if the gradient is practically zero
473    if g_norm_sq < 1e-10 {
474        // If gradient is practically zero, don't move
475        return Array1::zeros(n);
476    }
477
478    // Compute tau (step to the boundary)
479    let tau = if g_dot_bg <= 0.0 {
480        // Negative curvature or zero curvature case
481        delta / g_norm_sq.sqrt()
482    } else {
483        // Positive curvature case
484        f64::min(delta / g_norm_sq.sqrt(), g_norm_sq / g_dot_bg)
485    };
486
487    // Scale the direction by tau
488    for i in 0..n {
489        p[i] *= tau;
490    }
491
492    p
493}